From 7cedfa48de4bd0970fcac86a3b13cd7407231b08 Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Wed, 19 Oct 2022 17:56:12 -0700 Subject: [PATCH] Load Testing Script (#18020) * script skeleton * add API call to source_definitions to fetch E2E Test Source definition ID * createSource implementation * add destination creation logic implementation * get definition IDs, catalogId, and implement connection creation * add cleanup script and write created ids to a file that can be cleaned up * make cloud header a command-line argument, other cleanup * script comments fix * remove kube references and fix indentation * temp commit - don't push * remove discover catalog function * more cleanups * more cleanups * cleanup help text * exit codes and show how many connections left * add README Co-authored-by: Xiaohan Song --- tools/bin/load_test/.gitignore | 1 + tools/bin/load_test/README.md | 103 ++++++++++ tools/bin/load_test/cleanup_load_test.sh | 152 ++++++++++++++ tools/bin/load_test/connection_spec.json | 47 +++++ tools/bin/load_test/destination_spec.json | 8 + tools/bin/load_test/load_test_airbyte.sh | 240 ++++++++++++++++++++++ tools/bin/load_test/load_test_utils.sh | 45 ++++ tools/bin/load_test/source_spec.json | 17 ++ 8 files changed, 613 insertions(+) create mode 100644 tools/bin/load_test/.gitignore create mode 100644 tools/bin/load_test/README.md create mode 100755 tools/bin/load_test/cleanup_load_test.sh create mode 100644 tools/bin/load_test/connection_spec.json create mode 100644 tools/bin/load_test/destination_spec.json create mode 100755 tools/bin/load_test/load_test_airbyte.sh create mode 100644 tools/bin/load_test/load_test_utils.sh create mode 100644 tools/bin/load_test/source_spec.json diff --git a/tools/bin/load_test/.gitignore b/tools/bin/load_test/.gitignore new file mode 100644 index 000000000000..9a748532e4be --- /dev/null +++ b/tools/bin/load_test/.gitignore @@ -0,0 +1 @@ +cleanup/ diff --git a/tools/bin/load_test/README.md 
b/tools/bin/load_test/README.md
new file mode 100644
index 000000000000..ef45405e8917
--- /dev/null
+++ b/tools/bin/load_test/README.md
@@ -0,0 +1,103 @@
+# Load Testing Airbyte
+
+## Overview
+To stress test an Airbyte deployment, use the `load_test_airbyte.sh` shell script to quickly and easily create many connections.
+This script creates a new E2E Test Source, E2E Test Destination, and a configurable number of connections in the indicated workspace.
+
+## Instructions
+From your top-level `/airbyte` directory, run the following to perform a load test:
+
+```
+./tools/bin/load_test/load_test_airbyte.sh -W <workspace id> -C <num_connections>
+```
+
+
+By default, the script assumes that the Airbyte instance's server is accessible at `localhost:8001`. This is the default server location when
+deploying Airbyte with `docker-compose up`.
+
+Additionally, the E2E Test Source created by the script will take 10 minutes to complete a sync by default.
+
+These defaults can be overridden with flags. All available flags are described as follows:
+
+```
+  -h
+    Display help
+
+  -W <workspace id>
+    Specify the workspace ID where new connectors and connections should be created.
+    Required.
+
+  -H <hostname>
+    Specify the Airbyte API server hostname that the script should call to create new connectors and connections.
+    Defaults to 'localhost'.
+
+  -P <port>
+    Specify the port for the Airbyte server.
+    Defaults to '8001'.
+
+  -X <header>
+    Specify the X-Endpoint-API-UserInfo header value for API authentication.
+    For Google Cloud Endpoint authentication only.
+
+  -C <num_connections>
+    Specify the number of connections that should be created by the script.
+    Defaults to '1'.
+
+  -T <minutes>
+    Specify the time in minutes that each connection should sync for.
+    Defaults to '10'.
+```
+
+
+### Load Testing on Kubernetes
+
+To load test a deployment of Airbyte running on Kubernetes, you will need to set up port-forwarding to the `airbyte-server` deployment.
+This can be accomplished with the following command:
+
+```
+kubectl port-forward deployment/airbyte-server -n ab 8001:8001
+```
+
+This will make the Airbyte server available at `localhost:8001`.
+
+
+### Authentication
+
+If your deployment of Airbyte happens to use Google Cloud Endpoints for authentication, you can use the `-X` option to pass
+an `X-Endpoint-API-UserInfo` header value.
+
+
+## Cleanup
+The `load_test_airbyte.sh` script writes created IDs to files in the script's `/cleanup` directory. To delete resources that were created by the load
+test script, you can run `cleanup_load_test.sh`, which reads IDs from the `/cleanup` directory and calls the Airbyte API to delete them.
+
+
+### Cleanup Instructions
+To run the cleanup script, from the top-level `airbyte` directory, run the following:
+
+```
+./tools/bin/load_test/cleanup_load_test.sh -W <workspace id>
+```
+
+All available cleanup script flags are described as follows:
+
+```
+  -h
+    Display help
+
+  -W <workspace id>
+    Specify the workspace ID from where connectors and connections should be deleted.
+    Required.
+
+  -H <hostname>
+    Specify the Airbyte API server hostname that the script should call to delete connectors and connections.
+    Defaults to 'localhost'.
+
+  -P <port>
+    Specify the port for the Airbyte server.
+    Defaults to '8001'.
+
+  -X <header>
+ Specify the X-Endpoint-API-UserInfo header value for API authentication. + For Google Cloud Endpoint authentication only. +``` diff --git a/tools/bin/load_test/cleanup_load_test.sh b/tools/bin/load_test/cleanup_load_test.sh new file mode 100755 index 000000000000..03e60bc2558b --- /dev/null +++ b/tools/bin/load_test/cleanup_load_test.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +set -o errexit +set -o nounset + +< + ${GREEN}Specify the workspace ID from where connectors and connections should be deleted. + Required. + + ${CLEAR}-H + ${GREEN}Specify the Airbyte API server hostname that the script should call to delete connectors and connections. + Defaults to 'localhost'. + + ${CLEAR}-P + ${GREEN}Specify the port for the Airbyte server. + Defaults to '8001'. + + ${CLEAR}-X
+    ${GREEN}Specify the X-Endpoint-API-UserInfo header value for API authentication.
+    For Google Cloud Endpoint authentication only.
+  """ && exit 1
+}
+
+# Defaults; each can be overridden by the matching command-line flag.
+# workspace_id starts out empty so that the check in setup can print its error
+# message instead of the script aborting under `set -o nounset` when -W is omitted.
+workspace_id=""
+hostname=localhost
+api_port=8001
+x_endpoint_header=""
+
+# Note: the option string also accepts `k` and `N:`, which have no handler below
+# and therefore fall through to the help branch.
+while getopts "hW:H:P:X:kN:" options ; do
+  case "${options}" in
+    h)
+      showhelp
+      ;;
+    W)
+      workspace_id="${OPTARG}"
+      ;;
+    H)
+      hostname="${OPTARG}"
+      ;;
+    P)
+      api_port="${OPTARG}"
+      ;;
+    X)
+      x_endpoint_header="${OPTARG}"
+      ;;
+    *)
+      showhelp
+      ;;
+  esac
+done
+
+# Check that a workspace ID was supplied, echo the effective settings, and call
+# setCleanupFilesForWorkspace (defined outside this section; presumably it sets
+# the *_CLEANUP_FILE variables read by the delete functions - confirm).
+function setup {
+  if test -z "$workspace_id"; then
+    echo "error: must set a workspace id with -W"
+    exit 1
+  fi
+
+  echo "set workspace_id to ${workspace_id}"
+  echo "set hostname to ${hostname}"
+  echo "set api_port to ${api_port}"
+
+  setCleanupFilesForWorkspace "$workspace_id"
+}
+
+# Delete every resource whose ID is listed in a cleanup file, one ID at a time.
+#   $1 - path of the cleanup file
+#   $2 - API endpoint to call, e.g. "connections/delete"
+#   $3 - name of the ID field in the request body, e.g. "connectionId"
+#   $4 - resource name used in the log output, e.g. "connection"
+function deleteResourcesFromFile {
+  local cleanup_file="$1"
+  local endpoint="$2"
+  local id_field="$3"
+  local resource_name="$4"
+  local resource_id
+
+  while test -s "$cleanup_file"
+  do
+    resource_id=$(readFirstLineFromFile "$cleanup_file")
+    callApi "$endpoint" "{\"${id_field}\":\"${resource_id}\"}"
+    echo "deleted ${resource_name} with ID ${resource_id}"
+
+    # deletion succeeded, so remove the ID from the cleanup file
+    removeFirstLineFromFile "$cleanup_file"
+  done
+
+  # The loop only ends once the file is empty or missing. Remove it only when it
+  # still exists, so a missing file does not make `rm` fail and abort the whole
+  # cleanup under errexit before the remaining resource types are processed.
+  if test -e "$cleanup_file"
+  then
+    rm -- "$cleanup_file"
+    echo "removed cleanup file $cleanup_file"
+  fi
+}
+
+# Delete all connections recorded in $CONNECTION_CLEANUP_FILE.
+function deleteConnections {
+  deleteResourcesFromFile "$CONNECTION_CLEANUP_FILE" "connections/delete" "connectionId" "connection"
+}
+
+# Delete all sources recorded in $SOURCE_CLEANUP_FILE.
+function deleteSources {
+  deleteResourcesFromFile "$SOURCE_CLEANUP_FILE" "sources/delete" "sourceId" "source"
+}
+
+# Delete all destinations recorded in $DESTINATION_CLEANUP_FILE.
+function deleteDestinations {
+  deleteResourcesFromFile "$DESTINATION_CLEANUP_FILE" "destinations/delete" "destinationId" "destination"
+}
+
+############
+##  MAIN  ##
+############
+
+if [[ $# -eq 0 ]] ; then
+  showhelp
+  exit 0
+fi
+
+setup
+
+deleteConnections
+
+deleteSources
+
+deleteDestinations
+
+echo "Finished!"
diff --git a/tools/bin/load_test/connection_spec.json b/tools/bin/load_test/connection_spec.json
new file mode 100644
index 000000000000..b4678cf58a98
--- /dev/null
+++ b/tools/bin/load_test/connection_spec.json
@@ -0,0 +1,47 @@
+{
+  "sourceId": "replace_source_id",
+  "destinationId": "replace_destination_id",
+  "syncCatalog": {
+    "streams": [
+      {
+        "config": {
+          "syncMode": "full_refresh",
+          "cursorField": [],
+          "destinationSyncMode": "overwrite",
+          "primaryKey": [],
+          "aliasName": "data_stream",
+          "selected": true
+        },
+        "stream": {
+          "name": "data_stream",
+          "jsonSchema": {
+            "type": "object",
+            "properties": {
+              "column1": {
+                "type": "string"
+              }
+            }
+          },
+          "supportedSyncModes": [
+            "full_refresh"
+          ],
+          "defaultCursorField": [],
+          "sourceDefinedPrimaryKey": []
+        }
+      }
+    ]
+  },
+  "prefix": "",
+  "namespaceDefinition": "source",
+  "namespaceFormat": "${SOURCE_NAMESPACE}",
+  "scheduleType": "basic",
+  "scheduleData": {
+    "basicSchedule": {
+      "units": 24,
+      "timeUnit": "hours"
+    }
+  },
+  "name": "replace_connection_name",
+  "operations": [],
+  "status": "active"
+}
diff --git a/tools/bin/load_test/destination_spec.json
b/tools/bin/load_test/destination_spec.json new file mode 100644 index 000000000000..dc645d969d55 --- /dev/null +++ b/tools/bin/load_test/destination_spec.json @@ -0,0 +1,8 @@ +{ + "name": "End-to-End Testing (/dev/null)", + "destinationDefinitionId": "replace_destination_definition_id", + "workspaceId": "replace_workspace_id", + "connectionConfiguration": { + "type": "SILENT" + } +} diff --git a/tools/bin/load_test/load_test_airbyte.sh b/tools/bin/load_test/load_test_airbyte.sh new file mode 100755 index 000000000000..dc7f893b651e --- /dev/null +++ b/tools/bin/load_test/load_test_airbyte.sh @@ -0,0 +1,240 @@ +#!/usr/bin/env bash +set -o errexit +set -o nounset + +< + ${GREEN}Specify the workspace ID where new connectors and connections should be created. + Required. + + ${CLEAR}-H + ${GREEN}Specify the Airbyte API server hostname that the script should call to create new connectors and connections. + Defaults to 'localhost'. + + ${CLEAR}-P + ${GREEN}Specify the port for the Airbyte server. + Defaults to '8001'. + + ${CLEAR}-X
+    ${GREEN}Specify the X-Endpoint-API-UserInfo header value for API authentication.
+    For Google Cloud Endpoint authentication only.
+
+  ${CLEAR}-C
+    ${GREEN}Specify the number of connections that should be created by the script.
+    Defaults to '1'.
+
+  ${CLEAR}-T
+    ${GREEN}Specify the time in minutes that each connection should sync for.
+    Defaults to '10'.
+  """
+}
+
+# Defaults; each can be overridden by the matching command-line flag.
+# workspace_id starts out empty so that the check in setup can print its error
+# message instead of the script aborting under `set -o nounset` when -W is omitted.
+workspace_id=""
+hostname=localhost
+api_port=8001
+x_endpoint_header=
+num_connections=1
+sync_minutes=10
+
+# The `-:` entry lets getopts hand long options such as --debug to the `-)` branch.
+# Note: `k` and `N:` are accepted by the option string but have no handler, so
+# they fall through to the help branch.
+while getopts "hW:H:P:X:C:T:kN:-:" options ; do
+  case "${options}" in
+    -)
+      case "${OPTARG}" in
+        debug)
+          PS4="$GREEN"'${BASH_SOURCE}:${LINENO}:$CLEAR '
+          set -o xtrace #xtrace calls the PS4 string and show all lines as executed
+          ;;
+        *)
+          showhelp
+          exit 0
+          ;;
+      esac;;
+    h)
+      # showhelp in this script does not exit on its own, so stop here;
+      # otherwise a help request would go on to create resources.
+      showhelp
+      exit 0
+      ;;
+    W)
+      workspace_id="${OPTARG}"
+      ;;
+    H)
+      hostname="${OPTARG}"
+      ;;
+    P)
+      api_port="${OPTARG}"
+      ;;
+    X)
+      x_endpoint_header="${OPTARG}"
+      ;;
+    C)
+      num_connections="${OPTARG}"
+      ;;
+    T)
+      sync_minutes="${OPTARG}"
+      ;;
+    *)
+      showhelp
+      exit 1
+      ;;
+  esac
+done
+
+# Abort with an error when an ID extracted from an API response is unusable.
+# `jq -r` prints the literal string "null" for a missing field, so that value is
+# rejected together with the empty string.
+#   $1 - the value to check
+#   $2 - description of the value, used in the error message
+function requireId {
+  if [[ -z "$1" || "$1" == "null" ]]; then
+    echo "error: could not determine $2" >&2
+    exit 1
+  fi
+}
+
+# Validate the arguments, echo the effective settings, and create the cleanup
+# files that record every ID created by this run.
+function setup {
+  set -e
+  if test -z "$workspace_id"; then
+    echo "error: must set a workspace id with -W"
+    exit 1
+  fi
+
+  # -C and -T are used as a loop bound and inside an arithmetic expansion, so
+  # reject anything that is not a plain non-negative integer.
+  if ! [[ "$num_connections" =~ ^[0-9]+$ ]]; then
+    echo "error: -C must be a non-negative integer, got '${num_connections}'" >&2
+    exit 1
+  fi
+  if ! [[ "$sync_minutes" =~ ^[0-9]+$ ]]; then
+    echo "error: -T must be a non-negative integer, got '${sync_minutes}'" >&2
+    exit 1
+  fi
+  # Force base 10 so a value with a leading zero (e.g. 08) is not read as octal.
+  num_connections=$(( 10#$num_connections ))
+  sync_minutes=$(( 10#$sync_minutes ))
+
+  echo "set workspace_id to ${workspace_id}"
+  echo "set hostname to ${hostname}"
+  echo "set api_port to ${api_port}"
+  echo "set x_endpoint_header to ${x_endpoint_header}"
+  echo "set num_connections to ${num_connections}"
+  echo "set sync_minutes to ${sync_minutes}"
+
+  # setCleanupFilesForWorkspace is defined outside this section; presumably it
+  # sets the *_CLEANUP_FILE variables used below - confirm.
+  setCleanupFilesForWorkspace "$workspace_id"
+
+  mkdir -p cleanup
+
+  touch "$CONNECTION_CLEANUP_FILE"
+  touch "$SOURCE_CLEANUP_FILE"
+  touch "$DESTINATION_CLEANUP_FILE"
+}
+
+function getE2ETestSourceDefinitionId {
+  # call source_definitions/list and search response for the E2E Test dockerRepository to get the ID.
+  # local uses `source-e2e-test`, while cloud uses `source-e2e-test-cloud`
+  # first(...) keeps a single ID even if both repositories are listed; several
+  # IDs would put a newline into the sed replacement in createSource.
+  sourceDefinitionId=$(
+    callApi "source_definitions/list" |
+      jq -r 'first(
+        .sourceDefinitions[] |
+        select(
+          (.dockerRepository == "airbyte/source-e2e-test") or
+          (.dockerRepository == "airbyte/source-e2e-test-cloud")
+        ) |
+        .sourceDefinitionId
+      )'
+  )
+  requireId "$sourceDefinitionId" "the E2E Test source definition ID"
+  export sourceDefinitionId
+}
+
+function getE2ETestDestinationDefinition {
+  # call destination_definitions/list and search response for the E2E Test dockerRepository to get the ID.
+  # The filter matches `destination-e2e-test` and `destination-dev-null`.
+  # NOTE(review): it does not match a `destination-e2e-test-cloud` repository;
+  # confirm which repository name cloud deployments actually use.
+  # first(...) keeps a single ID even if both repositories are listed.
+  destinationDefinitionId=$(
+    callApi "destination_definitions/list" |
+      jq -r 'first(
+        .destinationDefinitions[] |
+        select(
+          (.dockerRepository == "airbyte/destination-e2e-test") or
+          (.dockerRepository == "airbyte/destination-dev-null")
+        ) |
+        .destinationDefinitionId
+      )'
+  )
+  requireId "$destinationDefinitionId" "the E2E Test destination definition ID"
+  export destinationDefinitionId
+}
+
+# Create the E2E Test source from source_spec.json and record its ID in
+# $SOURCE_CLEANUP_FILE.
+function createSource {
+  # Fill in the template, then let jq compact it to one line. Unlike stripping
+  # every space with tr, this leaves spaces inside JSON string values intact.
+  # NOTE(review): assumes callApi forwards its second argument quoted - confirm.
+  local body
+  body=$(
+    sed "
+      s/replace_source_read_secs/$(( 60 * sync_minutes ))/g ;
+      s/replace_source_definition_id/$sourceDefinitionId/g ;
+      s/replace_workspace_id/$workspace_id/g" source_spec.json |
+      jq -c .
+  )
+
+  sourceId=$(
+    callApi "sources/create" "$body" |
+      jq -r '.sourceId'
+  )
+  # Do not record a bogus ID for the cleanup script to trip over later.
+  requireId "$sourceId" "the ID of the created source"
+  export sourceId
+  echo "$sourceId" >> "$SOURCE_CLEANUP_FILE"
+}
+
+# Create the E2E Test destination from destination_spec.json and record its ID
+# in $DESTINATION_CLEANUP_FILE.
+function createDestination {
+  # jq -c compacts the JSON without touching string values, so the destination
+  # name in the spec keeps its spaces.
+  # NOTE(review): assumes callApi forwards its second argument quoted - confirm.
+  local body
+  body=$(
+    sed "
+      s/replace_destination_definition_id/$destinationDefinitionId/g ;
+      s/replace_workspace_id/$workspace_id/g" destination_spec.json |
+      jq -c .
+  )
+  destinationId=$(
+    callApi "destinations/create" "$body" |
+      jq -r '.destinationId'
+  )
+  requireId "$destinationId" "the ID of the created destination"
+  export destinationId
+  echo "$destinationId" >> "$DESTINATION_CLEANUP_FILE"
+}
+
+# Create $num_connections connections, numbered from 1.
+function createMultipleConnections {
+  local i
+  for (( i = 1; i <= num_connections; i++ ))
+  do
+    echo "Creating connection number $i (out of $num_connections)..."
+    createConnection "$i"
+  done
+  echo "Finished creating $num_connections connections."
+}
+
+# Call the API to create a connection. Replace strings in connection_spec.json with real IDs.
+# $1 arg is the connection count, which is used in the name of the created connection
+# Connection spec might change and this function could break in the future. If that happens, we need
+# to update the connection spec.
+function createConnection {
+  local body
+  body=$(
+    sed "
+      s/replace_source_id/$sourceId/g ;
+      s/replace_destination_id/$destinationId/g ;
+      s/replace_connection_name/load_test_connection_$1/g" connection_spec.json |
+      jq -c .
+  )
+
+  connectionId=$(
+    callApi "web_backend/connections/create" "$body" |
+      jq -r '.connectionId'
+  )
+  # Stop at the first failed creation instead of recording "null" for cleanup.
+  requireId "$connectionId" "the ID of created connection number $1"
+  echo "$connectionId" >> "$CONNECTION_CLEANUP_FILE"
+}
+
+############
+##  MAIN  ##
+############
+
+if [[ $# -eq 0 ]] ; then
+  showhelp
+  exit 0
+fi
+
+setup
+
+getE2ETestSourceDefinitionId
+echo "Retrieved E2E Test Source Definition ID: ${sourceDefinitionId}"
+
+getE2ETestDestinationDefinition
+echo "Retrieved E2E Test Destination Definition ID: ${destinationDefinitionId}"
+
+createSource
+echo "Created Source with ID: ${sourceId}"
+
+createDestination
+echo "Created Destination with ID: ${destinationId}"
+
+createMultipleConnections
+
+echo "Finished!"
diff --git a/tools/bin/load_test/load_test_utils.sh b/tools/bin/load_test/load_test_utils.sh
new file mode 100644
index 000000000000..1d70b506590c
--- /dev/null
+++ b/tools/bin/load_test/load_test_utils.sh
@@ -0,0 +1,45 @@
+<