Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run performance benchmark on pull requests #14760

Merged
merged 8 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 155 additions & 0 deletions .github/benchmark-configs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{
"name": "Cluster and opensearch-benchmark configurations",
"id_1": {
"description": "Indexing only configuration for NYC_TAXIS workload",
"supported_major_versions": ["2", "3"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "nyc_taxis",
"WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
"EXCLUDE_TASKS": "type:search",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_2": {
"description": "Indexing only configuration for HTTP_LOGS workload",
"supported_major_versions": ["2", "3"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "http_logs",
"WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
"EXCLUDE_TASKS": "type:search",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_3": {
"description": "Search only test-procedure for NYC_TAXIS, uses snapshot to restore the data for OS-3.0.0",
"supported_major_versions": ["3"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "nyc_taxis",
"WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"nyc_taxis_1_shard\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_4": {
"description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-3.0.0",
"supported_major_versions": ["3"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "http_logs",
"WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"http_logs_1_shard\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_5": {
"description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-3.0.0",
"supported_major_versions": ["3"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "big5",
"WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"big5_1_shard\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_6": {
"description": "Search only test-procedure for NYC_TAXIS, uses snapshot to restore the data for OS-2.x",
"supported_major_versions": ["2"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "nyc_taxis",
"WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots\",\"snapshot_name\":\"nyc_taxis_1_shard\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_7": {
"description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-2.x",
"supported_major_versions": ["2"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "http_logs",
"WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots\",\"snapshot_name\":\"http_logs_1_shard\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_8": {
"description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-2.x",
"supported_major_versions": ["2"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "big5",
"WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots\",\"snapshot_name\":\"big5_1_shard\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_9": {
"description": "Indexing and search configuration for pmc workload",
"supported_major_versions": ["2", "3"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "pmc",
"WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
},
"id_10": {
"description": "Indexing only configuration for stack-overflow workload",
"supported_major_versions": ["2", "3"],
"cluster-benchmark-configs": {
"SINGLE_NODE_CLUSTER": "true",
"MIN_DISTRIBUTION": "true",
"TEST_WORKLOAD": "so",
"WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
"CAPTURE_NODE_STAT": "true"
},
"cluster_configuration": {
"size": "Single-Node",
"data_instance_config": "4vCPU, 32G Mem, 16G Heap"
}
}
}
25 changes: 25 additions & 0 deletions .github/workflows/add-performance-comment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: Performance Label Action

on:
pull_request:
types: [labeled]

jobs:
add-comment:
if: github.event.label.name == 'Performance'
runs-on: ubuntu-latest
permissions:
pull-requests: write

steps:
- name: Add comment to PR
uses: actions/github-script@v6
with:
github-token: ${{secrets.GITHUB_TOKEN}}
script: |
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: "Hello!\nWe have added a performance benchmark workflow that runs by adding a comment on the PR.\n Please refer https://github.com/opensearch-project/OpenSearch/blob/main/PERFORMANCE_BENCHMARKS.md on how to run benchmarks on pull requests."
})
142 changes: 142 additions & 0 deletions .github/workflows/benchmark-pull-request.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
name: Run performance benchmark on pull request
on:
issue_comment:
reta marked this conversation as resolved.
Show resolved Hide resolved
types: [created]
jobs:
run-performance-benchmark-on-pull-request:
if: ${{ (github.event.issue.pull_request) && (contains(github.event.comment.body, '"run-benchmark-test"')) }}
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
issues: write
pull-requests: write
steps:
- name: Checkout Repository
uses: actions/checkout@v3
- name: Set up required env vars
run: |
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
echo "REPOSITORY=${{ github.event.repository.full_name }}" >> $GITHUB_ENV
OPENSEARCH_VERSION=$(awk -F '=' '/^opensearch[[:space:]]*=/ {gsub(/[[:space:]]/, "", $2); print $2}' buildSrc/version.properties)
echo "OPENSEARCH_VERSION=$OPENSEARCH_VERSION" >> $GITHUB_ENV
major_version=$(echo $OPENSEARCH_VERSION | cut -d'.' -f1)
echo "OPENSEARCH_MAJOR_VERSION=$major_version" >> $GITHUB_ENV
echo "USER_TAGS=pull_request_number:${{ github.event.issue.number }},repository:OpenSearch" >> $GITHUB_ENV
- name: Check comment format
id: check_comment
run: |
comment='${{ github.event.comment.body }}'
if echo "$comment" | jq -e 'has("run-benchmark-test")'; then
echo "Valid comment format detected, check if valid config id is provided"
config_id=$(echo $comment | jq -r '.["run-benchmark-test"]')
benchmark_configs=$(cat .github/benchmark-configs.json)
if echo $benchmark_configs | jq -e --arg id "$config_id" 'has($id)' && echo "$benchmark_configs" | jq -e --arg version "$OPENSEARCH_MAJOR_VERSION" --arg id "$config_id" '.[$id].supported_major_versions | index($version) != null' > /dev/null; then
echo $benchmark_configs | jq -r --arg id "$config_id" '.[$id]."cluster-benchmark-configs" | to_entries[] | "\(.key)=\(.value)"' >> $GITHUB_ENV
else
echo "invalid=true" >> $GITHUB_OUTPUT
fi
else
echo "invalid=true" >> $GITHUB_OUTPUT
fi
- name: Post invalid format comment
if: steps.check_comment.outputs.invalid == 'true'
uses: actions/github-script@v6
with:
github-token: ${{secrets.GITHUB_TOKEN}}
script: |
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: 'Invalid comment format or config id. Please refer to https://github.com/opensearch-project/OpenSearch/blob/main/PERFORMANCE_BENCHMARKS.md on how to run benchmarks on pull requests.'
})
- name: Fail workflow for invalid comment
if: steps.check_comment.outputs.invalid == 'true'
run: |
echo "Invalid comment format detected. Failing the workflow."
exit 1
- id: get_approvers
run: |
echo "approvers=$(cat .github/CODEOWNERS | grep '^\*' | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT
- uses: trstringer/manual-approval@v1
if: (!contains(steps.get_approvers.outputs.approvers, github.event.comment.user.login))
with:
secret: ${{ github.TOKEN }}
approvers: ${{ steps.get_approvers.outputs.approvers }}
minimum-approvals: 1
issue-title: 'Request to approve/deny benchmark run for PR #${{ env.PR_NUMBER }}'
issue-body: "Please approve or deny the benchmark run for PR #${{ env.PR_NUMBER }}"
exclude-workflow-initiator-as-approver: false
- name: Get PR Details
id: get_pr
uses: actions/github-script@v7
with:
script: |
const issue = context.payload.issue;
const prNumber = issue.number;
console.log(`Pull Request Number: ${prNumber}`);

const { data: pull_request } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
});

return {
"headRepoFullName": pull_request.head.repo.full_name,
"headRef": pull_request.head.ref
};
- name: Set pr details env vars
run: |
echo '${{ steps.get_pr.outputs.result }}' | jq -r '.headRepoFullName'
echo '${{ steps.get_pr.outputs.result }}' | jq -r '.headRef'
headRepo=$(echo '${{ steps.get_pr.outputs.result }}' | jq -r '.headRepoFullName')
headRef=$(echo '${{ steps.get_pr.outputs.result }}' | jq -r '.headRef')
echo "prHeadRepo=$headRepo" >> $GITHUB_ENV
echo "prheadRef=$headRef" >> $GITHUB_ENV
- name: Checkout PR Repo
uses: actions/checkout@v2
with:
repository: ${{ env.prHeadRepo }}
ref: ${{ env.prHeadRef }}
token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup Java
uses: actions/setup-java@v1
with:
java-version: 21
- name: Build and Assemble OpenSearch from PR
run: |
./gradlew :distribution:archives:linux-tar:assemble -Dbuild.snapshot=false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.UPLOAD_ARCHIVE_ARTIFACT_ROLE }}
role-session-name: publish-to-s3
aws-region: us-west-2
- name: Push to S3
reta marked this conversation as resolved.
Show resolved Hide resolved
run: |
aws s3 cp distribution/archives/linux-tar/build/distributions/opensearch-min-$OPENSEARCH_VERSION-linux-x64.tar.gz s3://${{ secrets.ARCHIVE_ARTIFACT_BUCKET_NAME }}/PR-$PR_NUMBER/
echo "DISTRIBUTION_URL=${{ secrets.ARTIFACT_BUCKET_CLOUDFRONT_URL }}/PR-$PR_NUMBER/opensearch-min-$OPENSEARCH_VERSION-linux-x64.tar.gz" >> $GITHUB_ENV
- name: Checkout opensearch-build repo
uses: actions/checkout@v4
with:
repository: opensearch-project/opensearch-build
ref: main
path: opensearch-build
- name: Trigger jenkins workflow to run gradle check
run: |
cat $GITHUB_ENV
bash opensearch-build/scripts/benchmark/benchmark-pull-request.sh ${{ secrets.JENKINS_PR_BENCHMARK_GENERIC_WEBHOOK_TOKEN }}
- name: Update PR with Job Url
uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const workflowUrl = process.env.WORKFLOW_URL;
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: `The Jenkins job url is ${workflowUrl} . Final results will be published once the job is completed.`
})
Loading
Loading