# Vendorise pdfs for integration test #222
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Integration-test workflow. Triggered by pushes to `main` and by pull
# requests that target `main`.
name: integration_tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
# Workflow-level environment. These values are exported to every step and are
# also readable in expressions through the `env` context.
env:
  # Local test fixture, handed to the pytest step.
  TEST_DATA_FILE_PATH: ./integration_tests/data/new_and_updated_documents.json
  # Path passed to integration_tests.setup_execution_data_file.
  TEST_DATA_UPLOAD_PATH: input/2022-11-01T21.53.26.945831/new_and_updated_documents.json
  # Passed to the ingest container as --updates-file-name.
  UPDATES_FILE_NAME: new_and_updated_documents.json
  # Prefixes passed to the ingest container and to the pytest step.
  INGEST_OUTPUT_PREFIX: ingest_unit_test_parser_input
  INDEXER_INPUT_PREFIX: ingest_unit_test_indexer_input
  EMBEDDINGS_INPUT_PREFIX: ingest_unit_test_embeddings_input
  DOCUMENT_NAME_KEY: document_id
  PARSER_INPUT_EXPECTED_DATA_FILE_PATH: integration_tests/data/parser_input_dir_post_run.json
  # NOTE(review): not referenced by name in any step of this workflow;
  # presumably read from the environment by the test code - confirm.
  ARCHIVE_EXPECTED_DATA_FILE_PATH: integration_tests/data/docs_test_subset_archive_parser_input_expected.json
  # The execution data file is addressed as
  # <EXECUTION_DATA_PREFIX>/<EXECUTION_DATA_FILE_NAME> by the setup step.
  EXECUTION_DATA_PREFIX: execution_data
  EXECUTION_DATA_FILE_NAME: 123456.json
  # Quoted so the all-digit identifier stays a string instead of being parsed
  # as an integer.
  EXECUTION_DATA_ID: "123456"
jobs:
  # Single job: build the ingest image, create throw-away S3 buckets, run the
  # ingest container against them, assert on the output with pytest, and
  # finally remove the buckets again.
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Setting up Github Actions
        uses: actions/checkout@v4

      - name: Setting up Python
        uses: actions/setup-python@v5
        with:
          # Quoted: an unquoted 3.10 would be read as the float 3.1.
          python-version: '3.10'

      - name: Installing Dependencies
        run: |
          python -m pip install "poetry==1.3.2" && poetry install --only integration-test

      # Presumably produces the `navigator-data-ingest` image that the ingest
      # step runs below - verify against the Makefile.
      - name: Building the Image
        run: make build

      - name: Configuring AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.UNIT_TESTS_AWS_ACCESS_KEY_ID }}
          aws-region: ${{ secrets.UNIT_TESTS_AWS_REGION }}
          aws-secret-access-key: ${{ secrets.UNIT_TESTS_AWS_SECRET_ACCESS_KEY }}

      # Bucket names embed the commit SHA; writing them to GITHUB_ENV makes
      # them available to all later steps.
      - name: Set Dynamic Bucket Names
        run: |
          echo "INGEST_DOCUMENT_BUCKET=testdocbucket${GITHUB_SHA}" >> $GITHUB_ENV
          echo "INGEST_PIPELINE_BUCKET=testpipbucket${GITHUB_SHA}" >> $GITHUB_ENV

      # Runs the removal script before the buckets are created; presumably
      # this clears buckets left behind by an earlier run for the same
      # commit - confirm against integration_tests.remove_test_buckets.
      - name: Destroying Infrastructure
        run: |
          poetry run python -m integration_tests.remove_test_buckets ${{ env.INGEST_DOCUMENT_BUCKET }} ${{ env.INGEST_PIPELINE_BUCKET }} ${{ secrets.UNIT_TESTS_AWS_REGION }}

      - name: Building s3 buckets and uploading test data
        run: |
          poetry run python -m integration_tests.setup_test_buckets ${{ env.INGEST_DOCUMENT_BUCKET }} ${{ env.INGEST_PIPELINE_BUCKET }} ${{ secrets.UNIT_TESTS_AWS_REGION }}
          poetry run python -m integration_tests.setup_execution_data_file ${{ env.INGEST_PIPELINE_BUCKET }} ${{ env.EXECUTION_DATA_PREFIX }}/${{ env.EXECUTION_DATA_FILE_NAME }} ${{ env.TEST_DATA_UPLOAD_PATH }}
          aws s3 sync integration_tests/data/pipeline_in s3://${{ env.INGEST_PIPELINE_BUCKET }}

      - name: Running the Ingest Stage
        # The credentials are supplied through the step environment and
        # forwarded with `-e NAME` (no value), so the secret values are never
        # interpolated into the shell command line.
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.UNIT_TESTS_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.UNIT_TESTS_AWS_SECRET_ACCESS_KEY }}
        run: |
          docker run \
            -e AWS_ACCESS_KEY_ID \
            -e AWS_SECRET_ACCESS_KEY \
            -e API_HOST="" \
            -e MACHINE_USER_EMAIL="" \
            -e MACHINE_USER_PASSWORD="" \
            navigator-data-ingest \
            --pipeline-bucket ${{ env.INGEST_PIPELINE_BUCKET }} \
            --document-bucket ${{ env.INGEST_DOCUMENT_BUCKET }} \
            --updates-file-name ${{ env.UPDATES_FILE_NAME }} \
            --output-prefix ${{ env.INGEST_OUTPUT_PREFIX }} \
            --embeddings-input-prefix ${{ env.EMBEDDINGS_INPUT_PREFIX }} \
            --indexer-input-prefix ${{ env.INDEXER_INPUT_PREFIX }} \
            --execution-id ${{ env.EXECUTION_DATA_ID }} \
            --execution-data-prefix ${{ env.EXECUTION_DATA_PREFIX }}

      - name: Running Integration Tests on the Ingest Stage Output 🚀
        run: |
          poetry run python -m pytest -vvv integration_tests/ -m integration
        # Settings and AWS credentials handed to the pytest process.
        env:
          INGEST_PIPELINE_BUCKET: ${{ env.INGEST_PIPELINE_BUCKET }}
          INGEST_OUTPUT_PREFIX: ${{ env.INGEST_OUTPUT_PREFIX }}
          EMBEDDINGS_INPUT_PREFIX: ${{ env.EMBEDDINGS_INPUT_PREFIX }}
          INDEXER_INPUT_PREFIX: ${{ env.INDEXER_INPUT_PREFIX }}
          DOCUMENT_NAME_KEY: ${{ env.DOCUMENT_NAME_KEY }}
          PARSER_INPUT_EXPECTED_DATA_FILE_PATH: ${{ env.PARSER_INPUT_EXPECTED_DATA_FILE_PATH }}
          TEST_DATA_FILE_PATH: ${{ env.TEST_DATA_FILE_PATH }}
          AWS_ACCESS_KEY_ID: ${{ secrets.UNIT_TESTS_AWS_ACCESS_KEY_ID }}
          AWS_DEFAULT_REGION: ${{ secrets.UNIT_TESTS_AWS_REGION }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.UNIT_TESTS_AWS_SECRET_ACCESS_KEY }}

      # `always()` makes this teardown run even when an earlier step failed,
      # so the per-commit buckets are not left behind.
      - name: Destroying Infrastructure
        if: always()
        run: |
          poetry run python -m integration_tests.remove_test_buckets ${{ env.INGEST_DOCUMENT_BUCKET }} ${{ env.INGEST_PIPELINE_BUCKET }} ${{ secrets.UNIT_TESTS_AWS_REGION }}