Skip to content

Vendorise pdfs for integration test #222

Vendorise pdfs for integration test

Vendorise pdfs for integration test #222

# Integration-test workflow for the ingest stage.
name: integration_tests

# Triggered by pushes to `main` and by pull requests that target `main`.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
# Workflow-level environment: every step of every job inherits these values.
env:
  # Fixture file in the repository; re-exported to the pytest step.
  TEST_DATA_FILE_PATH: ./integration_tests/data/new_and_updated_documents.json
  # Path handed to integration_tests.setup_execution_data_file as its last
  # argument.
  TEST_DATA_UPLOAD_PATH: input/2022-11-01T21.53.26.945831/new_and_updated_documents.json
  # Passed to the ingest container as --updates-file-name.
  UPDATES_FILE_NAME: new_and_updated_documents.json

  # Prefixes passed to the ingest container as --output-prefix,
  # --indexer-input-prefix and --embeddings-input-prefix respectively.
  INGEST_OUTPUT_PREFIX: ingest_unit_test_parser_input
  INDEXER_INPUT_PREFIX: ingest_unit_test_indexer_input
  EMBEDDINGS_INPUT_PREFIX: ingest_unit_test_embeddings_input

  # Read by the integration tests (re-exported to the pytest step).
  DOCUMENT_NAME_KEY: document_id
  PARSER_INPUT_EXPECTED_DATA_FILE_PATH: integration_tests/data/parser_input_dir_post_run.json
  # Not referenced by any step command in this workflow; available to the
  # steps only through the inherited environment.
  ARCHIVE_EXPECTED_DATA_FILE_PATH: integration_tests/data/docs_test_subset_archive_parser_input_expected.json

  # Execution data: the file is created at
  # <EXECUTION_DATA_PREFIX>/<EXECUTION_DATA_FILE_NAME> in the pipeline bucket,
  # and the ingest container receives the prefix and the id.
  EXECUTION_DATA_PREFIX: execution_data
  EXECUTION_DATA_FILE_NAME: 123456.json
  # Quoted so the identifier is a string rather than an implicitly typed
  # integer.
  EXECUTION_DATA_ID: '123456'
jobs:
  # Single job: build the ingest image, create throw-away S3 buckets, run the
  # ingest stage against them, test its output, then delete the buckets.
  test:
    runs-on: ubuntu-latest
    # No step below uses the GitHub token for anything other than checkout,
    # so restrict it to read-only repository access.
    permissions:
      contents: read
    steps:
      - name: Setting up Github Actions
        uses: actions/checkout@v3

      - name: Setting up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Installing Dependencies
        run: |
          python -m pip install "poetry==1.3.2" && poetry install --only integration-test

      - name: Building the Image
        run: make build

      - name: Configuring AWS Credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-access-key-id: ${{ secrets.UNIT_TESTS_AWS_ACCESS_KEY_ID }}
          aws-region: ${{ secrets.UNIT_TESTS_AWS_REGION }}
          aws-secret-access-key: ${{ secrets.UNIT_TESTS_AWS_SECRET_ACCESS_KEY }}

      # Bucket names embed the commit SHA and are exported through GITHUB_ENV
      # so that later steps can read them as env.INGEST_*_BUCKET.
      - name: Set Dynamic Bucket Names
        run: |
          echo "INGEST_DOCUMENT_BUCKET=testdocbucket${GITHUB_SHA}" >> "$GITHUB_ENV"
          echo "INGEST_PIPELINE_BUCKET=testpipbucket${GITHUB_SHA}" >> "$GITHUB_ENV"

      # Runs before the buckets are created; presumably clears leftovers from
      # an earlier run on the same commit - confirm against
      # integration_tests.remove_test_buckets.
      - name: Destroying Infrastructure
        run: |
          poetry run python -m integration_tests.remove_test_buckets ${{ env.INGEST_DOCUMENT_BUCKET }} ${{ env.INGEST_PIPELINE_BUCKET }} ${{ secrets.UNIT_TESTS_AWS_REGION }}

      - name: Building s3 buckets and uploading test data
        run: |
          poetry run python -m integration_tests.setup_test_buckets ${{ env.INGEST_DOCUMENT_BUCKET }} ${{ env.INGEST_PIPELINE_BUCKET }} ${{ secrets.UNIT_TESTS_AWS_REGION }}
          poetry run python -m integration_tests.setup_execution_data_file ${{ env.INGEST_PIPELINE_BUCKET }} ${{ env.EXECUTION_DATA_PREFIX }}/${{ env.EXECUTION_DATA_FILE_NAME }} ${{ env.TEST_DATA_UPLOAD_PATH }}
          aws s3 sync integration_tests/data/pipeline_in s3://${{ env.INGEST_PIPELINE_BUCKET }}

      # The AWS credentials are placed in the step environment and forwarded
      # with value-less `-e NAME` flags, which make docker copy NAME from the
      # calling environment. This keeps the secret values out of the generated
      # shell script and off the docker command line.
      - name: Running the Ingest Stage
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.UNIT_TESTS_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.UNIT_TESTS_AWS_SECRET_ACCESS_KEY }}
        run: |
          docker run \
            -e AWS_ACCESS_KEY_ID \
            -e AWS_SECRET_ACCESS_KEY \
            -e API_HOST="" \
            -e MACHINE_USER_EMAIL="" \
            -e MACHINE_USER_PASSWORD="" \
            navigator-data-ingest \
            --pipeline-bucket ${{ env.INGEST_PIPELINE_BUCKET }} \
            --document-bucket ${{ env.INGEST_DOCUMENT_BUCKET }} \
            --updates-file-name ${{ env.UPDATES_FILE_NAME }} \
            --output-prefix ${{ env.INGEST_OUTPUT_PREFIX }} \
            --embeddings-input-prefix ${{ env.EMBEDDINGS_INPUT_PREFIX }} \
            --indexer-input-prefix ${{ env.INDEXER_INPUT_PREFIX }} \
            --execution-id ${{ env.EXECUTION_DATA_ID }} \
            --execution-data-prefix ${{ env.EXECUTION_DATA_PREFIX }}

      # Runs only the tests carrying the `integration` pytest marker.
      - name: Running Integration Tests on the Ingest Stage Output 🚀
        run: |
          poetry run python -m pytest -vvv integration_tests/ -m integration
        env:
          INGEST_PIPELINE_BUCKET: ${{ env.INGEST_PIPELINE_BUCKET }}
          INGEST_OUTPUT_PREFIX: ${{ env.INGEST_OUTPUT_PREFIX }}
          EMBEDDINGS_INPUT_PREFIX: ${{ env.EMBEDDINGS_INPUT_PREFIX }}
          INDEXER_INPUT_PREFIX: ${{ env.INDEXER_INPUT_PREFIX }}
          DOCUMENT_NAME_KEY: ${{ env.DOCUMENT_NAME_KEY }}
          PARSER_INPUT_EXPECTED_DATA_FILE_PATH: ${{ env.PARSER_INPUT_EXPECTED_DATA_FILE_PATH }}
          TEST_DATA_FILE_PATH: ${{ env.TEST_DATA_FILE_PATH }}
          AWS_ACCESS_KEY_ID: ${{ secrets.UNIT_TESTS_AWS_ACCESS_KEY_ID }}
          AWS_DEFAULT_REGION: ${{ secrets.UNIT_TESTS_AWS_REGION }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.UNIT_TESTS_AWS_SECRET_ACCESS_KEY }}

      # `if: always()` makes the clean-up run even when an earlier step failed.
      - name: Destroying Infrastructure
        if: always()
        run: |
          poetry run python -m integration_tests.remove_test_buckets ${{ env.INGEST_DOCUMENT_BUCKET }} ${{ env.INGEST_PIPELINE_BUCKET }} ${{ secrets.UNIT_TESTS_AWS_REGION }}