From 901395f50e0aa5c73f2834e2202f31fd8123f2a8 Mon Sep 17 00:00:00 2001 From: tobias-watzel <136797078+tobias-watzel@users.noreply.github.com> Date: Fri, 1 Sep 2023 16:31:36 +0200 Subject: [PATCH] Feature/2023.07 os test refactor test cases (#21) * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Delete data_extractor/data/TEST directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Delete data_extractor/data/TEST/interim/rb/work directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Delete DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Delete requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add a default folder for the models Signed-off-by: ktj8l89 * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Added s3-communication.py file as a wrapper to communicate with s3 buckets Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update DOCKER_TEST file for s3 communication Signed-off-by: ktj8l89 * Update requirements.txt in TEST_SETUP Signed-off-by: ktj8l89 * Adding user-friendly shell /bin/bash Signed-off-by: ktj8l89 * Adding VS Code folders for remote access Signed-off-by: ktj8l89 * Update train_on_pdf.py First step to s3 connection of main_terminal Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Create s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update kpi_mapping.py root folder has access right issues and hence we have to change to another folder Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Updated curation step for S3 Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update Dockerfile had to change mode of code folder to ensure accessibility Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update extraction_server.py Added create directory to empty "folders" Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Init pytest and creating test cases for upcoming refactoring Signed-off-by: ktj8l89 * Ongoing creation of test cases Signed-off-by: ktj8l89 * Ongoing creation of test cases Signed-off-by: ktj8l89 * Ongoing creation and modification of test cases Signed-off-by: ktj8l89 * Ongoing creation of test cases Signed-off-by: ktj8l89 * Reset branch to restore old status Signed-off-by: ktj8l89 * Ongoing test cases for save_train_info Signed-off-by: ktj8l89 * Adding test cases for save_train_info Signed-off-by: ktj8l89 * Finished added test cases for save_train_info Signed-off-by: ktj8l89 * Adding test cases for run_router function Signed-off-by: ktj8l89 * Backup Signed-off-by: ktj8l89 * Finalizing test cases for train_on_pdf Signed-off-by: ktj8l89 * Finishing tests for run_router function Signed-off-by: ktj8l89 * Adding first test cases for train_on_pdf script Signed-off-by: ktj8l89 * Ongoing test creation for train_on_pdf function Signed-off-by: ktj8l89 * Ongoing creation of tests Signed-off-by: ktj8l89 * Ongoing test creation Signed-off-by: ktj8l89 * Ongoing creation of test cases Signed-off-by: ktj8l89 * Ongoing test creation Signed-off-by: ktj8l89 * Finishing test cases for train_on_pdf script Signed-off-by: ktj8l89 * Adding missing file in test folder Signed-off-by: ktj8l89 * Create .gitignore and protect credentials from being leaked Signed-off-by: Heather Ackenhusen <90428947+HeatherAck@users.noreply.github.com> Signed-off-by: ktj8l89 * Update README.md with status Signed-off-by: Heather Ackenhusen <90428947+HeatherAck@users.noreply.github.com> Signed-off-by: ktj8l89 * Update train_on_pdf.py Turn back wrong commit Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update inference_server.py Undo previous push (wrong branch) Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Feature/2023.04 os test (#12) * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete data_extractor/data/TEST directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete data_extractor/data/TEST/interim/rb/work directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add a default folder for the models * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Added s3-communication.py file as a wrapper to communicate with s3 buckets Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST file for s3 communication * Update requirements.txt in TEST_SETUP * Adding user-friendly shell /bin/bash * Adding VS Code folders for remote access * Update train_on_pdf.py First step to s3 connection of main_terminal Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Create s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update kpi_mapping.py root folder has access right issues and hence we have to change to another folder Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Updated curation step for S3 Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile had to change mode of code folder to ensure accessibility Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Added create directory to empty "folders" Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Add s3 communication to model docker Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Add folder data and models to the models docker Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update config_farm_train.py Adjust file path to be more generic for training folder Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py First step to integrate S3 to train_relevance Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update kpi_mapping.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update kpi_mapping.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Co-authored-by: ktj8l89 Signed-off-by: ktj8l89 * Feature/2023.04 os test (#14) * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete data_extractor/data/TEST directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete data_extractor/data/TEST/interim/rb/work directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add a default folder for the models * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Added s3-communication.py file as a wrapper to communicate with s3 buckets Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST file for s3 communication * Update requirements.txt in TEST_SETUP * Adding user-friendly shell /bin/bash * Adding VS Code folders for remote access * Update train_on_pdf.py First step to s3 connection of main_terminal Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Create s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update kpi_mapping.py root folder has access right issues and hence we have to change to another folder Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Updated curation step for S3 Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile had to change mode of code folder to ensure accessibility Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Added create directory to empty "folders" Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Add s3 communication to model docker Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Add folder data and models to the models docker Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update config_farm_train.py Adjust file path to be more generic for training folder Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py First step to integrate S3 to train_relevance Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update kpi_mapping.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update kpi_mapping.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Feature/2023.07 os test refactor test cases (#13) * Init pytest and creating test cases for upcoming refactoring * Ongoing creation of test cases * Ongoing creation of test cases * Ongoing creation and modification of test cases * Ongoing creation of test cases --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Co-authored-by: ktj8l89 --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Co-authored-by: ktj8l89 Signed-off-by: ktj8l89 * Feature/2023.04 os test (#16) * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Delete data_extractor/data/TEST directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Delete data_extractor/data/TEST/interim/rb/work directory Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Delete DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Delete requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Delete entry.sh Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Create requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add a default folder for the models Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Add files via upload Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Added s3-communication.py file as a wrapper to communicate with s3 buckets Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST file for s3 communication Signed-off-by: VEIY82L * Update requirements.txt in TEST_SETUP Signed-off-by: VEIY82L * Adding user-friendly shell /bin/bash Signed-off-by: VEIY82L * Adding VS Code folders for remote access Signed-off-by: VEIY82L * Update train_on_pdf.py First step to s3 connection of main_terminal Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Create s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update s3_settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update kpi_mapping.py root folder has access right issues and hence we have to change to another folder Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Updated curation step for S3 Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile had to change mode of code folder to ensure accessibility Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Added create directory to empty "folders" Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Add s3 communication to model docker Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Add folder data and models to the models docker Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update config_farm_train.py Adjust file path to be more generic for training folder Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py First step to integrate S3 to train_relevance Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update kpi_mapping.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update kpi_mapping.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update extraction_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update settings.yaml Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update inference_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update train_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update DOCKER_TEST Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update requirements.txt Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update rb_server.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update infer_on_pdf.py Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Feature/2023.07 os test refactor test cases (#13) * Init pytest and creating test cases for upcoming refactoring * Ongoing creation of test cases * Ongoing creation of test cases * Ongoing creation and modification of test cases * Ongoing creation of test cases --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Co-authored-by: ktj8l89 Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Update Dockerfile Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L * Added a coordinator docker and smaller changes Signed-off-by: VEIY82L * Extraction Dockerfile adjusted Signed-off-by: VEIY82L * Extraction Dockerfile adjusted Signed-off-by: VEIY82L * Extraction Dockerfile adjusted Signed-off-by: VEIY82L * Coordinator Dockerfile adjusted Signed-off-by: VEIY82L * Pyyaml added to requirements_coordinator Signed-off-by: VEIY82L * Pandas added to requirements_coordinator Signed-off-by: VEIY82L * boto3 added to requirements_coordinator Signed-off-by: VEIY82L * Openpyxl added to requirements_coordinator Signed-off-by: VEIY82L * Added folder permissions Signed-off-by: VEIY82L * Missing folder in infer_on_pdf.py Signed-off-by: VEIY82L * Update the visitor container and some small improvements Signed-off-by: VEIY82L * Path change in docker_visitor Signed-off-by: VEIY82L * Typo in filename Signed-off-by: VEIY82L * PyYaml missing in requirements file Signed-off-by: VEIY82L * Refactoring and small changes Signed-off-by: VEIY82L * Added different default runs for main and test branch Signed-off-by: VEIY82L * Small issues Signed-off-by: VEIY82L * Create directory and Small adjustments for PEP Signed-off-by: VEIY82L * Changed the import settings for the visitor and coordinator Signed-off-by: VEIY82L * Updated Docker-File Signed-off-by: VEIY82L --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L Co-authored-by: ktj8l89 Signed-off-by: ktj8l89 * Feature/2023.04 os test (#16) (#17) * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update entry.sh * Update Dockerfile * Update Dockerfile * Update requirements.txt * Update requirements.txt * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update requirements.txt * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update entry.sh * Add files via upload * Update Dockerfile * Update Dockerfile * Update Dockerfile * Delete entry.sh * Update Dockerfile * Update entry.sh * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update entry.sh * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update Dockerfile * Add files via upload * Add files via upload * Delete data_extractor/data/TEST directory * Add files via upload * Delete data_extractor/data/TEST/interim/rb/work directory * Add files via upload * Delete DOCKER_TEST * Add files via upload * Update train_on_pdf.py * Update settings.yaml * Update infer_on_pdf.py * Update settings.yaml * Create requirements.txt * Update DOCKER_TEST * Update entry.sh * Update DOCKER_TEST * Update DOCKER_TEST * Delete requirements.txt * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Delete entry.sh * Add files via upload * Update train_on_pdf.py * Update settings.yaml * Update settings.yaml * Update settings.yaml * Update DOCKER_TEST * Create requirements.txt * Update requirements.txt * Update DOCKER_TEST * Update requirements.txt * Update requirements.txt * Update infer_on_pdf.py * Update requirements.txt * Update requirements.txt * Update DOCKER_TEST * Update DOCKER_TEST * Add a default folder for the models * Update DOCKER_TEST * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Add files via upload * Update Dockerfile Added s3-communication.py file as a wrapper to communicate with s3 buckets * Update DOCKER_TEST file for s3 communication * Update requirements.txt in TEST_SETUP * Adding user-friendly shell /bin/bash * Adding VS Code folders for remote access * Update train_on_pdf.py First step to s3 connection of main_terminal * Update train_on_pdf.py * Update train_on_pdf.py * Update train_on_pdf.py * Create s3_settings.yaml * Update s3_settings.yaml * Update train_on_pdf.py * Update Dockerfile * Update train_on_pdf.py * Update train_on_pdf.py * Update extraction_server.py * Update train_on_pdf.py * Update extraction_server.py * Update train_on_pdf.py * Update extraction_server.py * Update train_on_pdf.py * Update extraction_server.py * Update Dockerfile * Update extraction_server.py * Update s3_settings.yaml * Update extraction_server.py * Update kpi_mapping.py root folder has access right issues and hence we have to change to another folder * Update extraction_server.py Updated curation step for S3 * Update Dockerfile had to change mode of code folder to ensure accessibility * Update extraction_server.py Added create directory to empty "folders" * Update Dockerfile Add s3 communication to model docker * Update Dockerfile Add folder data and models to the models docker * Update config_farm_train.py Adjust file path to be more generic for training folder * Update inference_server.py First step to integrate S3 to train_relevance * Update Dockerfile * Update requirements.txt * Update Dockerfile * Update inference_server.py * Update settings.yaml * Update settings.yaml * Update inference_server.py * Update kpi_mapping.py * Update inference_server.py * Update inference_server.py * Update train_on_pdf.py * Update kpi_mapping.py * Update inference_server.py * Update inference_server.py * Update inference_server.py * Update settings.yaml * Update settings.yaml * Update inference_server.py * Update extraction_server.py * Update extraction_server.py * Update train_on_pdf.py * Update train_on_pdf.py * Update infer_on_pdf.py * Update infer_on_pdf.py * Update infer_on_pdf.py * Update inference_server.py * Update settings.yaml * Update inference_server.py * Update infer_on_pdf.py * Update infer_on_pdf.py * Update inference_server.py * Update infer_on_pdf.py * Update train_on_pdf.py * Update infer_on_pdf.py * Update Dockerfile * Update Dockerfile * Update Dockerfile * Update rb_server.py * Update infer_on_pdf.py * Update DOCKER_TEST * Update requirements.txt * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update DOCKER_TEST * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update Dockerfile * Update rb_server.py * Update rb_server.py * Update infer_on_pdf.py * Update rb_server.py * Update Dockerfile * Update rb_server.py * Update rb_server.py * Update infer_on_pdf.py * Update rb_server.py * Update infer_on_pdf.py * Feature/2023.07 os test refactor test cases (#13) * Init pytest and creating test cases for upcoming refactoring * Ongoing creation of test cases * Ongoing creation of test cases * Ongoing creation and modification of test cases * Ongoing creation of test cases --------- * Update Dockerfile * Update Dockerfile * Added a coordinator docker and smaller changes * Extraction Dockerfile adjusted * Extraction Dockerfile adjusted * Extraction Dockerfile adjusted * Coordinator Dockerfile adjusted * Pyyaml added to requirements_coordinator * Pandas added to requirements_coordinator * boto3 added to requirements_coordinator * Openpyxl added to requirements_coordinator * Added folder permissions * Missing folder in infer_on_pdf.py * Update the visitor container and some small improvements * Path change in docker_visitor * Typo in filename * PyYaml missing in requirements file * Refactoring and small changes * Added different default runs for main and test branch * Small issues * Create directory and Small adjustments for PEP * Changed the import settings for the visitor and coordinator * Updated Docker-File --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: VEIY82L Co-authored-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Feature/2023.08 os test (#19) * Update Dockerfile Newest cython 3.0.0 creates issues w.r.t. pycocotools and hence we fix latest working version pip install cython==0.29.36. Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile mmdetection only used for table detection which is not used in the moment. Hence we can delete it for a first test run. Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile Added s3 communication and user rights Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> * Update Dockerfile /app/data created and rights extended Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Update infer_on_pdf.py Infer on pdf had the wrong mode Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 * Modifying tests for taking the last updates on train_on_pdf.py into account Signed-off-by: ktj8l89 * Ongoing modification of test cases Signed-off-by: ktj8l89 * Adapted the tests of the function generate_text_3434 Signed-off-by: ktj8l89 * Adapting tests for save_train_info function Signed-off-by: ktj8l89 * Onging test adaption for train_on_pdf script... Signed-off-by: ktj8l89 * Modifying test cases for save_train_info function Signed-off-by: ktj8l89 * Finishing adaptation of test cases for train_on_pdf script Signed-off-by: ktj8l89 * Solving minor issues Signed-off-by: ktj8l89 * Some cosmetics and consistency changes Signed-off-by: ktj8l89 * Minor changes for better readability Signed-off-by: ktj8l89 * Some cleanup and finishing tests Signed-off-by: ktj8l89 * Resolving some conflics Signed-off-by: ktj8l89 * Resolving conflicts Signed-off-by: ktj8l89 * Some minor fixes Signed-off-by: ktj8l89 * Fixing last conflicts Signed-off-by: ktj8l89 --------- Signed-off-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Signed-off-by: ktj8l89 Signed-off-by: Heather Ackenhusen <90428947+HeatherAck@users.noreply.github.com> Signed-off-by: VEIY82L Co-authored-by: DaBeIDS <90916810+DaBeIDS@users.noreply.github.com> Co-authored-by: VEIY82L Co-authored-by: Tobias Co-authored-by: Heather Ackenhusen <90428947+HeatherAck@users.noreply.github.com> --- data_extractor/code/infer_on_pdf.py | 2 +- data_extractor/code/tests/pytest.ini | 2 +- .../tests/root_testing/data/s3_settings.yaml | 13 + .../input/pdfs/training/xlsx_file.xlsx | Bin 0 -> 4993 bytes .../ml/annotations/aggregated_annotation.csv | 7 + .../code/tests/test_train_on_pdf.py | 448 ++++++++++++++++++ .../test_utils/test_convert_xls_to_csv.py | 157 +++--- .../test_copy_file_without_overwrite.py | 43 +- .../tests/test_utils/test_create_directory.py | 8 +- .../tests/test_utils/test_generate_text.py | 170 ++++++- .../code/tests/test_utils/test_link_files.py | 65 ++- .../code/tests/test_utils/test_run_router.py | 303 ++++++++++++ .../code/tests/test_utils/test_running.py | 11 +- .../tests/test_utils/test_save_train_info.py | 129 ++++- data_extractor/code/tests/utils_test.py | 19 +- 15 files changed, 1192 insertions(+), 185 deletions(-) create mode 100644 data_extractor/code/tests/root_testing/data/s3_settings.yaml create mode 100644 data_extractor/code/tests/root_testing/input/pdfs/training/xlsx_file.xlsx create mode 100644 data_extractor/code/tests/root_testing/interim/ml/annotations/aggregated_annotation.csv create mode 100644 data_extractor/code/tests/test_train_on_pdf.py create mode 100644 data_extractor/code/tests/test_utils/test_run_router.py diff --git a/data_extractor/code/infer_on_pdf.py b/data_extractor/code/infer_on_pdf.py index 5ba56f4..089a7a8 100644 --- a/data_extractor/code/infer_on_pdf.py +++ b/data_extractor/code/infer_on_pdf.py @@ -579,4 +579,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/data_extractor/code/tests/pytest.ini b/data_extractor/code/tests/pytest.ini index bc3d61b..71e06a7 100644 --- a/data_extractor/code/tests/pytest.ini +++ b/data_extractor/code/tests/pytest.ini @@ -1,2 +1,2 @@ [pytest] -addopts = -s --tb=auto \ No newline at end of file +#addopts = -s --tb=auto diff --git a/data_extractor/code/tests/root_testing/data/s3_settings.yaml b/data_extractor/code/tests/root_testing/data/s3_settings.yaml new file mode 100644 index 0000000..f2c8ba8 --- /dev/null +++ b/data_extractor/code/tests/root_testing/data/s3_settings.yaml @@ -0,0 +1,13 @@ +# global variables in the docker/pod for the s3 connection where the input and output will be stored +main_bucket: + s3_endpoint: LANDING_AWS_ENDPOINT + s3_access_key: LANDING_AWS_ACCESS_KEY + s3_secret_key: LANDING_AWS_SECRET_KEY + s3_bucket_name: LANDING_AWS_BUCKET_NAME +interim_bucket: + s3_endpoint: INTERIM_AWS_ENDPOINT + s3_access_key: INTERIM_AWS_ACCESS_KEY + s3_secret_key: INTERIM_AWS_SECRET_KEY + s3_bucket_name: INTERIM_AWS_BUCKET_NAME +# variables necessary to find the files in s3 bucket +prefix: corporate_data_extraction_projects diff --git a/data_extractor/code/tests/root_testing/input/pdfs/training/xlsx_file.xlsx b/data_extractor/code/tests/root_testing/input/pdfs/training/xlsx_file.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..777c5553a9122ee779b68a8b77ad5c421ad47d9d GIT binary patch literal 4993 zcmZ`-1yoeq_a0J#Q5t0EG#I)|i8qv_)WAs32%|Iz1JY6wlG30wNIwxN>2N^0Q=~_w zWoY=1y5921|NZV=_uO^Yy5GL%e0T3}@3VF85)je=0D#MYl*mIvWs??QAnvIWcTwOj zxTCd>i=&gPfTfcYKg!-t^SLHjt02W+=jxbQ2ZJp(A-0j6dGr%i3Go=Xl6&xpw_Z#1 zwCtQGCP zX(!Cz-b$}BHHaecAg{)szHL&9I%fz04wE{`M{q(4dqBxho}*#e z?@mNu!B&e!54=6~kE3@gYYD!kr{@^!4U4|eOLIkrg61+=%y;U78933pmPfQ#3gWx? z>NR}rH#a=UHAD@D=|=4b-RkQW*F!w^ju(#FHgDNwh(FnljuHsW4mI~+Zv_0wPRjjs zP6}cGApJT3K#gMuWhdZfi?B!h8ijuFvu*-&osy*S-7FtK*iI3tEgR(?$cE% zLF-zq!IaP_J5P9QkYAIEI7Lt5mGwByx-?lrFy^=qqO!Us>AAbXZ?zT@mav1em=u^E ztA8gjH*X}&MX4ajP*A?rOq!=*_SOUmXp{6oQ&Y9g1V&uGi=ycnyFU*D32=^mshXZO z3_{vvT~mQu_?Urh; z?dvZiyQkbl?LqU4+bGHEo}maQCc=5$`pnjh?{-bO3M5rI$N~5ScB(8k|8|+Y>7-FG?cqlrv%}Ayb(RjGt~73IhqjeH>nt8{`-b@7ei;OsJ!^qi;DK` zvivX`0P5sE_Hl?H(d_2Y;{?zobN#Hs{VnK*)HBMn&JMycKLqTl1hRL`l%MV;)uVGoyfS>G`yM6uA&c<}*6Y*y z<4;`ImR=Ubk83LSIW>SuwvRbsmZ8@jWT;)FnQRoj`?1*AJZ|Cs%|p_!TCz5*hKUuX z{ryXskuIF|vskDPR$Ik^bG)ee4aazbG8g3K2fn%`O3AlsSA?nWT+RqdeyGEQpGzW? zm=$tR)mcJ0Pwn%r@$!dHQDOVVj;V4+Sx~j9paTkJ&_cTFitC`=@ufWFL99W;OimJ% z7BA`AC(+kp;EplWi{|apbXjINDQFsk8K9`K<&2t|Rr> zXyhBt0ym!CH4#vYayjGUP}?C{uQQ^_N!-EtYbw=c%Qwl)Q?! ztr)~XE^p}qs-261EKI062T@u>Rg@>jC1UZnEk1nfjeXoV7JHR4&P)RLZeK(UWfMz? zqZ^)>K9r*da$_Tys5;;%79gMK(rJPh_lJ`M|4`03x z6}?@^B1}rOUCVNN*$YJ~k=WUwQx^CH?|g7dqP6%SuvyBF-={xsv-Z1O*3df%P9K|= z;Rx8AvIDIBxJ+njT$KNYwUJBjhc5l{_?r__FPp!e+TPCGUO$@T3Mx-E=HsM`xf}3C zfve^uqI>IBJINXx#!b?rKa7wFr^-}hbinBCslOOsF0Z{&BMV!?lxMl!2KOJzny*&% zl8Gq2bGiGaI-&6`kEHtd%8yQ3EImLO8`(JKYL8dUV#ioh_a)9eLA@lU8jxZmfqh-Z zjI?BCa%?T>@VywvwnIz@3bY_KNm%QXY-GPWF*ta<+M-*LvniX%+!eNy&AyQplHA{> z6zciJJlW<&r;aF%y3qanhw_^q4E7xNq*{^5)HcQ4bBM|o1Ll4M*iJ>`L%Awit^PCP(ho0z zlv;FTRI1hq^!e)9y9|1btGsIM29iw>k!q@>YYmGlUCvs1HB|1^xm})`ObbWL+ExpW(!~H19qs3z9)CsLZzAUq9{@qlm%ojVmc;pZ-qpFea z*+Gft2Epfi!9+|)FTA9)5f?*Djn|ui{%Z?A5T4KEpL|62eg{~4leVw)wv-1nRCcFYj#7^(BxVahs&?9pE2pnJ^q%RB~rSDdH)*>h3qBNn;y<0ZLLl!)}px;{$ zYUHuF+veqv!FT<9+H6Fwie656Hz#GD)YZJ9_~+BY}Hs-u6@XzE>rbnJAf_iWNbPYJ39~b9w|1-gY#0}xqHc*HICRi682rx zJetO;G{3U6b4m#4=?~onW<8Mn6beZNiHt^+NJWu4DvZi6-0AQB@0J!!mfp|E&JaV-~AlzI9e%^j& zk;n1V4%33b5c7@!0{!RF5KBf|6X0}*|ErGbN8I_=S#is18V!x7Tm`xYjJ1LqJJ;{T z1?p?6H>I!ua{TTA^LgYJ4&;lZzO$v&>Tv=MF$syl(&FfmNDx$pUQHohlD`n{&88{V ztbZR}hezJ5mU-Dkhk{t&I+j!yJ^5|Dm~fq{WbS6LQP-@~d0*Q3 zpFeeeLr0|uS1-|#0RUHi{Zv;sPdkL`k9uI%a5{Wikm8i~GkIZra%xR`fH=jCDum^J zR=GO33*XoUA;opPml4af`gdyHx;3>pOYdNcNGii;)4h$7c2~|I>jFbeqXuZTP8&J3 z$vC)k_7oFDoGV_vd^HgpgUz6A_!2wv+D#~J8QGJkHO4DfH9fXx=SaNX4NuJdYGD6; z^o?pDLG_Md1*Z|>@6YNRCEuTT@{UT?(63D}BEYUO;Wy%?ypF~J{cWUgm=>$ z-U#|MTXRah70n{9nztcB(B(Diyc#qLgQA{$GzLuAHl@p`u3H>>HXbD3Cpp98kg?{q z+=K@{dVeq5UA|&xV177-A2J{{9H^`2E0}uB#2*Rc*)8Rz9+$@mSh8eL><-crvb>LP zx0PRck~Jc-ab&EG>1RLH>s5T&WH@0JGTM;rv@!^^Y{g&vq)b-n*vfx)6Z~~)uttC& zUFCSMyhU;w4&l@G^?7~W=hjhr*j3-T$6rp2(N7&9lE;SQ}%c9*l{?ld(zX6&1ned)k&t7iHvzEFH|h{b&lZsGss zV0-u2-|qDKbDebm4$Af$%(n=_si6sd=WdzYo|*o-wR|}r;+DW_S{5BKQRnxhowe{@ z9;iid^z5X+UVA{@YoRpC*S{^m64iZ$A>rz+Q`1t6!!Y&E8Iz8jkE3E?{7}4VMnahw z$95fi&W(lHe76{d#{sF#Bhg>yH9Dk>Lrn8ravTJsXo!O6!JRgu{m;HE9YpPi)^Qsf z(l`@VqKJ)^*-?zhpl*Qcs~GU^W=rV4u9v5F-uQXY_(h>g3a@8s#`!We&aZK94KQ~> z*trVu{}?mkY8~1Hf%1MG1DS%C2H3+y5ZiQwhj@nBTWNvjt(X~^Gdq|Vf8a&l%?r3CRtYX;+m_;mfNdB*BYo!Vk@NId1h6nH7!|4iK zbB)68k7wk`O$%r%IQo}MYd9++Rqu*i-Rd3k6W-qiib0muIbXm)sNv-^_j$F!EDk7L zDg-^8&79}Q+=Y!9#*|xgRC&SH4xMfFNuSSHb4NN`Rh@|iqRGGcUMT57b&SP=I7Q~f zEtLFM@cvL_D@VsiKf*UkcTl}ep5o+%6RX9&!N5FB1cWb)j@PG&EA0Vh)_!8TYO35L zo&tQZFg(`>Zu>a1qu~t>dyY=>U|-3nMA9QcJ;V)B5+jN_`8-%dw`C;|j2PBIEZ=a~ z@r_-kS42xmV*FU*@Yi5Mbp=vVY%|MeCci^pO|5H{@}WB!X`k1dxmXCi8suN3edMal z+9eY*yAk%dJ`Pfnew%2mm$oz>4zH?rxs)Ffcf)(WZ31#(RpI=7i-Ot@ z=}Q)jLx>xO-an*>yHTewAp+0bNXbr@JyUXfJd5;}JpEg8i(Ih5bgc*izo||OTGD8> ztrl;&x6q@Jm|ZB$5PmDs$%JhI&1R1@^r_I+s#^7=m)M7t^)8Z>eF8e6+|DlKbVQ72 z90P8F{kWX|V^auIa&&M*IJlYXdpaRpO@DY%X}qRZn;->lbqE+YKN$KdpV5NM)Ctd8 z8&rH2Z(}kvEuuB0{)&n=ShEB**U+DtR5BM$S+SO}H<7@s>rI1ia7oLGnFCc;#V)0G zl3LB7{x%*)vEQ@&RHua+_4>w>gs-C_RW6Q2D2ykg=9Xqkvz4)IRO`ow>I^wz>Qv($ zEnis^zWh~K>pEaz!@&>L2`>)1f5`0g<%x{Ey0TJb>@%}G>lbUK^3o>FUQ-L*3`)}= zBI&~Si-{m>{Gv(%Hk+@5ZbF(onSzo1^^+$p4@f_BYm!n*RTBDjWpJWr4d9%r@ceQV z``>%tjuo_EPa9)<7f*pmiKPH5=t}>=lTP|`#=dRJ){Uk+l?`KBAitG}&v(rHa7^Oi z(*XW=y97t)&%+xV!%i@56omKlb_;4P315 z{xMJ;Kz{kBf&Z-aE?T)*PW)rVA8(1|r z{_lXj2)-EQzrkl$e}ex<%wM!~F_3=SS;Q&xe}t9JT_RjF1^^(zowsrN&G(~Y1^7R? CE4V}e literal 0 HcmV?d00001 diff --git a/data_extractor/code/tests/root_testing/interim/ml/annotations/aggregated_annotation.csv b/data_extractor/code/tests/root_testing/interim/ml/annotations/aggregated_annotation.csv new file mode 100644 index 0000000..61c6127 --- /dev/null +++ b/data_extractor/code/tests/root_testing/interim/ml/annotations/aggregated_annotation.csv @@ -0,0 +1,7 @@ +Unnamed: 0,Data +0,10 +1,20 +2,30 +3,40 +4,50 +5,60 diff --git a/data_extractor/code/tests/test_train_on_pdf.py b/data_extractor/code/tests/test_train_on_pdf.py new file mode 100644 index 0000000..e21590b --- /dev/null +++ b/data_extractor/code/tests/test_train_on_pdf.py @@ -0,0 +1,448 @@ +from pathlib import Path +import pytest +from unittest.mock import patch, Mock, MagicMock +import shutil +import train_on_pdf +import requests +import requests_mock +import config_path +import sys +import yaml +import traceback +from tests.utils_test import modify_project_settings +from tests.test_utils.test_running import prerequisite_running + +# types +import typing +from _pytest.fixtures import FixtureRequest +from _pytest.capture import CaptureFixture + + +@pytest.fixture(params=[()], autouse=True) +def prerequisite_train_on_pdf_try_run( + request: FixtureRequest, + path_folder_root_testing: Path, + path_folder_temporary: Path, + prerequisite_running + ) -> None: + """Defines a fixture for the train_on_pdf script + + :param request: Request for parametrization + :param path_folder_root_testing: Requesting the path_folder_root_testing fixture + :type path_folder_root_testing: Path + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + :param prerequisite_running: Requesting the prerequisite_running fixture + :rtype prerequisite_train_on_pdf_try_run: None + """ + mocked_project_settings = { + 's3_usage': False, + 's3_settings': {}, + 'general': + { + 'ext_port': 0, + 'infer_port': 0, + 'ext_ip': '0.0.0.0', + 'infer_ip': '0.0.0.0', + 'delete_interim_files': False + }, + 'train_relevance': + { + 'output_model_name': 'test', + 'train': False + }, + 'train_kpi': + { + 'output_model_name': 'test', + 'train': False + }, + 'extraction': + { + 'use_extractions': False, + 'store_extractions': False + } + } + + mocked_s3_settings = { + 'prefix': 'test_prefix', + 'main_bucket': { + 's3_endpoint': 'S3_END_MAIN', + 's3_access_key': 'S3_ACCESS_MAIN', + 's3_secret_key': 'S3_SECRET_MAIN', + 's3_bucket_name': 'S3_NAME_MAIN' + }, + 'interim_bucket': { + 's3_endpoint': 'S3_END_INTERIM', + 's3_access_key': 'S3_ACCESS_INTERIM', + 's3_secret_key': 'S3_SECRET_INTERIM', + 's3_bucket_name': 'S3_NAME_INTERIM' + } + } + project_name = 'TEST' + path_folder_data = path_folder_temporary / 'data' + path_folder_models = path_folder_temporary / 'models' + Path(path_folder_data / project_name).mkdir(parents=True, exist_ok=True) + path_folder_models.mkdir(parents=True, exist_ok=True) + + # copy settings files to temporary folder + path_file_settings_root_testing = path_folder_root_testing / 'data' / project_name / 'settings.yaml' + path_file_settings_temporary = path_folder_temporary / 'data' / project_name / 'settings.yaml' + + path_file_settings_s3_root_testing = path_folder_root_testing / 'data' / 's3_settings.yaml' + path_file_settings_s3_temporary = path_folder_temporary / 'data' / 's3_settings.yaml' + + shutil.copy(path_file_settings_root_testing, path_file_settings_temporary) + shutil.copy(path_file_settings_s3_root_testing, path_file_settings_s3_temporary) + + def return_project_settings(*args: typing.List[Mock]): + """Helper function for choosing the right settings file + + :return: Project or S3 Settings file + :rtype: typing.Dict[str] + """ + if 's3' in args[0].name: + return mocked_s3_settings + else: + return mocked_project_settings + + # modifying the project settings file via parametrization + mocked_project_settings = modify_project_settings(mocked_project_settings, request.param) + + with ( + patch('train_on_pdf.argparse.ArgumentParser.parse_args', Mock()) as mocked_argpase, + patch('train_on_pdf.config_path', Mock()) as mocked_config_path, + patch('train_on_pdf.yaml', Mock()) as mocked_yaml, + patch('train_on_pdf.project_settings', mocked_project_settings) + ): + mocked_argpase.return_value.project_name = project_name + mocked_argpase.return_value.s3_usage = 'N' + mocked_config_path.DATA_DIR = str(path_folder_data) + mocked_config_path.MODEL_DIR = str(path_folder_models) + mocked_yaml.safe_load.side_effect = return_project_settings + yield + + # cleanup + shutil.rmtree(path_folder_temporary) + + +def test_train_on_pdf_check_running(capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests if everything is printed when another training is running + + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None]) + """ + with patch('train_on_pdf.check_running'): + return_value = train_on_pdf.main() + + output_cmd, _ = capsys.readouterr() + string_expected = 'Another training or inference process is currently running.' + train_on_pdf.check_running.assert_called_once() + assert return_value is None + assert string_expected in output_cmd + + +@pytest.mark.parametrize('project_name', + [None, + '']) +def test_train_on_pdf_wrong_input_project_name(project_name: typing.Union[str, None], + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests the correct behaviour of wrong given project names + + :param project_name: Project name + :type project_name: typing.Union[str, None] + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None]) + """ + with (patch('train_on_pdf.argparse.ArgumentParser.parse_args', Mock()) as mocked_argpase, + patch('train_on_pdf.input', Mock()) as mocked_input): + mocked_argpase.return_value.project_name = project_name + mocked_input.return_value = project_name + + return_value = train_on_pdf.main() + + output_cmd, _ = capsys.readouterr() + string_expected = 'project name must not be empty' + if project_name is None: + string_call_expected = 'What is the project name? ' + mocked_input.assert_called_once() + mocked_input.assert_called_with(string_call_expected) + assert string_expected in output_cmd + assert return_value is None + + +def test_train_on_pdf_correct_input_project_name(): + """Tests that a correct project name is accepted + """ + with (patch('train_on_pdf.argparse.ArgumentParser.parse_args', Mock()) as mocked_argpase, + patch('train_on_pdf.input', Mock()) as mocked_input): + mocked_argpase.return_value.s3_usage = True + mocked_input.side_effect = lambda: 'TEST' + + train_on_pdf.main() + + assert mocked_input() == 'TEST' + + +@pytest.mark.parametrize('s3_usage', + [None, + 'X']) +def test_train_on_pdf_wrong_input_s3(s3_usage: typing.Union[str, None], + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests the correct behaviour of wrong s3 input is given + + :param s3_usage: S3 usage (yes or no) + :type s3_usage: typing.Union[str, None] + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None]) + """ + with (patch('train_on_pdf.argparse.ArgumentParser.parse_args', Mock()) as mocked_argpase, + patch('train_on_pdf.input', Mock()) as mocked_input): + mocked_argpase.return_value.project_name = 'TEST' + mocked_argpase.return_value.s3_usage = s3_usage + + return_value = train_on_pdf.main() + + output_cmd, _ = capsys.readouterr() + string_expected = 'Answer to S3 usage must by Y or N. Stop program. Please restart.' + if s3_usage is None: + string_call_expected = 'Do you want to use S3? Type either Y or N.' + mocked_input.assert_called_once() + mocked_input.assert_called_with(string_call_expected) + assert string_expected in output_cmd + assert return_value is None + + +@pytest.mark.parametrize('s3_usage', + ['Y', + 'N']) +def test_train_on_pdf_correct_input_s3_usage(s3_usage: typing.Union[str, None]): + """Tests that the correct s3 usage is accepted + + :param s3_usage: S3 usage (yes or no) + :type s3_usage: typing.Union[str, None] + """ + with (patch('train_on_pdf.argparse.ArgumentParser.parse_args', Mock()) as mocked_argpase, + patch('train_on_pdf.input', Mock()) as mocked_input, + patch('train_on_pdf.create_directory', + side_effect=lambda *args: Path(args[0]).mkdir(parents=True, exist_ok=True)), + patch('train_on_pdf.S3Communication', Mock()) as mocked_s3_communication): + mocked_argpase.return_value.project_name = 'TEST' + mocked_argpase.return_value.s3_usage = None + mocked_input.side_effect = lambda *args: s3_usage + + train_on_pdf.main() + + assert mocked_input() == s3_usage + if s3_usage == 'Y': + assert mocked_s3_communication.call_count == 2 + + mocked_s3_communication.return_value.download_file_from_s3.assert_called_once() + + +def test_train_on_pdf_s3_usage(): + """Tests if the s3 usage is correctly performed + + """ + project_name = 'TEST' + + with (patch('train_on_pdf.os.getenv', Mock(side_effect=lambda *args: args[0])), + patch('train_on_pdf.argparse.ArgumentParser.parse_args', Mock()) as mocked_argpase, + patch('train_on_pdf.S3Communication', Mock()) as mocked_s3_communication, + patch('train_on_pdf.create_directory', Mock())): + + mocked_argpase.return_value.project_name = project_name + mocked_argpase.return_value.s3_usage = 'Y' + + train_on_pdf.main() + + mocked_s3_communication.assert_any_call( + s3_endpoint_url='S3_END_MAIN', + aws_access_key_id='S3_ACCESS_MAIN', + aws_secret_access_key='S3_SECRET_MAIN', + s3_bucket='S3_NAME_MAIN' + ) + + mocked_s3_communication.assert_any_call( + s3_endpoint_url='S3_END_INTERIM', + aws_access_key_id='S3_ACCESS_INTERIM', + aws_secret_access_key='S3_SECRET_INTERIM', + s3_bucket='S3_NAME_INTERIM' + ) + + mocked_s3_communication.return_value.download_file_from_s3.assert_called_once() + + +def test_train_on_pdf_folders_default_created(path_folder_temporary: Path): + """Tests of the required default folders are created + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + """ + + paths_folders_expected = [ + r'/interim/ml', + r'/interim/pdfs/', + r'/interim/ml/annotations/', + r'/interim/kpi_mapping/', + r'/interim/ml/extraction/', + r'/interim/ml/training/', + r'/interim/ml/curation/', + r'/output/RELEVANCE/Text'] + + with (patch('train_on_pdf.link_files', Mock()), + patch('train_on_pdf.run_router', side_effect=lambda *args: False), + patch('train_on_pdf.create_directory', Mock()) as mocked_create_directory): + + train_on_pdf.main() + + # we have to combine pathlib object with str path... + path_folder_temporary = path_folder_temporary / 'data' + path_folder_temporary = str(path_folder_temporary) + '/TEST' + for path_current in paths_folders_expected: + path_folder_current = path_folder_temporary + path_current + mocked_create_directory.assert_any_call(str(path_folder_current)) + + +@pytest.mark.parametrize('prerequisite_train_on_pdf_try_run', + [('train_relevance', 'train', True)], + indirect=True) +def test_train_on_pdf_folders_relevance_created(path_folder_temporary: Path): + """Tests of the relevance folder is created + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + """ + + with (patch('train_on_pdf.link_files', Mock()), + patch('train_on_pdf.run_router', side_effect=lambda *args: False), + patch('train_on_pdf.create_directory', Mock()) as mocked_create_directory): + + train_on_pdf.main() + + # we have to combine pathlib object with str path... + path_folder_temporary = path_folder_temporary / 'models' + path_folder_temporary = str(path_folder_temporary) + '/TEST' + path_folder_expected = path_folder_temporary + '/RELEVANCE/Text/test' + mocked_create_directory.assert_any_call(str(path_folder_expected)) + + +@pytest.mark.parametrize('prerequisite_train_on_pdf_try_run', + [('train_kpi', 'train', True)], + indirect=True) +def test_train_on_pdf_folders_kpi_extraction_created(path_folder_temporary: Path): + """Tests of the kpi extraction folder is created + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + """ + with (patch('train_on_pdf.link_files', Mock()), + patch('train_on_pdf.run_router', side_effect=lambda *args: False), + patch('train_on_pdf.create_directory', Mock()) as mocked_create_directory): + + train_on_pdf.main() + + # we have to combine pathlib object with str path... + path_folder_temporary = path_folder_temporary / 'models' + path_folder_temporary = str(path_folder_temporary) + '/TEST' + path_folder_expected = path_folder_temporary + '/KPI_EXTRACTION/Text/test' + mocked_create_directory.assert_any_call(str(path_folder_expected)) + + +@pytest.mark.parametrize('prerequisite_train_on_pdf_try_run', + [('extraction', 'store_extractions', True)], + indirect=True) +def test_train_on_pdf_e2e_store_extractions(path_folder_temporary: Path, + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests of the extraction works properly + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None]) + """ + + with (patch('train_on_pdf.link_files', Mock()), + patch('train_on_pdf.run_router', side_effect=lambda *args: True), + patch('train_on_pdf.save_train_info', Mock()) as mocked_save_train_info, + patch('train_on_pdf.copy_file_without_overwrite', Mock()) as mocked_copy_files, + patch('train_on_pdf.create_directory', Mock())): + mocked_copy_files.return_value = False + + train_on_pdf.main() + + # we have to combine pathlib object with str path... + path_folder_root = path_folder_temporary / 'data' + path_folder_root_source = str(path_folder_root) + '/TEST/interim/ml/extraction/' + path_folder_root_destination = str(path_folder_root) + '/TEST/output/TEXT_EXTRACTION' + output_cmd, _ = capsys.readouterr() + + assert 'Finally we transfer the text extraction to the output folder\n' in output_cmd + mocked_copy_files.assert_called_with(path_folder_root_source, path_folder_root_destination) + + +@pytest.mark.parametrize('prerequisite_train_on_pdf_try_run', + [('general', 'delete_interim_files', True)], + indirect=True) +def test_train_on_pdf_e2e_delete_interim_files(path_folder_root_testing: Path): + """Tests if interim files are getting deleted + + :param path_folder_root_testing: Requesting the path_folder_root_testing fixture + :type path_folder_root_testing: Path + """ + + # define the folders for getting checked + paths_folders_expected = [ + r'interim/pdfs/', + r'interim/kpi_mapping/', + r'interim/ml/annotations/', + r'interim/ml/extraction/', + r'interim/ml/training/', + r'interim/ml/curation/', + ] + + with (patch('train_on_pdf.link_files', Mock()), + patch('train_on_pdf.run_router', side_effect=lambda *args: True), + patch('train_on_pdf.save_train_info', Mock()) as mocked_save_train_info, + patch('train_on_pdf.create_directory', Mock())): + + train_on_pdf.main() + + # we have to combine pathlib object with str path... + path_folder_root_testing = path_folder_root_testing / 'data' / 'TEST' + for path_current in paths_folders_expected: + path_folder_current = path_folder_root_testing / path_current + assert not any(path_folder_current.iterdir()) + + +def test_train_on_pdf_e2e_save_train_info(capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests if the train info of this run is saved + + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None] + """ + with (patch('train_on_pdf.link_files', Mock()), + patch('train_on_pdf.run_router', side_effect=lambda *args: True), + patch('train_on_pdf.save_train_info', Mock()) as mocked_save_train_info, + patch('train_on_pdf.create_directory', Mock())): + train_on_pdf.main() + + mocked_save_train_info.assert_called_once() + output_cmd, _ = capsys.readouterr() + assert output_cmd == "End-to-end inference complete\n" + + +def test_train_on_pdf_process_failed(capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests for cmd output if exception is raised + + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None] + """ + with (patch('train_on_pdf.link_files', Mock()), + patch('train_on_pdf.run_router', side_effect=lambda *args: False), + patch('train_on_pdf.link_files', side_effect=ValueError()), + patch('train_on_pdf.create_directory', lambda *args: Path(args[0]).mkdir(exist_ok=True))): + + train_on_pdf.main() + + output_cmd, _ = capsys.readouterr() + assert "Process failed to run. Reason: " in output_cmd \ No newline at end of file diff --git a/data_extractor/code/tests/test_utils/test_convert_xls_to_csv.py b/data_extractor/code/tests/test_utils/test_convert_xls_to_csv.py index a5c37dc..ae65cee 100644 --- a/data_extractor/code/tests/test_utils/test_convert_xls_to_csv.py +++ b/data_extractor/code/tests/test_utils/test_convert_xls_to_csv.py @@ -9,48 +9,23 @@ import pandas as pd -def mock_s3_download_single_file(*args, **kwargs): - """Mock the download_files_in_prefix_to_dir method of the S3Communication object - by creating a single xlsx file in path_folder_download_source""" - return create_single_xlsx_file(Path(args[1])) - -def mock_s3_download_multiple_files(*args, **kwargs): - """Mock the download_files_in_prefix_to_dir method of the S3Communication object - by creating a single xlsx file in path_folder_download_source""" - return create_multiple_xlsx_files(Path(args[1])) - - -def mock_s3_upload_files(*args, **kwargs): - """Mock the upload_files_in_dir_to_prefix method of the S3Communication object - by creating a dummy function - """ - def mock_s3_upload_files_return(*args, **kwargs): - pass - - return mock_s3_upload_files_return(*args, **kwargs) - -@pytest.fixture -def prerequisites_convert_xls_to_csv(path_folder_temporary: Path): +@pytest.fixture(autouse=True) +def prerequisites_convert_xls_to_csv(path_folder_temporary: Path) -> None: """Defines a fixture for mocking all required objects, methods and functions - :param path_folder_temporary: Requesting the temporary folder fixture + :param path_folder_temporary: Requesting the path_folder_temporary fixture :type path_folder_temporary: Path + :rtype: None """ path_source_annotation = path_folder_temporary / 'input' / 'pdfs' / 'training' path_destination_annotation = path_folder_temporary / 'interim' / 'ml' / 'annotations' path_source_annotation.mkdir(parents = True, exist_ok = True) path_destination_annotation.mkdir(parents = True, exist_ok = True) - project_prefix = 'corporate_data_extraction_projects' - s3_usage = False + project_prefix = str(path_folder_temporary) with (patch('train_on_pdf.source_annotation', str(path_source_annotation)), patch('train_on_pdf.destination_annotation', str(path_destination_annotation)), - patch('train_on_pdf.project_prefix', project_prefix), - patch('train_on_pdf.s3_usage', s3_usage), - patch('train_on_pdf.s3c_main', Mock(spec = s3_communication.S3Communication)) as mock_s3c_main, - patch('train_on_pdf.s3c_interim', Mock(spec = s3_communication.S3Communication)) as mock_s3c_interim): - mock_s3c_main.download_files_in_prefix_to_dir.side_effect = mock_s3_download_single_file - mock_s3c_interim.upload_files_in_dir_to_prefix.side_effect = mock_s3_upload_files + patch('train_on_pdf.project_prefix', project_prefix)): yield # cleanup @@ -58,87 +33,99 @@ def prerequisites_convert_xls_to_csv(path_folder_temporary: Path): shutil.rmtree(path) -def test_convert_xls_to_csv_download_s3(prerequisites_convert_xls_to_csv): +def test_convert_xls_to_csv_download_s3(): """Tests the function convert_xls_to_csv for successfully downloading files from a S3 bucket. All required variables/functions/methods are mocked by the - prerequisites_convert_xls_to_csv fixture""" - # set the name of the project - project_name = 'TEST' + prerequisites_convert_xls_to_csv fixture + Requesting prerequisites_convert_xls_to_csv automatically (autouse) + """ - # perform the convert_xls_to_csv call - convert_xls_to_csv(project_name) + s3_usage = True + mocked_s3c_main = Mock(spec = s3_communication.S3Communication) + mocked_s3c_main.download_files_in_prefix_to_dir.side_effect = lambda *args: create_single_xlsx_file(Path(args[1])) + mocked_s3c_interim = Mock(spec = s3_communication.S3Communication) - # assert that function has been called - train_on_pdf.s3c_main.download_files_in_prefix_to_dir.assert_called_once() - # assert that files exists in source_annotation folder + convert_xls_to_csv(s3_usage, mocked_s3c_main, mocked_s3c_interim) + + mocked_s3c_main.download_files_in_prefix_to_dir.assert_called_once() content_folder_source_annotation = list(Path(train_on_pdf.source_annotation).glob('*.xlsx')) assert len(content_folder_source_annotation) == 1 -def test_convert_xls_to_csv_upload_s3(prerequisites_convert_xls_to_csv): +def test_convert_xls_to_csv_upload_s3(): """Tests the function convert_xls_to_csv for successfully uploading - files to a S3 bucket""" - project_name = 'TEST' - train_on_pdf.s3_usage = True + files to a S3 bucket + Requesting prerequisites_convert_xls_to_csv automatically (autouse) + """ + s3_usage = True + mocked_s3c_main = Mock(spec = s3_communication.S3Communication) + mocked_s3c_main.download_files_in_prefix_to_dir.side_effect = lambda *args: create_single_xlsx_file(Path(args[1])) + mocked_s3c_interim = Mock(spec = s3_communication.S3Communication) + mocked_s3c_interim.upload_files_in_dir_to_prefix.side_effect = lambda *args: create_multiple_xlsx_files(Path(args[1])) - # perform the convert_xls_to_csv call - convert_xls_to_csv(project_name) + convert_xls_to_csv(s3_usage, mocked_s3c_main, mocked_s3c_interim) - # assert upload function has been called - train_on_pdf.s3c_interim.upload_files_in_dir_to_prefix.assert_called_once() + mocked_s3c_interim.upload_files_in_dir_to_prefix.assert_called_once() -def test_convert_xls_to_csv_value_error_multiple_xls(prerequisites_convert_xls_to_csv): +def test_convert_xls_to_csv_value_error_multiple_xls(): """Test the function convert_xls_to_csv for raising ValueError if more than one - xlsx files exist""" - project_name = 'TEST' - - # mock the function download_files_in_prefix_to_dir of the S3_Connection object - train_on_pdf.s3c_main.download_files_in_prefix_to_dir.side_effect = mock_s3_download_multiple_files + xlsx files exist + Requesting prerequisites_convert_xls_to_csv automatically (autouse) + """ + s3_usage = True + mocked_s3c_main = Mock(spec = s3_communication.S3Communication) + # create more than one file executing mocked_s3c_main + mocked_s3c_main.download_files_in_prefix_to_dir.side_effect = lambda *args: create_multiple_xlsx_files(Path(args[1])) + mocked_s3c_interim = Mock(spec = s3_communication.S3Communication) - # perform the convert_xls_to_csv call and check for ValueError with pytest.raises(ValueError, match = 'More than one excel sheet found'): - convert_xls_to_csv(project_name) - # assert that function has been called - train_on_pdf.s3c_main.download_files_in_prefix_to_dir.assert_called_once() + convert_xls_to_csv(s3_usage, mocked_s3c_main, mocked_s3c_interim) + mocked_s3c_main.download_files_in_prefix_to_dir.assert_called_once() -def test_convert_xls_to_csv_value_error_no_annotation_xls(prerequisites_convert_xls_to_csv): - """Test the function convert_xls_to_csv for raising ValueError if no annotation xlsx files - exist""" - project_name = 'TEST' - # mock the function download_files_in_prefix_to_dir of the S3_Connection object - train_on_pdf.s3c_main.download_files_in_prefix_to_dir.side_effect = lambda *args: None +def test_convert_xls_to_csv_value_error_no_annotation_xls(): + """Test the function convert_xls_to_csv for raising ValueError if no annotation xlsx files + exist + Requesting prerequisites_convert_xls_to_csv automatically (autouse) + """ + s3_usage = True + mocked_s3c_main = Mock(spec = s3_communication.S3Communication) + # do not create any file + mocked_s3c_main.download_files_in_prefix_to_dir.side_effect = lambda *args: None + mocked_s3c_interim = Mock(spec = s3_communication.S3Communication) - # perform the convert_xls_to_csv call and check for ValueError with pytest.raises(ValueError, match = 'No annotation excel sheet found'): - convert_xls_to_csv(project_name) - # assert that function has been called - train_on_pdf.s3c_main.download_files_in_prefix_to_dir.assert_called_once() + convert_xls_to_csv(s3_usage, mocked_s3c_main, mocked_s3c_interim) + + mocked_s3c_main.download_files_in_prefix_to_dir.assert_called_once() -def test_convert_xls_to_csv_s3_usage(prerequisites_convert_xls_to_csv): - """Tests the function convert_xls_to_csv for actively using an S3 bucket""" - project_name = 'TEST' - train_on_pdf.s3_usage = True +def test_convert_xls_to_csv_s3_usage(): + """Tests the function convert_xls_to_csv for actively using an S3 bucket + Requesting prerequisites_convert_xls_to_csv automatically (autouse) + """ + s3_usage = True + mocked_s3c_main = Mock(spec = s3_communication.S3Communication) + mocked_s3c_main.download_files_in_prefix_to_dir.side_effect = lambda *args: create_single_xlsx_file(Path(args[1])) + mocked_s3c_interim = Mock(spec = s3_communication.S3Communication) + mocked_s3c_interim.upload_files_in_dir_to_prefix.side_effect = lambda *args: create_multiple_xlsx_files(Path(args[1])) - # perform the convert_xls_to_csv call - convert_xls_to_csv(project_name) + convert_xls_to_csv(s3_usage, mocked_s3c_main, mocked_s3c_interim) - # assert that s3_usage is True and upload_files_in_dir_to_prefix has been called - assert train_on_pdf.s3_usage == True - train_on_pdf.s3c_interim.upload_files_in_dir_to_prefix.assert_called_once() + mocked_s3c_interim.upload_files_in_dir_to_prefix.assert_called_once() -def test_convert_xls_to_csv_no_s3_usage(prerequisites_convert_xls_to_csv): - """Tests the function convert_xls_to_csv for not using an S3 bucket""" - project_name = 'TEST' +def test_convert_xls_to_csv_no_s3_usage(): + """Tests the function convert_xls_to_csv for not using an S3 bucket + Requesting prerequisites_convert_xls_to_csv automatically (autouse) + """ s3_usage = False + mocked_s3c_main = Mock(spec = s3_communication.S3Communication) + mocked_s3c_interim = Mock(spec = s3_communication.S3Communication) - # perform the convert_xls_to_csv call - convert_xls_to_csv(project_name) + with pytest.raises(ValueError, match = 'No annotation excel sheet found'): + convert_xls_to_csv(s3_usage, mocked_s3c_main, mocked_s3c_interim) - # assert that s3_usage is True and upload_files_in_dir_to_prefix has been called - assert train_on_pdf.s3_usage == False - train_on_pdf.s3c_interim.upload_files_in_dir_to_prefix.assert_not_called() + mocked_s3c_interim.upload_files_in_dir_to_prefix.assert_not_called() diff --git a/data_extractor/code/tests/test_utils/test_copy_file_without_overwrite.py b/data_extractor/code/tests/test_utils/test_copy_file_without_overwrite.py index 70718b3..8e14c13 100644 --- a/data_extractor/code/tests/test_utils/test_copy_file_without_overwrite.py +++ b/data_extractor/code/tests/test_utils/test_copy_file_without_overwrite.py @@ -4,61 +4,56 @@ import pytest -@pytest.fixture -def path_folders_required_copy_file(path_folder_temporary: Path) -> tuple[Path, Path]: +@pytest.fixture(autouse=True) +def prerequisites_copy_file_without_overwrite(path_folder_temporary: Path) -> None: """Defines a fixture for creating the source and destination folder - :param path_folder_temporary: Requesting the temporary folder fixture + :param path_folder_temporary: Requesting the path_folder_temporary fixture :type path_folder_temporary: Path - :return: Tuple containing the paths to the source and destination folders - :rtype: tuple[Path, Path] - :yield: Tuple containing the paths to the source and destination folders - :rtype: Iterator[tuple[Path, Path]] + :rtype: None """ path_folder_source = path_folder_temporary / 'source' path_folder_destination = path_folder_temporary / 'destination' path_folder_source.mkdir(parents = True) path_folder_destination.mkdir(parents = True) - yield (path_folder_source, path_folder_destination) + yield # cleanup for path in path_folder_temporary.glob("*"): shutil.rmtree(path) -def test_copy_file_without_overwrite_result(path_folders_required_copy_file: tuple[Path, Path]): +def test_copy_file_without_overwrite_result(path_folder_temporary: Path): """Tests if copy_file_without_overwrite returns True if executed - - :param path_folders_required_copy_file: Tuple containing the paths to the source and destination folders - :type path_folders_required_copy_file: tuple[Path, Path] + Requesting prerequisites_copy_file_without_overwrite automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path """ - - # create test file in source folder - path_folder_source, path_folder_destination = path_folders_required_copy_file + path_folder_source = path_folder_temporary / 'source' + path_folder_destination = path_folder_temporary / 'destination' path_folder_source_file = path_folder_source / 'test.txt' path_folder_source_file.touch() - # execute copy_file_without_overwrite result = copy_file_without_overwrite(str(path_folder_source), str(path_folder_destination)) assert result == True -def test_copy_file_without_overwrite_file_not_exists(path_folders_required_copy_file: tuple[Path, Path]): +def test_copy_file_without_overwrite_file_not_exists(path_folder_temporary: Path): """Tests that copy_file_without_overwrite copies the files from the source to the destination folder if they do no exist in the destination folder - - :param path_folders_required_copy_file: Tuple containing the paths to the source and destination folders - :type path_folders_required_copy_file: tuple[Path, Path] + Requesting prerequisites_copy_file_without_overwrite automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path """ - # create test file in source folder - path_folder_source, path_folder_destination = path_folders_required_copy_file + path_folder_source = path_folder_temporary / 'source' + path_folder_destination = path_folder_temporary / 'destination' path_folder_source_file = path_folder_source / 'test.txt' path_folder_source_file.touch() - # create test file path for destination folder path_folder_destination_file = path_folder_destination / 'test.txt' assert not path_folder_destination_file.exists() - # execute copy_file_without_overwrite copy_file_without_overwrite(str(path_folder_source), str(path_folder_destination)) assert path_folder_destination_file.exists() diff --git a/data_extractor/code/tests/test_utils/test_create_directory.py b/data_extractor/code/tests/test_utils/test_create_directory.py index d425c71..e2e53c9 100644 --- a/data_extractor/code/tests/test_utils/test_create_directory.py +++ b/data_extractor/code/tests/test_utils/test_create_directory.py @@ -6,26 +6,24 @@ def test_create_directory(path_folder_temporary: Path): """Tests of create_directory creates a folder - :param path_folder_temporary: Requesting the temporary folder fixture + :param path_folder_temporary: Requesting the path_folder_temporary fixture :type path_folder_temporary: Path """ - # call create_folder and check if the folder exists create_directory(str(path_folder_temporary)) + assert path_folder_temporary.exists() def test_create_directory_cleanup(path_folder_temporary: Path): """Tests of create_directory performs a clean-up if folder exists - :param path_folder_temporary: Requesting the temporary folder fixture + :param path_folder_temporary: Requesting the path_folder_temporary fixture :type path_folder_temporary: Path """ - # create folder with files path_folder_temporary.mkdir(exist_ok = True) for i in range(10): path_current_test_file = path_folder_temporary / f'test_{i}.txt' path_current_test_file.touch() - # call create_directory and check for empty folder create_directory(str(path_folder_temporary)) assert not any(path_folder_temporary.iterdir()) \ No newline at end of file diff --git a/data_extractor/code/tests/test_utils/test_generate_text.py b/data_extractor/code/tests/test_utils/test_generate_text.py index c49fc05..c998645 100644 --- a/data_extractor/code/tests/test_utils/test_generate_text.py +++ b/data_extractor/code/tests/test_utils/test_generate_text.py @@ -2,18 +2,24 @@ from train_on_pdf import generate_text_3434 from tests.utils_test import write_to_file import shutil -from unittest.mock import patch +from unittest.mock import patch, Mock, call +import s3_communication +import train_on_pdf +import pytest +# types +import typing +from _pytest.capture import CaptureFixture -def test_generate_text(path_folder_temporary: Path): - """Tests the generate_text_3434 which takes files from the folder relevance, - reads them in and puts the content into the file text_3434.csv. Note that - the header of text_3434.csv is taken from the first file read in - :param path_folder_temporary: Requesting the temporary folder fixture +@pytest.fixture(autouse=True) +def prerequisites_generate_text(path_folder_temporary: Path) -> None: + """Defines a fixture for mocking all required paths and creating required temporary folders + + :param path_folder_temporary: Requesting the path_folder_temporary fixture :type path_folder_temporary: Path + :rtype: None """ - project_name = 'test' path_folder_relevance = path_folder_temporary / 'relevance' path_folder_text_3434 = path_folder_temporary / 'folder_test_3434' path_folder_relevance.mkdir(parents = True) @@ -21,15 +27,82 @@ def test_generate_text(path_folder_temporary: Path): # create multiple files in the folder_relevance with the same header for i in range(5): - path_current_file = path_folder_relevance / f'test_{i}.csv' + path_current_file = path_folder_relevance / f'{i}_test.csv' path_current_file.touch() write_to_file(path_current_file, f'That is a test {i}', 'HEADER') - - # mock the global variables required for generate_text_3434 and execute the function + with (patch('train_on_pdf.folder_relevance', str(path_folder_relevance)), - patch('train_on_pdf.folder_text_3434', str(path_folder_text_3434))): - generate_text_3434(project_name) + patch('train_on_pdf.folder_text_3434', str(path_folder_text_3434)), + patch('train_on_pdf.os.getenv', lambda *args: args[0])): + yield + + # cleanup + for path in path_folder_temporary.glob("*"): + shutil.rmtree(path) + + +def test_generate_text_with_s3(path_folder_temporary: Path): + """Tests if the s3 connection objects are created and their methods are called + Requesting prerequisites_generate_text automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + """ + # get the path to the temporary folder + path_folder_text_3434 = path_folder_temporary / 'folder_test_3434' + project_name = 'test' + + mocked_s3_settings = { + 'prefix': 'test_prefix', + 'main_bucket': { + 's3_endpoint': 'S3_END_MAIN', + 's3_access_key': 'S3_ACCESS_MAIN', + 's3_secret_key': 'S3_SECRET_MAIN', + 's3_bucket_name': 'S3_NAME_MAIN' + }, + 'interim_bucket': { + 's3_endpoint': 'S3_END_INTERIM', + 's3_access_key': 'S3_ACCESS_INTERIM', + 's3_secret_key': 'S3_SECRET_INTERIM', + 's3_bucket_name': 'S3_NAME_INTERIM' + } + } + + with (patch('train_on_pdf.S3Communication', Mock(spec=s3_communication.S3Communication)) as mocked_s3): + generate_text_3434(project_name, True, mocked_s3_settings) + + # check for calls + mocked_s3.assert_any_call(s3_endpoint_url='S3_END_MAIN', + aws_access_key_id='S3_ACCESS_MAIN', + aws_secret_access_key='S3_SECRET_MAIN', + s3_bucket='S3_NAME_MAIN') + mocked_s3.assert_any_call(s3_endpoint_url='S3_END_INTERIM', + aws_access_key_id='S3_ACCESS_INTERIM', + aws_secret_access_key='S3_SECRET_INTERIM', + s3_bucket='S3_NAME_INTERIM') + call_list = [call[0] for call in mocked_s3.mock_calls] + assert any([call for call in call_list if 'download_files_in_prefix_to_dir' in call]) + assert any([call for call in call_list if 'upload_file_to_s3' in call]) + + +def test_generate_text_no_s3(path_folder_temporary: Path): + """Tests if files are taken from the folder relevance, + then read in and putting the content into the file text_3434.csv. Note that + the header of text_3434.csv is taken from the first file read in + Requesting prerequisites_generate_text automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + """ + # get the path to the temporary folder + path_folder_text_3434 = path_folder_temporary / 'folder_test_3434' + project_name = 'test' + s3_usage = False + project_settings = None + + generate_text_3434(project_name, s3_usage, project_settings) + # ensure that the header and the content form the first file is written to # the file text_3434.csv in folder relevance and the the content of the other # files in folder relevance is appended without the header @@ -39,13 +112,76 @@ def test_generate_text(path_folder_temporary: Path): assert path_file_text_3434_csv.exists() # check if header and content of files exist + strings_expected = [ + f'That is a test {line_number}' for line_number in range(5) + ] + with open(str(path_file_text_3434_csv), 'r') as file_text_3434: for line_number, line_content in enumerate(file_text_3434, start = -1): if line_number == -1: assert line_content.rstrip() == 'HEADER' else: - assert line_content.rstrip() == f'That is a test {line_number}' - - # cleanup - for path in path_folder_temporary.glob("*"): - shutil.rmtree(path) \ No newline at end of file + assert line_content.rstrip() in strings_expected + + +def test_generate_text_successful(path_folder_temporary: Path): + """Tests if the function returns true + Requesting prerequisites_generate_text automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + """ + project_name = 'test' + s3_usage = False + project_settings = None + + return_value = generate_text_3434(project_name, s3_usage, project_settings) + assert return_value == True + + +def test_generate_text_not_successful_empty_folder(path_folder_temporary: Path, + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests if the function returns false + Requesting prerequisites_generate_text automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + :param capsys: Requesting default fixture for capturing cmd output + :type capsys: typing.Generator[CaptureFixture[str], None, None]) + """ + project_name = 'test' + s3_usage = False + project_settings = None + + # clear the relevance folder + path_folder_relevance = path_folder_temporary / 'relevance' + [file.unlink() for file in path_folder_relevance.glob("*") if file.is_file()] + + # call the function + return_value = generate_text_3434(project_name, s3_usage, project_settings) + + output_cmd, _ = capsys.readouterr() + assert 'No relevance inference results found.' in output_cmd + assert return_value == False + + +def test_generate_text_not_successful_exception(path_folder_temporary: Path): + """Tests if the function returns false + Requesting prerequisites_generate_text automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + """ + project_name = 'test' + s3_usage = False + project_settings = None + + # clear the relevance folder + path_folder_relevance = path_folder_temporary / 'relevance' + [file.unlink() for file in path_folder_relevance.glob("*") if file.is_file()] + + # patch glob.iglob to force an exception... + with patch('train_on_pdf.glob.iglob', side_effect=lambda *args: [None]): + return_value = generate_text_3434(project_name, s3_usage, project_settings) + + assert return_value == False diff --git a/data_extractor/code/tests/test_utils/test_link_files.py b/data_extractor/code/tests/test_utils/test_link_files.py index aa32521..72b4944 100644 --- a/data_extractor/code/tests/test_utils/test_link_files.py +++ b/data_extractor/code/tests/test_utils/test_link_files.py @@ -4,16 +4,13 @@ import pytest -@pytest.fixture -def path_folders_required_linking(path_folder_temporary: Path) -> tuple[Path, Path, Path]: +@pytest.fixture(autouse=True) +def path_folders_required_linking(path_folder_temporary: Path) -> None: """Defines a fixture for creating the source, source_pdf and destination folder - :param path_folder_temporary: Requesting the temporary folder fixture + :param path_folder_temporary: Requesting the path_folder_temporary fixture :type path_folder_temporary: Path - :return: Tuple containing the paths to the source, source_pdf and destination folders - :rtype: tuple[Path, Path, Path] - :yield: Tuple containing the paths to the source, source_pdf and destination folders - :rtype: Iterator[tuple[Path, Path, Path]] + :return: None """ path_folder_source = path_folder_temporary / 'source' path_folder_source_pdf = path_folder_temporary / 'source_pdf' @@ -21,44 +18,46 @@ def path_folders_required_linking(path_folder_temporary: Path) -> tuple[Path, Pa path_folder_source.mkdir(parents = True) path_folder_source_pdf.mkdir(parents = True) path_folder_destination.mkdir(parents = True) - yield (path_folder_source, path_folder_source_pdf, path_folder_destination) + yield # cleanup for path in path_folder_temporary.glob("*"): shutil.rmtree(path) -def test_link_files(path_folders_required_linking: tuple[Path, Path, Path]): - """Tests if link_files creates proper hard links - :param path_folders_required_linking: Tuple containing the paths to the source - source_pdf and destination folders - :type path_folders_required_linking: tuple[Path, Path, Path] +def test_link_files(path_folder_temporary: Path): + """Tests if link_files creates proper hard links + Requesting path_folders_required_linking automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path """ - path_folder_source, _, path_folder_destination = path_folders_required_linking + path_folder_source = path_folder_temporary / 'source' + path_folder_source_pdf = path_folder_temporary / 'source_pdf' + path_folder_destination = path_folder_temporary / 'destination' - # create sample files for i in range(10): path_current_file = path_folder_source / f'test_{i}.txt' path_current_file.touch() - # perform the linking link_files(str(path_folder_source), str(path_folder_destination)) - # check for hard links for i in range(10): path_current_file = path_folder_source / f'test_{i}.txt' assert path_current_file.stat().st_nlink == 2 -def test_link_extracted_files_result(path_folders_required_linking: tuple[Path, Path, Path]): +def test_link_extracted_files_result(path_folder_temporary: Path): """Tests if link_extracted_files returns True if executed - - :param path_folders_required_linking: Tuple containing the paths to the source - source_pdf and destination folders - :type path_folders_required_linking: tuple[Path, Path, Path] + Requesting path_folders_required_linking automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path """ - path_folder_source, path_folder_source_pdf, path_folder_destination = path_folders_required_linking - # single pdf and json file + path_folder_source = path_folder_temporary / 'source' + path_folder_source_pdf = path_folder_temporary / 'source_pdf' + path_folder_destination = path_folder_temporary / 'destination' + path_folder_source_file_pdf = path_folder_source / f'test.pdf' path_folder_source_file_json = path_folder_source / f'test.json' path_source_file_pdf = path_folder_source_pdf / f'test.pdf' @@ -68,17 +67,18 @@ def test_link_extracted_files_result(path_folders_required_linking: tuple[Path, assert result == True -def test_link_extracted_files_copy(path_folders_required_linking: tuple[Path, Path, Path]): +def test_link_extracted_files_copy(path_folder_temporary: Path): """Tests if the extracted json files in folder_source has a regarding pdf in the folder_source_pdf and if so, copy the json file to the folder_destination - - :param path_folders_required_linking: Tuple containing the paths to the source - source_pdf and destination folders - :type path_folders_required_linking: tuple[Path, Path, Path] + Requesting path_folders_required_linking automatically (autouse) + + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path """ - path_folder_source, path_folder_source_pdf, path_folder_destination = path_folders_required_linking + path_folder_source = path_folder_temporary / 'source' + path_folder_source_pdf = path_folder_temporary / 'source_pdf' + path_folder_destination = path_folder_temporary / 'destination' - # create test pdf and json files in the source_extraction and source_pdf folders for i in range(10): path_current_file = path_folder_source / f'test_{i}.pdf' path_current_file.touch() @@ -87,16 +87,13 @@ def test_link_extracted_files_copy(path_folders_required_linking: tuple[Path, Pa path_current_file = path_folder_source_pdf / f'test_{i}.pdf' path_current_file.touch() - # check if no files exist in the destination_extraction folder for i in range(10): path_current_file = path_folder_destination / f'test_{i}.json' assert not path_current_file.exists() == True - # perform extracted file linking link_extracted_files(str(path_folder_source), str(path_folder_source_pdf), str(path_folder_destination)) - # check if files exist in the destination_extraction folder for i in range(10): path_current_file = path_folder_destination / f'test_{i}.json' assert path_current_file.exists() == True diff --git a/data_extractor/code/tests/test_utils/test_run_router.py b/data_extractor/code/tests/test_utils/test_run_router.py new file mode 100644 index 0000000..5bbf85e --- /dev/null +++ b/data_extractor/code/tests/test_utils/test_run_router.py @@ -0,0 +1,303 @@ +from pathlib import Path +from train_on_pdf import run_router +import pytest +from unittest.mock import patch, Mock +import shutil +import train_on_pdf +import requests +import requests_mock +from tests.test_utils.test_convert_xls_to_csv import prerequisites_convert_xls_to_csv +from tests.test_utils.test_generate_text import prerequisites_generate_text + +# types +import typing +from _pytest.capture import CaptureFixture + + +@pytest.fixture +def prerequisites_run_router(prerequisites_convert_xls_to_csv, + prerequisites_generate_text + ) -> requests_mock.mocker.Mocker: + """Prerequisites for running the function run_router + + :param prerequisites_convert_xls_to_csv: Requesting fixture for running function convert_xls_to_csv (required in + run_router) + :param prerequisites_generate_text: Requesting fixture for running function generate_text (required in + run_router) + :rtype: requests_mock.mocker.Mocker + """ + mocked_project_settings = { + 'train_relevance': {'train': False}, + 'train_kpi': {'train': False}, + 's3_usage': None, + 's3_settings': None + } + extraction_ip = '0.0.0.0' + extraction_port = '8000' + inference_ip = '0.0.0.1' + inference_port = '8000' + + with (requests_mock.Mocker() as mocked_server, + patch('train_on_pdf.convert_xls_to_csv', Mock()), + patch('train_on_pdf.project_settings', mocked_project_settings)): + mocked_server.get(f'http://{extraction_ip}:{extraction_port}/liveness', status_code=200) + mocked_server.get(f'http://{extraction_ip}:{extraction_port}/extract', status_code=200) + mocked_server.get(f'http://{extraction_ip}:{extraction_port}/curate', status_code=200) + mocked_server.get(f'http://{inference_ip}:{inference_port}/liveness', status_code=200) + mocked_server.get(f'http://{inference_ip}:{inference_port}/train_relevance', status_code=200) + mocked_server.get(f'http://{inference_ip}:{inference_port}/infer_relevance', status_code=200) + mocked_server.get(f'http://{inference_ip}:{inference_port}/train_kpi', status_code=200) + yield mocked_server + + +@pytest.mark.parametrize('status_code, cmd_output_expected, return_value_expected', + [ + (200, 'Extraction server is up. Proceeding to extraction.', True), + (-1, 'Extraction server is not responding.', False) + ]) +def test_run_router_extraction_liveness_up(prerequisites_run_router: requests_mock.mocker.Mocker, + status_code: int, + cmd_output_expected: str, + return_value_expected: bool, + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests the liveness of the extraction server + + :param prerequisites_run_router: Requesting the prerequisites_run_router fixture + :type prerequisites_run_router: requests_mock.mocker.Mocker + :param status_code: Status code used in extraction server + :type status_code: int + :param cmd_output_expected: Expeceted cmd output + :type cmd_output_expected: str + :param return_value_expected: Expected return_value + :type return_value_expected: bool + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None]) + """ + extraction_ip = '0.0.0.0' + extraction_port = '8000' + inference_port = '8000' + project_name = 'TEST' + mocked_server = prerequisites_run_router + + mocked_server.get(f'http://{extraction_ip}:{extraction_port}/liveness', status_code=status_code) + return_value = run_router(extraction_port, inference_port, project_name) + + cmd_output, _ = capsys.readouterr() + assert cmd_output_expected in cmd_output + assert return_value == return_value_expected + + +def test_run_router_extraction_server_down(prerequisites_run_router: requests_mock.mocker.Mocker): + """Tests the return value if the extraction server is down + + :param prerequisites_run_router: Requesting the prerequisites_run_router fixture + :type prerequisites_run_router: requests_mock.mocker.Mocker + """ + extraction_ip = '0.0.0.0' + extraction_port = '8000' + inference_port = '8000' + project_name = 'TEST' + mocked_server = prerequisites_run_router + + mocked_server.get(f'http://{extraction_ip}:{extraction_port}/extract', status_code=-1) + return_value = run_router(extraction_port, inference_port, project_name) + + assert return_value is False + + +def test_run_router_extraction_curation_server_down(prerequisites_run_router: requests_mock.mocker.Mocker): + """Tests the return value of the curation of the extraction server + + :param prerequisites_run_router: Requesting the prerequisites_run_router fixture + :type prerequisites_run_router: requests_mock.mocker.Mocker + """ + extraction_ip = '0.0.0.0' + extraction_port = '8000' + inference_port = '8000' + project_name = 'TEST' + mocked_server = prerequisites_run_router + + mocked_server.get(f'http://{extraction_ip}:{extraction_port}/curate', status_code=-1) + return_value = run_router(extraction_port, inference_port, project_name) + + assert return_value is False + + +@pytest.mark.parametrize('status_code, cmd_output_expected, return_value_expected', + [ + (200, 'Inference server is up. Proceeding to Inference.', True), + (-1, 'Inference server is not responding.', False) + ]) +def test_run_router_inference_liveness(prerequisites_run_router: requests_mock.mocker.Mocker, + status_code: int, + cmd_output_expected: str, + return_value_expected: bool, + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests the liveness of the inference server, up as well as down + + :param prerequisites_run_router: Requesting the prerequisites_run_router fixture + :type prerequisites_run_router: requests_mock.mocker.Mocker + :param status_code: Status code for liveness + :type status_code: int + :param cmd_output_expected: Expected cmd output + :type cmd_output_expected: str + :param return_value_expected: Expected return_value + :type return_value_expected: bool + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None] + """ + extraction_ip = '0.0.0.0' + extraction_port = '8000' + inference_ip = '0.0.0.1' + inference_port = '8000' + project_name = 'TEST' + mocked_server = prerequisites_run_router + + mocked_server.get(f'http://{inference_ip}:{inference_port}/liveness', status_code=status_code) + return_value = run_router(extraction_port, inference_port, project_name, infer_ip=inference_ip) + + cmd_output, _ = capsys.readouterr() + assert cmd_output_expected in cmd_output + assert return_value == return_value_expected + + +@pytest.mark.parametrize('train_relevance, status_code, cmd_output_expected, return_value_expected', + [ + (True, -1, "Relevance training will be started.", False), + (True, 200, "Relevance training will be started.", True), + (False, -1, ("No relevance training done. If you want to have a relevance training please " + "set variable train under train_relevance to true."), True) + ]) +def test_run_router_relevance_training(prerequisites_run_router: requests_mock.mocker.Mocker, + train_relevance: bool, + status_code: int, + cmd_output_expected: str, + return_value_expected: bool, + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests if the relevance training fails and successfully starts + + :param prerequisites_run_router: Requesting the prerequisites_run_router fixture + :type prerequisites_run_router: requests_mock.mocker.Mocker + :param train_relevance: Flag for train relevance + :type train_relevance: bool + :param status_code: Status code for train relevance + :type status_code: int + :param cmd_output_expected: Expected cmd output + :type cmd_output_expected: str + :param return_value_expected: Expected return_value + :type return_value_expected: bool + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None] + """ + extraction_port = '8000' + inference_ip = '0.0.0.1' + inference_port = '8000' + project_name = 'TEST' + mocked_server = prerequisites_run_router + train_on_pdf.project_settings['train_relevance']['train'] = train_relevance + + mocked_server.get(f'http://{inference_ip}:{inference_port}/train_relevance', status_code=status_code) + return_value = run_router(extraction_port, inference_port, project_name, infer_ip=inference_ip) + + cmd_output, _ = capsys.readouterr() + assert cmd_output_expected in cmd_output + assert return_value == return_value_expected + + +@pytest.mark.parametrize('train_kpi, status_code_infer_relevance, project_name, status_code_train_kpi, cmd_output_expected, return_value_expected', + [ + (True, -1, "TEST", -1, "", False), + (True, 200, "TEST", -1, "text_3434 was generated without error", False), + (True, 200, "TEST", 200, "text_3434 was not generated without error", True), + (True, 200, None, -1, "Error while generating text_3434.", False), + (True, 200, None, 200, "Error while generating text_3434.", True), + (False, -1, None, -1, ("No kpi training done. If you want to have a kpi " + "training please set variable train under train_kpi to true."), True) + ]) +def test_run_router_kpi_training(prerequisites_run_router: requests_mock.mocker.Mocker, + train_kpi: bool, + status_code_infer_relevance: int, + project_name: typing.Union[str, None], + status_code_train_kpi: int, + cmd_output_expected: str, + return_value_expected: bool, + capsys: typing.Generator[CaptureFixture[str], None, None]): + """Tests if kpi training fails and successfully starts + + :param prerequisites_run_router: Requesting the prerequisites_run_router fixture + :type prerequisites_run_router: requests_mock.mocker.Mocker + :param train_kpi: Flag for train kpi + :type train_kpi: bool + :param status_code_infer_relevance: Status code for infer relevance + :type status_code_infer_relevance: int + :param project_name: Project name + :type project_name: typing.Union[str, None] + :param status_code_train_kpi: Status code for train kpi + :type status_code_train_kpi: int + :param cmd_output_expected: Expected cmd output + :type cmd_output_expected: str + :param return_value_expected: Expected return_value + :type return_value_expected: bool + :param capsys: Requesting the default fixture capsys for capturing cmd outputs + :type capsys: typing.Generator[CaptureFixture[str], None, None] + """ + extraction_ip = '0.0.0.0' + extraction_port = '8000' + inference_ip = '0.0.0.1' + inference_port = '8000' + mocked_server = prerequisites_run_router + train_on_pdf.project_settings['train_kpi']['train'] = train_kpi + + # force an exception of generate_text_3434 by removing the folder_text_3434 + if not project_name: + train_on_pdf.folder_text_3434 = None + + mocked_generate_text = Mock() + if project_name: + if status_code_train_kpi < 0: + mocked_generate_text.side_effect = lambda *args: True + else: + mocked_generate_text.side_effect = lambda *args: False + else: + mocked_generate_text.side_effect = Exception() + + with patch('train_on_pdf.generate_text_3434', mocked_generate_text): + mocked_server.get(f'http://{inference_ip}:{inference_port}/infer_relevance', status_code=status_code_infer_relevance) + mocked_server.get(f'http://{inference_ip}:{inference_port}/train_kpi', status_code=status_code_train_kpi) + return_value = run_router(extraction_port, inference_port, project_name, infer_ip=inference_ip) + + cmd_output, _ = capsys.readouterr() + assert cmd_output_expected in cmd_output + assert return_value == return_value_expected + + +@pytest.mark.parametrize('infer_relevance, train_kpi', + [ + (True, True), + (True, False), + (True, True), + (True, False), + ]) +def test_run_router_successful_run(prerequisites_run_router: requests_mock.mocker.Mocker, + infer_relevance: bool, + train_kpi: bool): + """Tests a successful run of run_router + + :param prerequisites_run_router: Requesting the prerequisites_run_router fixture + :type prerequisites_run_router: requests_mock.mocker.Mocker + :param infer_relevance: Flag for infer relevance + :type infer_relevance: bool + :type train_kpi: Flag for train kpi + :type train_kpi: bool + """ + extraction_ip = '0.0.0.0' + extraction_port = '8000' + inference_ip = '0.0.0.1' + inference_port = '8000' + project_name = 'TEST' + mocked_server = prerequisites_run_router + + with patch('train_on_pdf.generate_text_3434', Mock()): + return_value = run_router(extraction_port, inference_port, project_name, infer_ip=inference_ip) + + assert return_value == True diff --git a/data_extractor/code/tests/test_utils/test_running.py b/data_extractor/code/tests/test_utils/test_running.py index e577e5a..24b36a5 100644 --- a/data_extractor/code/tests/test_utils/test_running.py +++ b/data_extractor/code/tests/test_utils/test_running.py @@ -9,10 +9,8 @@ def prerequisite_running(path_folder_root_testing: Path): """Defines a fixture for the running_file path - :param path_folder_data_sample: _description_ - :type path_folder_data_sample: Path - :yield: Path for the running_file - :rtype: Path + :param path_folder_root_testing: Path for the testing folder + :type path_folder_root_testing: Path """ path_file_running = path_folder_root_testing / 'data' / 'running' # mock the path to the running file @@ -22,11 +20,7 @@ def prerequisite_running(path_folder_root_testing: Path): # cleanup path_file_running.unlink(missing_ok=True) - - # config_path.root_dir = Mock(side_effect=lambda *args: str(path_file_running)) - # with patch('train_on_pdf.config_path.root_dir'): - # yield def test_set_running(prerequisite_running, path_folder_root_testing: Path): """Tests the set_running function creating a running file @@ -86,4 +80,3 @@ def test_clear_running(prerequisite_running, path_folder_root_testing: Path): path_file_running.touch() clear_running() assert not path_file_running.exists() - diff --git a/data_extractor/code/tests/test_utils/test_save_train_info.py b/data_extractor/code/tests/test_utils/test_save_train_info.py index 17eb2a0..a3f5ec6 100644 --- a/data_extractor/code/tests/test_utils/test_save_train_info.py +++ b/data_extractor/code/tests/test_utils/test_save_train_info.py @@ -1,14 +1,127 @@ from pathlib import Path from train_on_pdf import save_train_info import pytest -from unittest.mock import patch +from unittest.mock import patch, Mock +import shutil +import train_on_pdf +import pickle -# def test_save_train_info(): -# source_mapping -# source_annotation -# project_settings -# project_model_dir -# pass +@pytest.fixture(autouse=True) +def prerequisites_save_train_info(path_folder_root_testing: Path, + path_folder_temporary: Path) -> Path: + """Defines a fixture for creating all prerequisites for save_train_info -# def \ No newline at end of file + :param path_folder_root_testing: Requesting the root testing folder fixture + :type path_folder_root_testing: Path + :param path_folder_temporary: Requesting the path_folder_temporary fixture + :type path_folder_temporary: Path + :return: Returns path to pickled save_train_info file + :rtype: Path + :yield: Returns path to pickled save_train_info file + :rtype: Iterator[Path] + """ + mocked_project_settings = { + 'train_relevance': { + 'output_model_name': 'TEST' + }, + 'train_kpi':{ + 'output_model_name': 'TEST' + }, + 's3_settings': { + 'prefix' : 'corporate_data_extraction_projects' + } + } + + path_source_pdf = path_folder_root_testing / 'input' / 'pdf' / 'training' + path_source_annotation = path_folder_root_testing / 'input' / 'pdfs' / 'training' + path_source_mapping = path_folder_root_testing / 'data' / 'TEST' / 'input' / 'kpi_mapping' + path_project_model_dir = path_folder_temporary / 'models' + path_project_model_dir.mkdir(parents=True, exist_ok=True) + + relevance_model = mocked_project_settings['train_relevance']['output_model_name'] + kpi_model = mocked_project_settings['train_kpi']['output_model_name'] + file_train_info = f'SUMMARY_REL_{relevance_model}_KPI_{kpi_model}.pickle' + path_train_info = path_project_model_dir / file_train_info + + with (patch('train_on_pdf.project_settings', mocked_project_settings), + patch('train_on_pdf.source_annotation', str(path_source_annotation)), + patch('train_on_pdf.source_mapping', str(path_source_mapping)), + patch('train_on_pdf.os.listdir', side_effect=lambda *args: 'test.pdf'), + patch('train_on_pdf.source_mapping', str(path_folder_temporary / 'source_mapping')), + patch('train_on_pdf.source_annotation', str(path_folder_temporary / 'source_annotation')), + patch('train_on_pdf.source_pdf', str(path_folder_temporary / 'source_pdf')), + patch('train_on_pdf.pd', Mock()) as mocked_pandas): + train_on_pdf.project_model_dir = str(path_project_model_dir) + mocked_pandas.read_csv.return_value = {None} + mocked_pandas.read_excel.return_value = {None} + yield path_train_info + + # cleanup + shutil.rmtree(path_folder_temporary) + del train_on_pdf.project_model_dir + + +def test_save_train_info_pickle(prerequisites_save_train_info: Path): + """Tests if the train info is pickle correctly + + :param prerequisites_save_train_info: Requesting the prerequisites_save_train_info fixture + :type prerequisites_save_train_info: Path + """ + project_name = 'TEST' + path_train_info = prerequisites_save_train_info + + save_train_info(project_name) + + # we have to combine a pathlib and a string object... + path_parent_train_info = path_train_info.parent + path_file_pickle = path_train_info.name + path_train_info = Path(str(path_parent_train_info) + f'/{path_file_pickle}') + + assert path_train_info.exists() + + +def test_save_train_info_entries(prerequisites_save_train_info: Path): + """Tests if all the train infos exists in the pickled train info file + + :param prerequisites_save_train_info: Requesting the prerequisites_save_train_info fixture + :type prerequisites_save_train_info: Path + """ + project_name = 'TEST' + path_train_info = prerequisites_save_train_info + + save_train_info(project_name) + + with open(str(path_train_info), 'rb') as file: + train_info = pickle.load(file) + + expected_keys = [ + 'project_name', + 'train_settings', + 'pdfs_used', + 'annotations', + 'kpis' + ] + # check that all keys exist in dict + assert all(key in expected_keys for key in train_info.keys()) + + +def test_save_tain_info_return_value(): + project_name = 'TEST' + + assert save_train_info(project_name) is None + + +def test_save_train_info_s3_usage(): + """Tests if the s3_usage flag correctly works + + """ + project_name = 'TEST' + s3_usage = True + mocked_s3 = Mock() + + save_train_info(project_name, s3_usage, mocked_s3) + + assert mocked_s3.download_files_in_prefix_to_dir.call_count == 3 + assert mocked_s3.upload_file_to_s3.called_once() + diff --git a/data_extractor/code/tests/utils_test.py b/data_extractor/code/tests/utils_test.py index 085be89..4da7b9c 100644 --- a/data_extractor/code/tests/utils_test.py +++ b/data_extractor/code/tests/utils_test.py @@ -1,5 +1,6 @@ from pathlib import Path import pandas as pd +import typing def project_tests_root() -> Path: @@ -52,4 +53,20 @@ def create_multiple_xlsx_files(path_folder: Path) -> None: :type path_folder: Path """ for i in range(5): - create_single_xlsx_file(path_folder, file_name = f'xlsx_file_{i}.xlsx') \ No newline at end of file + create_single_xlsx_file(path_folder, file_name = f'xlsx_file_{i}.xlsx') + +def modify_project_settings(project_settings: typing.Dict, + *args: typing.Tuple[str, str, bool]) -> typing.Dict: + """Returns are modified project settings dict based on the input args + + :param project_settings: Project settings + :type project_settings: typing.Dict + :return: Modified project settings + :rtype: typing.Dict + """ + for modifier in args: + # check for valid args + if len(modifier) == 3: + key1, key2, decision = modifier + project_settings[key1][key2] = decision + return project_settings