From 971ae2b7cec2558be25c4f0a39c3f417b69f357a Mon Sep 17 00:00:00 2001 From: Gabo Date: Tue, 9 Jul 2024 11:03:35 +0200 Subject: [PATCH] Rearrange folders --- src/app.py | 4 ++-- src/{ => data_model}/PdfImages.py | 0 .../run_pdf_layout_analysis.py} | 12 ++++++------ .../run_pdf_layout_analysis_fast.py} | 2 +- src/run.py | 5 ----- src/{ => vgt}/create_word_grid.py | 0 src/{ => vgt}/get_json_annotations.py | 2 +- src/{ => vgt}/get_model_configuration.py | 0 src/{ => vgt}/get_most_probable_pdf_segments.py | 2 +- src/{ => vgt}/get_reading_orders.py | 2 +- 10 files changed, 12 insertions(+), 17 deletions(-) rename src/{ => data_model}/PdfImages.py (100%) rename src/{analyze_pdf.py => pdf_layout_analysis/run_pdf_layout_analysis.py} (85%) rename src/{analyze_pdf_fast.py => pdf_layout_analysis/run_pdf_layout_analysis_fast.py} (93%) delete mode 100644 src/run.py rename src/{ => vgt}/create_word_grid.py (100%) rename src/{ => vgt}/get_json_annotations.py (98%) rename src/{ => vgt}/get_model_configuration.py (100%) rename src/{ => vgt}/get_most_probable_pdf_segments.py (99%) rename src/{ => vgt}/get_reading_orders.py (99%) diff --git a/src/app.py b/src/app.py index f08b210..cd1d877 100755 --- a/src/app.py +++ b/src/app.py @@ -6,10 +6,10 @@ from fastapi import FastAPI, UploadFile, File from fastapi.responses import PlainTextResponse -from analyze_pdf_fast import analyze_pdf_fast from catch_exceptions import catch_exceptions from configuration import service_logger, XMLS_PATH -from src.analyze_pdf import analyze_pdf +from pdf_layout_analysis.run_pdf_layout_analysis import analyze_pdf +from pdf_layout_analysis.run_pdf_layout_analysis_fast import analyze_pdf_fast service_logger.info(f"Is PyTorch using GPU: {torch.cuda.is_available()}") diff --git a/src/PdfImages.py b/src/data_model/PdfImages.py similarity index 100% rename from src/PdfImages.py rename to src/data_model/PdfImages.py diff --git a/src/analyze_pdf.py b/src/pdf_layout_analysis/run_pdf_layout_analysis.py similarity index 85% rename from src/analyze_pdf.py rename to src/pdf_layout_analysis/run_pdf_layout_analysis.py index bc989e9..afbde7b 100644 --- a/src/analyze_pdf.py +++ b/src/pdf_layout_analysis/run_pdf_layout_analysis.py @@ -5,13 +5,13 @@ from typing import AnyStr from data_model.SegmentBox import SegmentBox from ditod.VGTTrainer import VGTTrainer -from get_json_annotations import get_annotations -from get_model_configuration import get_model_configuration -from get_most_probable_pdf_segments import get_most_probable_pdf_segments -from get_reading_orders import get_reading_orders -from src.PdfImages import PdfImages +from vgt.get_json_annotations import get_annotations +from vgt.get_model_configuration import get_model_configuration +from vgt.get_most_probable_pdf_segments import get_most_probable_pdf_segments +from vgt.get_reading_orders import get_reading_orders +from data_model.PdfImages import PdfImages from src.configuration import service_logger, JSON_TEST_FILE_PATH, IMAGES_ROOT_PATH -from src.create_word_grid import create_word_grid, remove_word_grids +from vgt.create_word_grid import create_word_grid, remove_word_grids from detectron2.checkpoint import DetectionCheckpointer from detectron2.data.datasets import register_coco_instances from detectron2.data import DatasetCatalog diff --git a/src/analyze_pdf_fast.py b/src/pdf_layout_analysis/run_pdf_layout_analysis_fast.py similarity index 93% rename from src/analyze_pdf_fast.py rename to src/pdf_layout_analysis/run_pdf_layout_analysis_fast.py index 5edd400..84897d5 100644 --- a/src/analyze_pdf_fast.py +++ b/src/pdf_layout_analysis/run_pdf_layout_analysis_fast.py @@ -4,10 +4,10 @@ from fast_trainer.ParagraphExtractorTrainer import ParagraphExtractorTrainer from fast_trainer.model_configuration import MODEL_CONFIGURATION as PARAGRAPH_EXTRACTION_CONFIGURATION from pdf_features.PdfFeatures import PdfFeatures +from pdf_layout_analysis.run_pdf_layout_analysis import pdf_content_to_pdf_path from pdf_tokens_type_trainer.TokenTypeTrainer import TokenTypeTrainer from pdf_tokens_type_trainer.ModelConfiguration import ModelConfiguration -from analyze_pdf import pdf_content_to_pdf_path from configuration import ROOT_PATH, service_logger from data_model.SegmentBox import SegmentBox diff --git a/src/run.py b/src/run.py deleted file mode 100644 index aa9e6dc..0000000 --- a/src/run.py +++ /dev/null @@ -1,5 +0,0 @@ -from configuration import ROOT_PATH - -if __name__ == "__main__": - with open(f"{ROOT_PATH}/test_pdfs/error.pdf", "rb") as stream: - files = {"file": stream} diff --git a/src/create_word_grid.py b/src/vgt/create_word_grid.py similarity index 100% rename from src/create_word_grid.py rename to src/vgt/create_word_grid.py diff --git a/src/get_json_annotations.py b/src/vgt/get_json_annotations.py similarity index 98% rename from src/get_json_annotations.py rename to src/vgt/get_json_annotations.py index bd8ec13..f0d81cf 100644 --- a/src/get_json_annotations.py +++ b/src/vgt/get_json_annotations.py @@ -1,7 +1,7 @@ import json from os import makedirs from pdf_features.PdfToken import PdfToken -from PdfImages import PdfImages +from data_model.PdfImages import PdfImages from configuration import DOCLAYNET_TYPE_BY_ID from configuration import JSONS_ROOT_PATH, JSON_TEST_FILE_PATH diff --git a/src/get_model_configuration.py b/src/vgt/get_model_configuration.py similarity index 100% rename from src/get_model_configuration.py rename to src/vgt/get_model_configuration.py diff --git a/src/get_most_probable_pdf_segments.py b/src/vgt/get_most_probable_pdf_segments.py similarity index 99% rename from src/get_most_probable_pdf_segments.py rename to src/vgt/get_most_probable_pdf_segments.py index f7602f6..f802b4f 100644 --- a/src/get_most_probable_pdf_segments.py +++ b/src/vgt/get_most_probable_pdf_segments.py @@ -9,7 +9,7 @@ from pdf_features.PdfToken import PdfToken from pdf_features.Rectangle import Rectangle from pdf_token_type_labels.TokenType import TokenType -from PdfImages import PdfImages +from data_model.PdfImages import PdfImages from configuration import ROOT_PATH, DOCLAYNET_TYPE_BY_ID from data_model.Prediction import Prediction diff --git a/src/get_reading_orders.py b/src/vgt/get_reading_orders.py similarity index 99% rename from src/get_reading_orders.py rename to src/vgt/get_reading_orders.py index 3c5863b..473a659 100644 --- a/src/get_reading_orders.py +++ b/src/vgt/get_reading_orders.py @@ -3,7 +3,7 @@ from pdf_features.PdfToken import PdfToken from pdf_token_type_labels.TokenType import TokenType -from PdfImages import PdfImages +from data_model.PdfImages import PdfImages def find_segment_for_token(token: PdfToken, segments: list[PdfSegment], tokens_by_segments):