Add endpoint to get XML

huridocs · Jul 4, 2024 · 9cca43c
1 parent fea5c60
commit 9cca43c
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/src/PdfImages.py b/src/PdfImages.py
@@ -37,8 +37,8 @@ def remove_images():
  shutil.rmtree(IMAGES_ROOT_PATH)
 
  @staticmethod
- def from_pdf_path(pdf_path: str | Path, pdf_name: str = "", xml_name: str = ""):
- xml_path = Path(join(XMLS_PATH, xml_name)) if xml_name else None
+ def from_pdf_path(pdf_path: str | Path, pdf_name: str = "", xml_file_name: str = ""):
+ xml_path = Path(join(XMLS_PATH, xml_file_name)) if xml_file_name else None
 
  if xml_path and not xml_path.parent.exists():
  os.makedirs(xml_path.parent, exist_ok=True)

diff --git a/src/analyze_pdf.py b/src/analyze_pdf.py
@@ -49,10 +49,10 @@ def predict_doclaynet():
  VGTTrainer.test(configuration, model)
 
 
-def analyze_pdf(file: AnyStr, xml_name: str = "") -> list[dict]:
+def analyze_pdf(file: AnyStr, xml_file_name: str = "") -> list[dict]:
  pdf_path = pdf_content_to_pdf_path(file)
  service_logger.info(f"Creating PDF images")
- pdf_images_list: list[PdfImages] = [PdfImages.from_pdf_path(pdf_path, "", xml_name)]
+ pdf_images_list: list[PdfImages] = [PdfImages.from_pdf_path(pdf_path, "", xml_file_name)]
  create_word_grid([pdf_images.pdf_features for pdf_images in pdf_images_list])
  get_annotations(pdf_images_list)
  predict_doclaynet()