Skip to content

Commit

Permalink
Add endpoint to get xml
Browse files: browse the repository at this point in the history
  • Loading branch information
gabriel-piles committed Jul 4, 2024
1 parent fea5c60 commit 9cca43c
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/PdfImages.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,8 +37,8 @@ def remove_images():
shutil.rmtree(IMAGES_ROOT_PATH)

@staticmethod
def from_pdf_path(pdf_path: str | Path, pdf_name: str = "", xml_name: str = ""):
xml_path = Path(join(XMLS_PATH, xml_name)) if xml_name else None
def from_pdf_path(pdf_path: str | Path, pdf_name: str = "", xml_file_name: str = ""):
xml_path = Path(join(XMLS_PATH, xml_file_name)) if xml_file_name else None

if xml_path and not xml_path.parent.exists():
os.makedirs(xml_path.parent, exist_ok=True)
Expand Down
4 changes: 2 additions & 2 deletions src/analyze_pdf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -49,10 +49,10 @@ def predict_doclaynet():
VGTTrainer.test(configuration, model)


def analyze_pdf(file: AnyStr, xml_name: str = "") -> list[dict]:
def analyze_pdf(file: AnyStr, xml_file_name: str = "") -> list[dict]:
pdf_path = pdf_content_to_pdf_path(file)
service_logger.info(f"Creating PDF images")
pdf_images_list: list[PdfImages] = [PdfImages.from_pdf_path(pdf_path, "", xml_name)]
pdf_images_list: list[PdfImages] = [PdfImages.from_pdf_path(pdf_path, "", xml_file_name)]
create_word_grid([pdf_images.pdf_features for pdf_images in pdf_images_list])
get_annotations(pdf_images_list)
predict_doclaynet()
Expand Down

0 comments on commit 9cca43c

Please sign in to comment.