diff --git a/README.md b/README.md index df5c9a76b..aedea2d0e 100644 --- a/README.md +++ b/README.md
@@ -200,8 +200,8 @@ To see all available options (export formats etc.) run `docling --help`. ### RAG Check out the following examples showcasing RAG using Docling with standard LLM application frameworks: -- [Basic RAG pipeline with 🦙 LlamaIndex](https://github.com/DS4SD/docling/tree/main/examples/rag_llamaindex.ipynb) -- [Basic RAG pipeline with 🦜🔗 LangChain](https://github.com/DS4SD/docling/tree/main/examples/rag_langchain.ipynb) +- [Basic RAG pipeline with LlamaIndex 🦙](https://github.com/DS4SD/docling/tree/main/docs/examples/rag_llamaindex.ipynb) +- [Basic RAG pipeline with LangChain 🦜🔗](https://github.com/DS4SD/docling/tree/main/docs/examples/rag_langchain.ipynb) ## Advanced features diff --git a/logo.png b/docs/assets/logo.png similarity index 100% rename from logo.png rename to docs/assets/logo.png diff --git a/logo.svg b/docs/assets/logo.svg similarity index 100% rename from logo.svg rename to docs/assets/logo.svg diff --git a/docs/examples/batch_convert.py b/docs/examples/batch_convert.py new file mode 100644 index 000000000..2f3f38b17 --- /dev/null +++ b/docs/examples/batch_convert.py @@ -0,0 +1,105 @@ +import json +import logging +import time +from pathlib import Path +from typing import Iterable + +from docling.datamodel.base_models import ConversionStatus +from docling.datamodel.document import ConversionResult, DocumentConversionInput +from docling.document_converter import DocumentConverter + +_log = logging.getLogger(__name__) + + +def export_documents( + conv_results: Iterable[ConversionResult], + output_dir: Path, +): + output_dir.mkdir(parents=True, exist_ok=True) + + success_count = 0 + failure_count = 0 + partial_success_count = 0 + + for conv_res in conv_results: + if conv_res.status == ConversionStatus.SUCCESS: + success_count += 1 + doc_filename = conv_res.input.file.stem + + # Export Deep Search document JSON format: + with (output_dir / f"{doc_filename}.json").open( + "w", encoding="utf-8" + ) as fp: + fp.write(json.dumps(conv_res.render_as_dict())) + + # Export Text format: + with (output_dir / f"{doc_filename}.txt").open("w", encoding="utf-8") as fp: + fp.write(conv_res.render_as_text()) + + # Export Markdown format: + with (output_dir / f"{doc_filename}.md").open("w", encoding="utf-8") as fp: + fp.write(conv_res.render_as_markdown()) + + # Export Document Tags format: + with (output_dir / f"{doc_filename}.doctags").open( + "w", encoding="utf-8" + ) as fp: + fp.write(conv_res.render_as_doctags()) + + elif conv_res.status == ConversionStatus.PARTIAL_SUCCESS: + _log.info( + f"Document {conv_res.input.file} was partially converted with the following errors:" + ) + for item in conv_res.errors: + _log.info(f"\t{item.error_message}") + partial_success_count += 1 + else: + _log.info(f"Document {conv_res.input.file} failed to convert.") + failure_count += 1 + + _log.info( + f"Processed {success_count + partial_success_count + failure_count} docs, " + f"of which {failure_count} failed " + f"and {partial_success_count} were partially converted." 
+ ) + return success_count, partial_success_count, failure_count + + +def main(): + logging.basicConfig(level=logging.INFO) + + input_doc_paths = [ + Path("./tests/data/2206.01062.pdf"), + Path("./tests/data/2203.01017v2.pdf"), + Path("./tests/data/2305.03393v1.pdf"), + Path("./tests/data/redp5110.pdf"), + Path("./tests/data/redp5695.pdf"), + ] + + # buf = BytesIO(Path("./test/data/2206.01062.pdf").open("rb").read()) + # docs = [DocumentStream(filename="my_doc.pdf", stream=buf)] + # input = DocumentConversionInput.from_streams(docs) + + doc_converter = DocumentConverter() + + input = DocumentConversionInput.from_paths(input_doc_paths) + + start_time = time.time() + + conv_results = doc_converter.convert(input) + success_count, partial_success_count, failure_count = export_documents( + conv_results, output_dir=Path("./scratch") + ) + + end_time = time.time() - start_time + + _log.info(f"All documents were converted in {end_time:.2f} seconds.") + + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." + ) + + +if __name__ == "__main__": + main() diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py new file mode 100644 index 000000000..e386bb3a2 --- /dev/null +++ b/docs/examples/custom_convert.py @@ -0,0 +1,175 @@ +import json +import logging +import time +from pathlib import Path +from typing import Iterable + +from docling.backend.docling_parse_backend import DoclingParseDocumentBackend +from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend +from docling.datamodel.base_models import ConversionStatus, PipelineOptions +from docling.datamodel.document import ConversionResult, DocumentConversionInput +from docling.datamodel.pipeline_options import ( + TesseractCliOcrOptions, + TesseractOcrOptions, +) +from docling.document_converter import DocumentConverter + +_log = logging.getLogger(__name__) + + +def export_documents( + conv_results: Iterable[ConversionResult], + output_dir: Path, +): + output_dir.mkdir(parents=True, exist_ok=True) + + success_count = 0 + failure_count = 0 + + for conv_res in conv_results: + if conv_res.status == ConversionStatus.SUCCESS: + success_count += 1 + doc_filename = conv_res.input.file.stem + + # Export Deep Search document JSON format: + with (output_dir / f"{doc_filename}.json").open( + "w", encoding="utf-8" + ) as fp: + fp.write(json.dumps(conv_res.render_as_dict())) + + # Export Text format: + with (output_dir / f"{doc_filename}.txt").open("w", encoding="utf-8") as fp: + fp.write(conv_res.render_as_text()) + + # Export Markdown format: + with (output_dir / f"{doc_filename}.md").open("w", encoding="utf-8") as fp: + fp.write(conv_res.render_as_markdown()) + + # Export Document Tags format: + with (output_dir / f"{doc_filename}.doctags").open( + "w", encoding="utf-8" + ) as fp: + fp.write(conv_res.render_as_doctags()) + + else: + _log.info(f"Document {conv_res.input.file} failed to convert.") + failure_count += 1 + + _log.info( + f"Processed {success_count + failure_count} docs, of which {failure_count} failed" + ) + + return success_count, failure_count + + +def main(): + logging.basicConfig(level=logging.INFO) + + input_doc_paths = [ + Path("./tests/data/2206.01062.pdf"), + ] + + ########################################################################### + + # The following sections contain a combination of PipelineOptions + # and PDF Backends for various configurations. + # Uncomment one section at the time to see the differences in the output. 
+ + # PyPdfium without EasyOCR + # -------------------- + # pipeline_options = PipelineOptions() + # pipeline_options.do_ocr=False + # pipeline_options.do_table_structure=True + # pipeline_options.table_structure_options.do_cell_matching = False + + # doc_converter = DocumentConverter( + # pipeline_options=pipeline_options, + # pdf_backend=PyPdfiumDocumentBackend, + # ) + + # PyPdfium with EasyOCR + # ----------------- + # pipeline_options = PipelineOptions() + # pipeline_options.do_ocr=True + # pipeline_options.do_table_structure=True + # pipeline_options.table_structure_options.do_cell_matching = True + + # doc_converter = DocumentConverter( + # pipeline_options=pipeline_options, + # pdf_backend=PyPdfiumDocumentBackend, + # ) + + # Docling Parse without EasyOCR + # ------------------------- + pipeline_options = PipelineOptions() + pipeline_options.do_ocr = False + pipeline_options.do_table_structure = True + pipeline_options.table_structure_options.do_cell_matching = True + + doc_converter = DocumentConverter( + pipeline_options=pipeline_options, + pdf_backend=DoclingParseDocumentBackend, + ) + + # Docling Parse with EasyOCR + # ---------------------- + # pipeline_options = PipelineOptions() + # pipeline_options.do_ocr=True + # pipeline_options.do_table_structure=True + # pipeline_options.table_structure_options.do_cell_matching = True + + # doc_converter = DocumentConverter( + # pipeline_options=pipeline_options, + # pdf_backend=DoclingParseDocumentBackend, + # ) + + # Docling Parse with Tesseract + # ---------------------- + # pipeline_options = PipelineOptions() + # pipeline_options.do_ocr = True + # pipeline_options.do_table_structure = True + # pipeline_options.table_structure_options.do_cell_matching = True + # pipeline_options.ocr_options = TesseractOcrOptions() + + # doc_converter = DocumentConverter( + # pipeline_options=pipeline_options, + # pdf_backend=DoclingParseDocumentBackend, + # ) + + # Docling Parse with Tesseract CLI + # ---------------------- + # pipeline_options = PipelineOptions() + # pipeline_options.do_ocr = True + # pipeline_options.do_table_structure = True + # pipeline_options.table_structure_options.do_cell_matching = True + # pipeline_options.ocr_options = TesseractCliOcrOptions() + + # doc_converter = DocumentConverter( + # pipeline_options=pipeline_options, + # pdf_backend=DoclingParseDocumentBackend, + # ) + + ########################################################################### + + # Define input files + input = DocumentConversionInput.from_paths(input_doc_paths) + + start_time = time.time() + + conv_results = doc_converter.convert(input) + success_count, failure_count = export_documents( + conv_results, output_dir=Path("./scratch") + ) + + end_time = time.time() - start_time + + _log.info(f"All documents were converted in {end_time:.2f} seconds.") + + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
+ ) + + +if __name__ == "__main__": + main() diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py new file mode 100644 index 000000000..bdffbec15 --- /dev/null +++ b/docs/examples/export_figures.py @@ -0,0 +1,85 @@ +import logging +import time +from pathlib import Path +from typing import Tuple + +from docling.datamodel.base_models import ( + AssembleOptions, + ConversionStatus, + FigureElement, + PageElement, + TableElement, +) +from docling.datamodel.document import DocumentConversionInput +from docling.document_converter import DocumentConverter + +_log = logging.getLogger(__name__) + +IMAGE_RESOLUTION_SCALE = 2.0 + + +def main(): + logging.basicConfig(level=logging.INFO) + + input_doc_paths = [ + Path("./tests/data/2206.01062.pdf"), + ] + output_dir = Path("./scratch") + + input_files = DocumentConversionInput.from_paths(input_doc_paths) + + # Important: For operating with page images, we must keep them, otherwise the DocumentConverter + # will destroy them for cleaning up memory. + # This is done by setting AssembleOptions.images_scale, which also defines the scale of images. + # scale=1 correspond of a standard 72 DPI image + assemble_options = AssembleOptions() + assemble_options.images_scale = IMAGE_RESOLUTION_SCALE + + doc_converter = DocumentConverter(assemble_options=assemble_options) + + start_time = time.time() + + conv_results = doc_converter.convert(input_files) + + success_count = 0 + failure_count = 0 + output_dir.mkdir(parents=True, exist_ok=True) + for conv_res in conv_results: + if conv_res.status != ConversionStatus.SUCCESS: + _log.info(f"Document {conv_res.input.file} failed to convert.") + failure_count += 1 + continue + + doc_filename = conv_res.input.file.stem + + # Export page images + for page in conv_res.pages: + page_no = page.page_no + 1 + page_image_filename = output_dir / f"{doc_filename}-{page_no}.png" + with page_image_filename.open("wb") as fp: + page.image.save(fp, format="PNG") + + # Export figures and tables + for element, image in conv_res.render_element_images( + element_types=(FigureElement, TableElement) + ): + element_image_filename = ( + output_dir / f"{doc_filename}-element-{element.id}.png" + ) + with element_image_filename.open("wb") as fp: + image.save(fp, "PNG") + + success_count += 1 + + end_time = time.time() - start_time + + _log.info(f"All documents were converted in {end_time:.2f} seconds.") + + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
+ ) + + +if __name__ == "__main__": + main() diff --git a/docs/examples/export_multimodal.py b/docs/examples/export_multimodal.py new file mode 100644 index 000000000..7c016b195 --- /dev/null +++ b/docs/examples/export_multimodal.py @@ -0,0 +1,116 @@ +import datetime +import logging +import time +from pathlib import Path + +import pandas as pd + +from docling.datamodel.base_models import AssembleOptions, ConversionStatus +from docling.datamodel.document import DocumentConversionInput +from docling.document_converter import DocumentConverter +from docling.utils.export import generate_multimodal_pages + +_log = logging.getLogger(__name__) + +IMAGE_RESOLUTION_SCALE = 2.0 + + +def main(): + logging.basicConfig(level=logging.INFO) + + input_doc_paths = [ + Path("./tests/data/2206.01062.pdf"), + ] + output_dir = Path("./scratch") + + input_files = DocumentConversionInput.from_paths(input_doc_paths) + + # Important: For operating with page images, we must keep them, otherwise the DocumentConverter + # will destroy them for cleaning up memory. + # This is done by setting AssembleOptions.images_scale, which also defines the scale of images. + # scale=1 correspond of a standard 72 DPI image + assemble_options = AssembleOptions() + assemble_options.images_scale = IMAGE_RESOLUTION_SCALE + + doc_converter = DocumentConverter(assemble_options=assemble_options) + + start_time = time.time() + + converted_docs = doc_converter.convert(input_files) + + success_count = 0 + failure_count = 0 + output_dir.mkdir(parents=True, exist_ok=True) + for doc in converted_docs: + if doc.status != ConversionStatus.SUCCESS: + _log.info(f"Document {doc.input.file} failed to convert.") + failure_count += 1 + continue + + rows = [] + for ( + content_text, + content_md, + content_dt, + page_cells, + page_segments, + page, + ) in generate_multimodal_pages(doc): + + dpi = page._default_image_scale * 72 + + rows.append( + { + "document": doc.input.file.name, + "hash": doc.input.document_hash, + "page_hash": page.page_hash, + "image": { + "width": page.image.width, + "height": page.image.height, + "bytes": page.image.tobytes(), + }, + "cells": page_cells, + "contents": content_text, + "contents_md": content_md, + "contents_dt": content_dt, + "segments": page_segments, + "extra": { + "page_num": page.page_no + 1, + "width_in_points": page.size.width, + "height_in_points": page.size.height, + "dpi": dpi, + }, + } + ) + success_count += 1 + + # Generate one parquet from all documents + df = pd.json_normalize(rows) + now = datetime.datetime.now() + output_filename = output_dir / f"multimodal_{now:%Y-%m-%d_%H%M%S}.parquet" + df.to_parquet(output_filename) + + end_time = time.time() - start_time + + _log.info(f"All documents were converted in {end_time:.2f} seconds.") + + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
+ ) + + # This block demonstrates how the file can be opened with the HF datasets library + # from datasets import Dataset + # from PIL import Image + # multimodal_df = pd.read_parquet(output_filename) + + # # Convert pandas DataFrame to Hugging Face Dataset and load bytes into image + # dataset = Dataset.from_pandas(multimodal_df) + # def transforms(examples): + # examples["image"] = Image.frombytes('RGB', (examples["image.width"], examples["image.height"]), examples["image.bytes"], 'raw') + # return examples + # dataset = dataset.map(transforms) + + +if __name__ == "__main__": + main() diff --git a/docs/examples/export_tables.py b/docs/examples/export_tables.py new file mode 100644 index 000000000..a0c605c13 --- /dev/null +++ b/docs/examples/export_tables.py @@ -0,0 +1,74 @@ +import logging +import time +from pathlib import Path +from typing import Tuple + +import pandas as pd + +from docling.datamodel.base_models import ConversionStatus +from docling.datamodel.document import DocumentConversionInput +from docling.document_converter import DocumentConverter + +_log = logging.getLogger(__name__) + + +def main(): + logging.basicConfig(level=logging.INFO) + + input_doc_paths = [ + Path("./tests/data/2206.01062.pdf"), + ] + output_dir = Path("./scratch") + + input_files = DocumentConversionInput.from_paths(input_doc_paths) + + doc_converter = DocumentConverter() + + start_time = time.time() + + conv_results = doc_converter.convert(input_files) + + success_count = 0 + failure_count = 0 + output_dir.mkdir(parents=True, exist_ok=True) + for conv_res in conv_results: + if conv_res.status != ConversionStatus.SUCCESS: + _log.info(f"Document {conv_res.input.file} failed to convert.") + failure_count += 1 + continue + + doc_filename = conv_res.input.file.stem + + # Export tables + for table_ix, table in enumerate(conv_res.output.tables): + table_df: pd.DataFrame = table.export_to_dataframe() + print(f"## Table {table_ix}") + print(table_df.to_markdown()) + + # Save the table as csv + element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.csv" + _log.info(f"Saving CSV table to {element_csv_filename}") + table_df.to_csv(element_csv_filename) + + # Save the table as html + element_html_filename = ( + output_dir / f"{doc_filename}-table-{table_ix+1}.html" + ) + _log.info(f"Saving HTML table to {element_html_filename}") + with element_html_filename.open("w") as fp: + fp.write(table.export_to_html()) + + success_count += 1 + + end_time = time.time() - start_time + + _log.info(f"All documents were converted in {end_time:.2f} seconds.") + + if failure_count > 0: + raise RuntimeError( + f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
+ ) + + +if __name__ == "__main__": + main() diff --git a/docs/examples/minimal.py b/docs/examples/minimal.py new file mode 100644 index 000000000..837db718b --- /dev/null +++ b/docs/examples/minimal.py @@ -0,0 +1,6 @@ +from docling.document_converter import DocumentConverter + +source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL +converter = DocumentConverter() +doc = converter.convert_single(source) +print(doc.render_as_markdown()) # output: ## Docling Technical Report [...]" diff --git a/examples/rag_langchain.ipynb b/docs/examples/rag_langchain.ipynb similarity index 99% rename from examples/rag_langchain.ipynb rename to docs/examples/rag_langchain.ipynb index 30e383296..f2464f298 100644 --- a/examples/rag_langchain.ipynb +++ b/docs/examples/rag_langchain.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# RAG with Docling and 🦜🔗 LangChain" + "# RAG with LangChain 🦜🔗" ] }, { diff --git a/examples/rag_llamaindex.ipynb b/docs/examples/rag_llamaindex.ipynb similarity index 98% rename from examples/rag_llamaindex.ipynb rename to docs/examples/rag_llamaindex.ipynb index f5c0e91ae..48ade3686 100644 --- a/examples/rag_llamaindex.ipynb +++ b/docs/examples/rag_llamaindex.ipynb @@ -11,7 +11,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# RAG with Docling and 🦙 LlamaIndex" + "# RAG with LlamaIndex 🦙" ] }, { @@ -25,9 +25,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "LlamaIndex extensions `DoclingReader` and `DoclingNodeParser` presented in this notebook seamlessly integrate Docling into LlamaIndex, enabling you to:\n", + "This example leverages the official [LlamaIndex Docling extension](../../integrations/llamaindex/).\n", + "\n", + "Presented extensions `DoclingReader` and `DoclingNodeParser` enable you to:\n", "- use PDF documents in your LLM applications with ease and speed, and\n", - "- leverage Docling's rich format for advanced, document-native grounding." + "- harness Docling's rich format for advanced, document-native grounding." ] }, { diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..6f218e481 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,29 @@ +# Docling + +
+
+
+
+
🎉 Docling is now officially supported in LlamaIndex! Check it out!
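For readers following the LlamaIndex pointer above (and the `DoclingReader` / `DoclingNodeParser` extensions mentioned in the relocated `rag_llamaindex` notebook), here is a minimal sketch of how that integration is typically wired up. It is illustrative only: the package and attribute names (`llama_index.readers.docling`, `llama_index.node_parser.docling`, `DoclingReader.ExportType.JSON`, `load_data(file_path=...)`) come from the separately released LlamaIndex extensions and are assumptions, not something defined in this diff.

```python
# Illustrative sketch only (assumed API, not part of this PR):
# parse a PDF with Docling via the LlamaIndex reader, then chunk it
# into structure-aware nodes with the Docling node parser.
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.readers.docling import DoclingReader

# Export Docling's rich JSON so the node parser can use document structure.
reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)
documents = reader.load_data(file_path="https://arxiv.org/pdf/2408.09869")

# Split the Docling document into nodes (sections, tables, ...) that a
# downstream LlamaIndex index or retriever can embed.
nodes = DoclingNodeParser().get_nodes_from_documents(documents)
print(f"{len(documents)} document(s) -> {len(nodes)} nodes")
```

The same reader/parser pair can then be passed to a `VectorStoreIndex` (with an embedding model configured) as shown in the `rag_llamaindex.ipynb` example referenced in the navigation below.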
+{% endblock %} +#} diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 000000000..5beec9779 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,3 @@ +[data-md-color-scheme="default"] .md-banner a { + color: #5e8bde; +} diff --git a/examples/batch_convert.py b/examples/batch_convert.py deleted file mode 100644 index 2f3f38b17..000000000 --- a/examples/batch_convert.py +++ /dev/null @@ -1,105 +0,0 @@ -import json -import logging -import time -from pathlib import Path -from typing import Iterable - -from docling.datamodel.base_models import ConversionStatus -from docling.datamodel.document import ConversionResult, DocumentConversionInput -from docling.document_converter import DocumentConverter - -_log = logging.getLogger(__name__) - - -def export_documents( - conv_results: Iterable[ConversionResult], - output_dir: Path, -): - output_dir.mkdir(parents=True, exist_ok=True) - - success_count = 0 - failure_count = 0 - partial_success_count = 0 - - for conv_res in conv_results: - if conv_res.status == ConversionStatus.SUCCESS: - success_count += 1 - doc_filename = conv_res.input.file.stem - - # Export Deep Search document JSON format: - with (output_dir / f"{doc_filename}.json").open( - "w", encoding="utf-8" - ) as fp: - fp.write(json.dumps(conv_res.render_as_dict())) - - # Export Text format: - with (output_dir / f"{doc_filename}.txt").open("w", encoding="utf-8") as fp: - fp.write(conv_res.render_as_text()) - - # Export Markdown format: - with (output_dir / f"{doc_filename}.md").open("w", encoding="utf-8") as fp: - fp.write(conv_res.render_as_markdown()) - - # Export Document Tags format: - with (output_dir / f"{doc_filename}.doctags").open( - "w", encoding="utf-8" - ) as fp: - fp.write(conv_res.render_as_doctags()) - - elif conv_res.status == ConversionStatus.PARTIAL_SUCCESS: - _log.info( - f"Document {conv_res.input.file} was partially converted with the following errors:" - ) - for item in conv_res.errors: - _log.info(f"\t{item.error_message}") - partial_success_count += 1 - else: - _log.info(f"Document {conv_res.input.file} failed to convert.") - failure_count += 1 - - _log.info( - f"Processed {success_count + partial_success_count + failure_count} docs, " - f"of which {failure_count} failed " - f"and {partial_success_count} were partially converted." - ) - return success_count, partial_success_count, failure_count - - -def main(): - logging.basicConfig(level=logging.INFO) - - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - Path("./tests/data/2203.01017v2.pdf"), - Path("./tests/data/2305.03393v1.pdf"), - Path("./tests/data/redp5110.pdf"), - Path("./tests/data/redp5695.pdf"), - ] - - # buf = BytesIO(Path("./test/data/2206.01062.pdf").open("rb").read()) - # docs = [DocumentStream(filename="my_doc.pdf", stream=buf)] - # input = DocumentConversionInput.from_streams(docs) - - doc_converter = DocumentConverter() - - input = DocumentConversionInput.from_paths(input_doc_paths) - - start_time = time.time() - - conv_results = doc_converter.convert(input) - success_count, partial_success_count, failure_count = export_documents( - conv_results, output_dir=Path("./scratch") - ) - - end_time = time.time() - start_time - - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) - - -if __name__ == "__main__": - main() diff --git a/examples/batch_convert.py b/examples/batch_convert.py new file mode 120000 index 000000000..5117377f6 --- /dev/null +++ b/examples/batch_convert.py @@ -0,0 +1 @@ +../docs/examples/batch_convert.py \ No newline at end of file diff --git a/examples/custom_convert.py b/examples/custom_convert.py deleted file mode 100644 index e386bb3a2..000000000 --- a/examples/custom_convert.py +++ /dev/null @@ -1,175 +0,0 @@ -import json -import logging -import time -from pathlib import Path -from typing import Iterable - -from docling.backend.docling_parse_backend import DoclingParseDocumentBackend -from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend -from docling.datamodel.base_models import ConversionStatus, PipelineOptions -from docling.datamodel.document import ConversionResult, DocumentConversionInput -from docling.datamodel.pipeline_options import ( - TesseractCliOcrOptions, - TesseractOcrOptions, -) -from docling.document_converter import DocumentConverter - -_log = logging.getLogger(__name__) - - -def export_documents( - conv_results: Iterable[ConversionResult], - output_dir: Path, -): - output_dir.mkdir(parents=True, exist_ok=True) - - success_count = 0 - failure_count = 0 - - for conv_res in conv_results: - if conv_res.status == ConversionStatus.SUCCESS: - success_count += 1 - doc_filename = conv_res.input.file.stem - - # Export Deep Search document JSON format: - with (output_dir / f"{doc_filename}.json").open( - "w", encoding="utf-8" - ) as fp: - fp.write(json.dumps(conv_res.render_as_dict())) - - # Export Text format: - with (output_dir / f"{doc_filename}.txt").open("w", encoding="utf-8") as fp: - fp.write(conv_res.render_as_text()) - - # Export Markdown format: - with (output_dir / f"{doc_filename}.md").open("w", encoding="utf-8") as fp: - fp.write(conv_res.render_as_markdown()) - - # Export Document Tags format: - with (output_dir / f"{doc_filename}.doctags").open( - "w", encoding="utf-8" - ) as fp: - fp.write(conv_res.render_as_doctags()) - - else: - _log.info(f"Document {conv_res.input.file} failed to convert.") - failure_count += 1 - - _log.info( - f"Processed {success_count + failure_count} docs, of which {failure_count} failed" - ) - - return success_count, failure_count - - -def main(): - logging.basicConfig(level=logging.INFO) - - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] - - ########################################################################### - - # The following sections contain a combination of PipelineOptions - # and PDF Backends for various configurations. - # Uncomment one section at the time to see the differences in the output. 
- - # PyPdfium without EasyOCR - # -------------------- - # pipeline_options = PipelineOptions() - # pipeline_options.do_ocr=False - # pipeline_options.do_table_structure=True - # pipeline_options.table_structure_options.do_cell_matching = False - - # doc_converter = DocumentConverter( - # pipeline_options=pipeline_options, - # pdf_backend=PyPdfiumDocumentBackend, - # ) - - # PyPdfium with EasyOCR - # ----------------- - # pipeline_options = PipelineOptions() - # pipeline_options.do_ocr=True - # pipeline_options.do_table_structure=True - # pipeline_options.table_structure_options.do_cell_matching = True - - # doc_converter = DocumentConverter( - # pipeline_options=pipeline_options, - # pdf_backend=PyPdfiumDocumentBackend, - # ) - - # Docling Parse without EasyOCR - # ------------------------- - pipeline_options = PipelineOptions() - pipeline_options.do_ocr = False - pipeline_options.do_table_structure = True - pipeline_options.table_structure_options.do_cell_matching = True - - doc_converter = DocumentConverter( - pipeline_options=pipeline_options, - pdf_backend=DoclingParseDocumentBackend, - ) - - # Docling Parse with EasyOCR - # ---------------------- - # pipeline_options = PipelineOptions() - # pipeline_options.do_ocr=True - # pipeline_options.do_table_structure=True - # pipeline_options.table_structure_options.do_cell_matching = True - - # doc_converter = DocumentConverter( - # pipeline_options=pipeline_options, - # pdf_backend=DoclingParseDocumentBackend, - # ) - - # Docling Parse with Tesseract - # ---------------------- - # pipeline_options = PipelineOptions() - # pipeline_options.do_ocr = True - # pipeline_options.do_table_structure = True - # pipeline_options.table_structure_options.do_cell_matching = True - # pipeline_options.ocr_options = TesseractOcrOptions() - - # doc_converter = DocumentConverter( - # pipeline_options=pipeline_options, - # pdf_backend=DoclingParseDocumentBackend, - # ) - - # Docling Parse with Tesseract CLI - # ---------------------- - # pipeline_options = PipelineOptions() - # pipeline_options.do_ocr = True - # pipeline_options.do_table_structure = True - # pipeline_options.table_structure_options.do_cell_matching = True - # pipeline_options.ocr_options = TesseractCliOcrOptions() - - # doc_converter = DocumentConverter( - # pipeline_options=pipeline_options, - # pdf_backend=DoclingParseDocumentBackend, - # ) - - ########################################################################### - - # Define input files - input = DocumentConversionInput.from_paths(input_doc_paths) - - start_time = time.time() - - conv_results = doc_converter.convert(input) - success_count, failure_count = export_documents( - conv_results, output_dir=Path("./scratch") - ) - - end_time = time.time() - start_time - - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) - - -if __name__ == "__main__": - main() diff --git a/examples/custom_convert.py b/examples/custom_convert.py new file mode 120000 index 000000000..dc4e853e5 --- /dev/null +++ b/examples/custom_convert.py @@ -0,0 +1 @@ +../docs/examples/custom_convert.py \ No newline at end of file diff --git a/examples/export_figures.py b/examples/export_figures.py deleted file mode 100644 index bdffbec15..000000000 --- a/examples/export_figures.py +++ /dev/null @@ -1,85 +0,0 @@ -import logging -import time -from pathlib import Path -from typing import Tuple - -from docling.datamodel.base_models import ( - AssembleOptions, - ConversionStatus, - FigureElement, - PageElement, - TableElement, -) -from docling.datamodel.document import DocumentConversionInput -from docling.document_converter import DocumentConverter - -_log = logging.getLogger(__name__) - -IMAGE_RESOLUTION_SCALE = 2.0 - - -def main(): - logging.basicConfig(level=logging.INFO) - - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] - output_dir = Path("./scratch") - - input_files = DocumentConversionInput.from_paths(input_doc_paths) - - # Important: For operating with page images, we must keep them, otherwise the DocumentConverter - # will destroy them for cleaning up memory. - # This is done by setting AssembleOptions.images_scale, which also defines the scale of images. - # scale=1 correspond of a standard 72 DPI image - assemble_options = AssembleOptions() - assemble_options.images_scale = IMAGE_RESOLUTION_SCALE - - doc_converter = DocumentConverter(assemble_options=assemble_options) - - start_time = time.time() - - conv_results = doc_converter.convert(input_files) - - success_count = 0 - failure_count = 0 - output_dir.mkdir(parents=True, exist_ok=True) - for conv_res in conv_results: - if conv_res.status != ConversionStatus.SUCCESS: - _log.info(f"Document {conv_res.input.file} failed to convert.") - failure_count += 1 - continue - - doc_filename = conv_res.input.file.stem - - # Export page images - for page in conv_res.pages: - page_no = page.page_no + 1 - page_image_filename = output_dir / f"{doc_filename}-{page_no}.png" - with page_image_filename.open("wb") as fp: - page.image.save(fp, format="PNG") - - # Export figures and tables - for element, image in conv_res.render_element_images( - element_types=(FigureElement, TableElement) - ): - element_image_filename = ( - output_dir / f"{doc_filename}-element-{element.id}.png" - ) - with element_image_filename.open("wb") as fp: - image.save(fp, "PNG") - - success_count += 1 - - end_time = time.time() - start_time - - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) - - -if __name__ == "__main__": - main() diff --git a/examples/export_figures.py b/examples/export_figures.py new file mode 120000 index 000000000..831087f59 --- /dev/null +++ b/examples/export_figures.py @@ -0,0 +1 @@ +../docs/examples/export_figures.py \ No newline at end of file diff --git a/examples/export_multimodal.py b/examples/export_multimodal.py deleted file mode 100644 index 7c016b195..000000000 --- a/examples/export_multimodal.py +++ /dev/null @@ -1,116 +0,0 @@ -import datetime -import logging -import time -from pathlib import Path - -import pandas as pd - -from docling.datamodel.base_models import AssembleOptions, ConversionStatus -from docling.datamodel.document import DocumentConversionInput -from docling.document_converter import DocumentConverter -from docling.utils.export import generate_multimodal_pages - -_log = logging.getLogger(__name__) - -IMAGE_RESOLUTION_SCALE = 2.0 - - -def main(): - logging.basicConfig(level=logging.INFO) - - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] - output_dir = Path("./scratch") - - input_files = DocumentConversionInput.from_paths(input_doc_paths) - - # Important: For operating with page images, we must keep them, otherwise the DocumentConverter - # will destroy them for cleaning up memory. - # This is done by setting AssembleOptions.images_scale, which also defines the scale of images. - # scale=1 correspond of a standard 72 DPI image - assemble_options = AssembleOptions() - assemble_options.images_scale = IMAGE_RESOLUTION_SCALE - - doc_converter = DocumentConverter(assemble_options=assemble_options) - - start_time = time.time() - - converted_docs = doc_converter.convert(input_files) - - success_count = 0 - failure_count = 0 - output_dir.mkdir(parents=True, exist_ok=True) - for doc in converted_docs: - if doc.status != ConversionStatus.SUCCESS: - _log.info(f"Document {doc.input.file} failed to convert.") - failure_count += 1 - continue - - rows = [] - for ( - content_text, - content_md, - content_dt, - page_cells, - page_segments, - page, - ) in generate_multimodal_pages(doc): - - dpi = page._default_image_scale * 72 - - rows.append( - { - "document": doc.input.file.name, - "hash": doc.input.document_hash, - "page_hash": page.page_hash, - "image": { - "width": page.image.width, - "height": page.image.height, - "bytes": page.image.tobytes(), - }, - "cells": page_cells, - "contents": content_text, - "contents_md": content_md, - "contents_dt": content_dt, - "segments": page_segments, - "extra": { - "page_num": page.page_no + 1, - "width_in_points": page.size.width, - "height_in_points": page.size.height, - "dpi": dpi, - }, - } - ) - success_count += 1 - - # Generate one parquet from all documents - df = pd.json_normalize(rows) - now = datetime.datetime.now() - output_filename = output_dir / f"multimodal_{now:%Y-%m-%d_%H%M%S}.parquet" - df.to_parquet(output_filename) - - end_time = time.time() - start_time - - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) - - # This block demonstrates how the file can be opened with the HF datasets library - # from datasets import Dataset - # from PIL import Image - # multimodal_df = pd.read_parquet(output_filename) - - # # Convert pandas DataFrame to Hugging Face Dataset and load bytes into image - # dataset = Dataset.from_pandas(multimodal_df) - # def transforms(examples): - # examples["image"] = Image.frombytes('RGB', (examples["image.width"], examples["image.height"]), examples["image.bytes"], 'raw') - # return examples - # dataset = dataset.map(transforms) - - -if __name__ == "__main__": - main() diff --git a/examples/export_multimodal.py b/examples/export_multimodal.py new file mode 120000 index 000000000..96ea158a0 --- /dev/null +++ b/examples/export_multimodal.py @@ -0,0 +1 @@ +../docs/examples/export_multimodal.py \ No newline at end of file diff --git a/examples/export_tables.py b/examples/export_tables.py deleted file mode 100644 index a0c605c13..000000000 --- a/examples/export_tables.py +++ /dev/null @@ -1,74 +0,0 @@ -import logging -import time -from pathlib import Path -from typing import Tuple - -import pandas as pd - -from docling.datamodel.base_models import ConversionStatus -from docling.datamodel.document import DocumentConversionInput -from docling.document_converter import DocumentConverter - -_log = logging.getLogger(__name__) - - -def main(): - logging.basicConfig(level=logging.INFO) - - input_doc_paths = [ - Path("./tests/data/2206.01062.pdf"), - ] - output_dir = Path("./scratch") - - input_files = DocumentConversionInput.from_paths(input_doc_paths) - - doc_converter = DocumentConverter() - - start_time = time.time() - - conv_results = doc_converter.convert(input_files) - - success_count = 0 - failure_count = 0 - output_dir.mkdir(parents=True, exist_ok=True) - for conv_res in conv_results: - if conv_res.status != ConversionStatus.SUCCESS: - _log.info(f"Document {conv_res.input.file} failed to convert.") - failure_count += 1 - continue - - doc_filename = conv_res.input.file.stem - - # Export tables - for table_ix, table in enumerate(conv_res.output.tables): - table_df: pd.DataFrame = table.export_to_dataframe() - print(f"## Table {table_ix}") - print(table_df.to_markdown()) - - # Save the table as csv - element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.csv" - _log.info(f"Saving CSV table to {element_csv_filename}") - table_df.to_csv(element_csv_filename) - - # Save the table as html - element_html_filename = ( - output_dir / f"{doc_filename}-table-{table_ix+1}.html" - ) - _log.info(f"Saving HTML table to {element_html_filename}") - with element_html_filename.open("w") as fp: - fp.write(table.export_to_html()) - - success_count += 1 - - end_time = time.time() - start_time - - _log.info(f"All documents were converted in {end_time:.2f} seconds.") - - if failure_count > 0: - raise RuntimeError( - f"The example failed converting {failure_count} on {len(input_doc_paths)}." 
- ) - - -if __name__ == "__main__": - main() diff --git a/examples/export_tables.py b/examples/export_tables.py new file mode 120000 index 000000000..c6842a50f --- /dev/null +++ b/examples/export_tables.py @@ -0,0 +1 @@ +../docs/examples/export_tables.py \ No newline at end of file diff --git a/examples/minimal.py b/examples/minimal.py deleted file mode 100644 index 837db718b..000000000 --- a/examples/minimal.py +++ /dev/null @@ -1,6 +0,0 @@ -from docling.document_converter import DocumentConverter - -source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL -converter = DocumentConverter() -doc = converter.convert_single(source) -print(doc.render_as_markdown()) # output: ## Docling Technical Report [...]" diff --git a/examples/minimal.py b/examples/minimal.py new file mode 120000 index 000000000..0964db6d0 --- /dev/null +++ b/examples/minimal.py @@ -0,0 +1 @@ +../docs/examples/minimal.py \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 000000000..2deb6463f --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,97 @@ +site_name: Docling +site_url: https://ds4sd.github.io/docling/ +repo_name: DS4SD/docling +repo_url: https://github.com/DS4SD/docling + +theme: + name: material + custom_dir: docs/overrides + palette: + # Palette toggle for automatic mode + - media: "(prefers-color-scheme)" + scheme: default + primary: black + toggle: + icon: material/brightness-auto + name: Switch to light mode + + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: black + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + toggle: + icon: material/brightness-4 + name: Switch to system preference + + logo: assets/logo.png + favicon: assets/logo.png + features: + - content.tabs.link + - content.code.annotate + - content.code.copy + - announce.dismiss + - navigation.tabs + # - navigation.indexes # <= if set, each "section" can have its own page, if index.md is used + - navigation.instant + - navigation.instant.prefetch + # - navigation.instant.preview + - navigation.instant.progress + - navigation.path + - navigation.sections # <= + - navigation.top + - navigation.tracking + - search.suggest + - toc.follow +nav: + - Get started: + - Home: index.md + - Installation: installation.md + # - Docling v2: v2.md + # - Concepts: + # - Docling Document: concepts/document.md + # - Chunking: concepts/chunking.md + - Examples: + - Conversion: + - "Simple conversion": examples/minimal.py + - "Custom conversion": examples/custom_convert.py + - "Batch conversion": examples/batch_convert.py + - "Figure export": examples/export_figures.py + - "Table export": examples/export_tables.py + - "Multimodal export": examples/export_multimodal.py + - RAG / QA: + - "RAG with LlamaIndex 🦙": examples/rag_llamaindex.ipynb + - "RAG with LangChain 🦜🔗": examples/rag_langchain.ipynb + # - Chunking: + # - Chunking: examples/chunking.md + # - CLI: + # - CLI: examples/cli.md + - Integrations: + - "LlamaIndex 🦙 extension": integrations/llamaindex.md + # - "LangChain 🦜🔗 extension": integrations/langchain.md + # - API reference: + # - API reference: api_reference/index.md + +markdown_extensions: + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + slugify: !!python/object/apply:pymdownx.slugs.slugify + kwds: + case: lower + - admonition + - pymdownx.details + - attr_list +plugins: + - search + - mkdocs-jupyter + +extra_css: 
+ - stylesheets/extra.css diff --git a/poetry.lock b/poetry.lock index 307927dd9..052e8743c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -263,6 +263,20 @@ files = [ pycodestyle = ">=2.11.0" tomli = {version = "*", markers = "python_version < \"3.11\""} +[[package]] +name = "babel" +version = "2.16.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"}, + {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"}, +] + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] + [[package]] name = "backports-tarfile" version = "1.2.0" @@ -347,6 +361,24 @@ d = ["aiohttp (>=3.10)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "bleach" +version = "6.1.0" +description = "An easy safelist-based HTML-sanitizing tool." +optional = false +python-versions = ">=3.8" +files = [ + {file = "bleach-6.1.0-py3-none-any.whl", hash = "sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6"}, + {file = "bleach-6.1.0.tar.gz", hash = "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe"}, +] + +[package.dependencies] +six = ">=1.9.0" +webencodings = "*" + +[package.extras] +css = ["tinycss2 (>=1.1.0,<1.3)"] + [[package]] name = "certifi" version = "2024.8.30" @@ -931,6 +963,17 @@ tqdm = ">=4.64.0,<5.0.0" [package.extras] toolkit = ["deepsearch-toolkit (>=0.31.0)"] +[[package]] +name = "defusedxml" +version = "0.7.1" +description = "XML bomb protection for Python stdlib modules" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + [[package]] name = "deprecated" version = "1.2.14" @@ -1185,6 +1228,20 @@ files = [ [package.extras] tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] +[[package]] +name = "fastjsonschema" +version = "2.20.0" +description = "Fastest Python implementation of JSON schema" +optional = false +python-versions = "*" +files = [ + {file = "fastjsonschema-2.20.0-py3-none-any.whl", hash = "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a"}, + {file = "fastjsonschema-2.20.0.tar.gz", hash = "sha256:3d48fc5300ee96f5d116f10fe6f28d938e6008f59a6a025c2649475b87f76a23"}, +] + +[package.extras] +devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] + [[package]] name = "filelock" version = "3.16.1" @@ -1444,6 +1501,23 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", 
"zarr", "zstandard"] tqdm = ["tqdm"] +[[package]] +name = "ghp-import" +version = "2.1.0" +description = "Copy your docs directly to the gh-pages branch." +optional = false +python-versions = "*" +files = [ + {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, + {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1" + +[package.extras] +dev = ["flake8", "markdown", "twine", "wheel"] + [[package]] name = "gitdb" version = "4.0.11" @@ -2214,6 +2288,17 @@ traitlets = ">=5.3" docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"] test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"] +[[package]] +name = "jupyterlab-pygments" +version = "0.3.0" +description = "Pygments theme using JupyterLab CSS variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780"}, + {file = "jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d"}, +] + [[package]] name = "jupyterlab-widgets" version = "3.0.13" @@ -2225,6 +2310,35 @@ files = [ {file = "jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed"}, ] +[[package]] +name = "jupytext" +version = "1.16.4" +description = "Jupyter notebooks as Markdown documents, Julia, Python or R scripts" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jupytext-1.16.4-py3-none-any.whl", hash = "sha256:76989d2690e65667ea6fb411d8056abe7cd0437c07bd774660b83d62acf9490a"}, + {file = "jupytext-1.16.4.tar.gz", hash = "sha256:28e33f46f2ce7a41fb9d677a4a2c95327285579b64ca104437c4b9eb1e4174e9"}, +] + +[package.dependencies] +markdown-it-py = ">=1.0" +mdit-py-plugins = "*" +nbformat = "*" +packaging = "*" +pyyaml = "*" +tomli = {version = "*", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs (>=1.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist", "sphinx-gallery (<0.8)"] +docs = ["myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +test = ["pytest", "pytest-randomly", "pytest-xdist"] +test-cov = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist"] +test-external = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs (>=1.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-randomly", "pytest-xdist", "sphinx-gallery (<0.8)"] +test-functional = ["pytest", "pytest-randomly", "pytest-xdist"] +test-integration = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-randomly", "pytest-xdist"] +test-ui = ["calysto-bash"] + [[package]] name = "keyring" version = "25.4.1" @@ -2777,6 +2891,21 @@ html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] source = ["Cython (==0.29.37)"] +[[package]] +name = "markdown" +version = "3.7" +description = "Python implementation of John Gruber's Markdown." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, + {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, +] + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -3008,6 +3137,25 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +description = "Collection of plugins for markdown-it-py" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636"}, + {file = "mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5"}, +] + +[package.dependencies] +markdown-it-py = ">=1.0.0,<4.0.0" + +[package.extras] +code-style = ["pre-commit"] +rtd = ["myst-parser", "sphinx-book-theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "mdurl" version = "0.1.2" @@ -3034,6 +3182,17 @@ files = [ numpy = "*" pandas = "*" +[[package]] +name = "mergedeep" +version = "1.3.4" +description = "A deep merge function for 🐍." +optional = false +python-versions = ">=3.6" +files = [ + {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, + {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, +] + [[package]] name = "milvus-lite" version = "2.4.10" @@ -3067,6 +3226,122 @@ files = [ {file = "minijinja-2.2.0.tar.gz", hash = "sha256:4411052c7a60f8d56468cc6d17d45d72be3d5e89e9578a04f8336cc56601523c"}, ] +[[package]] +name = "mistune" +version = "3.0.2" +description = "A sane and fast Markdown parser with useful plugins and renderers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205"}, + {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, +] + +[[package]] +name = "mkdocs" +version = "1.6.1" +description = "Project documentation with Markdown." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, + {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} +ghp-import = ">=1.0" +jinja2 = ">=2.11.1" +markdown = ">=3.3.6" +markupsafe = ">=2.0.1" +mergedeep = ">=1.3.4" +mkdocs-get-deps = ">=0.2.0" +packaging = ">=20.5" +pathspec = ">=0.11.1" +pyyaml = ">=5.1" +pyyaml-env-tag = ">=0.1" +watchdog = ">=2.0" + +[package.extras] +i18n = ["babel (>=2.9.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] + +[[package]] +name = "mkdocs-get-deps" +version = "0.2.0" +description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"}, + {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"}, +] + +[package.dependencies] +mergedeep = ">=1.3.4" +platformdirs = ">=2.2.0" +pyyaml = ">=5.1" + +[[package]] +name = "mkdocs-jupyter" +version = "0.25.0" +description = "Use Jupyter in mkdocs websites" +optional = false +python-versions = ">=3.9" +files = [ + {file = "mkdocs_jupyter-0.25.0-py3-none-any.whl", hash = "sha256:d83d71deef19f0401505945bf92ec3bd5b40615af89308e72d5112929f8ee00b"}, + {file = "mkdocs_jupyter-0.25.0.tar.gz", hash = "sha256:e26c1d341916bc57f96ea3f93d8d0a88fc77c87d4cee222f66d2007798d924f5"}, +] + +[package.dependencies] +ipykernel = ">6.0.0,<7.0.0" +jupytext = ">1.13.8,<2" +mkdocs = ">=1.4.0,<2" +mkdocs-material = ">9.0.0" +nbconvert = ">=7.2.9,<8" +pygments = ">2.12.0" + +[[package]] +name = "mkdocs-material" +version = "9.5.40" +description = "Documentation that simply works" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_material-9.5.40-py3-none-any.whl", hash = "sha256:8e7a16ada34e79a7b6459ff2602584222f522c738b6a023d1bea853d5049da6f"}, + {file = "mkdocs_material-9.5.40.tar.gz", hash = "sha256:b69d70e667ec51fc41f65e006a3184dd00d95b2439d982cb1586e4c018943156"}, +] + +[package.dependencies] +babel = ">=2.10,<3.0" +colorama = ">=0.4,<1.0" +jinja2 = ">=3.0,<4.0" +markdown = ">=3.2,<4.0" +mkdocs = ">=1.6,<2.0" +mkdocs-material-extensions = ">=1.3,<2.0" +paginate = ">=0.5,<1.0" +pygments = ">=2.16,<3.0" +pymdown-extensions = ">=10.2,<11.0" +regex = ">=2022.4" +requests = ">=2.26,<3.0" + +[package.extras] +git = ["mkdocs-git-committers-plugin-2 (>=1.1,<2.0)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"] +imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=10.2,<11.0)"] +recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"] + +[[package]] +name = "mkdocs-material-extensions" +version = "1.3.1" +description = "Extension pack for Python Markdown and MkDocs Material." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"}, + {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, +] + [[package]] name = "more-itertools" version = "10.5.0" @@ -3281,6 +3556,86 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "nbclient" +version = "0.10.0" +description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "nbclient-0.10.0-py3-none-any.whl", hash = "sha256:f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f"}, + {file = "nbclient-0.10.0.tar.gz", hash = "sha256:4b3f1b7dba531e498449c4db4f53da339c91d449dc11e9af3a43b4eb5c5abb09"}, +] + +[package.dependencies] +jupyter-client = ">=6.1.12" +jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" +nbformat = ">=5.1" +traitlets = ">=5.4" + +[package.extras] +dev = ["pre-commit"] +docs = ["autodoc-traits", "mock", "moto", "myst-parser", "nbclient[test]", "sphinx (>=1.7)", "sphinx-book-theme", "sphinxcontrib-spelling"] +test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "pytest (>=7.0,<8)", "pytest-asyncio", "pytest-cov (>=4.0)", "testpath", "xmltodict"] + +[[package]] +name = "nbconvert" +version = "7.16.4" +description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "nbconvert-7.16.4-py3-none-any.whl", hash = "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3"}, + {file = "nbconvert-7.16.4.tar.gz", hash = "sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4"}, +] + +[package.dependencies] +beautifulsoup4 = "*" +bleach = "!=5.0.0" +defusedxml = "*" +jinja2 = ">=3.0" +jupyter-core = ">=4.7" +jupyterlab-pygments = "*" +markupsafe = ">=2.0" +mistune = ">=2.0.3,<4" +nbclient = ">=0.5.0" +nbformat = ">=5.7" +packaging = "*" +pandocfilters = ">=1.4.1" +pygments = ">=2.4.1" +tinycss2 = "*" +traitlets = ">=5.1" + +[package.extras] +all = ["flaky", "ipykernel", "ipython", "ipywidgets (>=7.5)", "myst-parser", "nbsphinx (>=0.2.12)", "playwright", "pydata-sphinx-theme", "pyqtwebengine (>=5.15)", "pytest (>=7)", "sphinx (==5.0.2)", "sphinxcontrib-spelling", "tornado (>=6.1)"] +docs = ["ipykernel", "ipython", "myst-parser", "nbsphinx (>=0.2.12)", "pydata-sphinx-theme", "sphinx (==5.0.2)", "sphinxcontrib-spelling"] +qtpdf = ["pyqtwebengine (>=5.15)"] +qtpng = ["pyqtwebengine (>=5.15)"] +serve = ["tornado (>=6.1)"] +test = ["flaky", "ipykernel", "ipywidgets (>=7.5)", "pytest (>=7)"] +webpdf = ["playwright"] + +[[package]] +name = "nbformat" +version = "5.10.4" +description = "The Jupyter Notebook format" +optional = false +python-versions = ">=3.8" +files = [ + {file = "nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b"}, + {file = "nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a"}, +] + +[package.dependencies] +fastjsonschema = ">=2.15" +jsonschema = ">=2.6" +jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" +traitlets = ">=5.1" + +[package.extras] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] +test = ["pep440", "pre-commit", "pytest", "testpath"] + [[package]] name = "nbqa" version = "1.9.0" @@ -3758,6 +4113,21 @@ files = [ {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] +[[package]] +name = "paginate" +version = "0.5.7" +description = "Divides large result sets into pages for easier browsing" +optional = false +python-versions = "*" +files = [ + {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, + {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, +] + +[package.extras] +dev = ["pytest", "tox"] +lint = ["black"] + [[package]] name = "pandas" version = "2.2.3" @@ -3859,6 +4229,17 @@ files = [ numpy = ">=1.23.5" types-pytz = ">=2022.1.1" +[[package]] +name = "pandocfilters" +version = "1.5.1" +description = "Utilities for writing pandoc filters in python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc"}, + {file = "pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e"}, +] + [[package]] name = "parso" version = "0.8.4" @@ -4574,6 +4955,24 @@ tomlkit = ">=0.10.1" spelling = ["pyenchant (>=3.2,<4.0)"] testutils = ["gitpython (>3)"] +[[package]] +name = "pymdown-extensions" +version = "10.11.2" +description = "Extension pack for Python 
Markdown." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pymdown_extensions-10.11.2-py3-none-any.whl", hash = "sha256:41cdde0a77290e480cf53892f5c5e50921a7ee3e5cd60ba91bf19837b33badcf"}, + {file = "pymdown_extensions-10.11.2.tar.gz", hash = "sha256:bc8847ecc9e784a098efd35e20cba772bc5a1b529dfcef9dc1972db9021a1049"}, +] + +[package.dependencies] +markdown = ">=3.6" +pyyaml = "*" + +[package.extras] +extra = ["pygments (>=2.12)"] + [[package]] name = "pymilvus" version = "2.4.7" @@ -5027,6 +5426,20 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "pyyaml-env-tag" +version = "0.1" +description = "A custom YAML tag for referencing environment variables in YAML files. " +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, + {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, +] + +[package.dependencies] +pyyaml = "*" + [[package]] name = "pyzmq" version = "26.2.0" @@ -6342,6 +6755,24 @@ requests = ">=2.26.0" [package.extras] blobfile = ["blobfile (>=2)"] +[[package]] +name = "tinycss2" +version = "1.3.0" +description = "A tiny CSS parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tinycss2-1.3.0-py3-none-any.whl", hash = "sha256:54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7"}, + {file = "tinycss2-1.3.0.tar.gz", hash = "sha256:152f9acabd296a8375fbca5b84c961ff95971fcfc32e79550c8df8e29118c54d"}, +] + +[package.dependencies] +webencodings = ">=0.4" + +[package.extras] +doc = ["sphinx", "sphinx_rtd_theme"] +test = ["pytest", "ruff"] + [[package]] name = "tokenize-rt" version = "6.0.0" @@ -6843,11 +7274,6 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, - {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, - {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, - {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, - {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, - {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -7084,6 +7510,48 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", 
"coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "watchdog" +version = "5.0.3" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.9" +files = [ + {file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:85527b882f3facda0579bce9d743ff7f10c3e1e0db0a0d0e28170a7d0e5ce2ea"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:53adf73dcdc0ef04f7735066b4a57a4cd3e49ef135daae41d77395f0b5b692cb"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e25adddab85f674acac303cf1f5835951345a56c5f7f582987d266679979c75b"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f01f4a3565a387080dc49bdd1fefe4ecc77f894991b88ef927edbfa45eb10818"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91b522adc25614cdeaf91f7897800b82c13b4b8ac68a42ca959f992f6990c490"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d52db5beb5e476e6853da2e2d24dbbbed6797b449c8bf7ea118a4ee0d2c9040e"}, + {file = "watchdog-5.0.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:94d11b07c64f63f49876e0ab8042ae034674c8653bfcdaa8c4b32e71cfff87e8"}, + {file = "watchdog-5.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:349c9488e1d85d0a58e8cb14222d2c51cbc801ce11ac3936ab4c3af986536926"}, + {file = "watchdog-5.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:53a3f10b62c2d569e260f96e8d966463dec1a50fa4f1b22aec69e3f91025060e"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:950f531ec6e03696a2414b6308f5c6ff9dab7821a768c9d5788b1314e9a46ca7"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae6deb336cba5d71476caa029ceb6e88047fc1dc74b62b7c4012639c0b563906"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1021223c08ba8d2d38d71ec1704496471ffd7be42cfb26b87cd5059323a389a1"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:752fb40efc7cc8d88ebc332b8f4bcbe2b5cc7e881bccfeb8e25054c00c994ee3"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a2e8f3f955d68471fa37b0e3add18500790d129cc7efe89971b8a4cc6fdeb0b2"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b8ca4d854adcf480bdfd80f46fdd6fb49f91dd020ae11c89b3a79e19454ec627"}, + {file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:90a67d7857adb1d985aca232cc9905dd5bc4803ed85cfcdcfcf707e52049eda7"}, + {file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:720ef9d3a4f9ca575a780af283c8fd3a0674b307651c1976714745090da5a9e8"}, + {file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:223160bb359281bb8e31c8f1068bf71a6b16a8ad3d9524ca6f523ac666bb6a1e"}, + {file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:560135542c91eaa74247a2e8430cf83c4342b29e8ad4f520ae14f0c8a19cfb5b"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:dd021efa85970bd4824acacbb922066159d0f9e546389a4743d56919b6758b91"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_armv7l.whl", hash = "sha256:78864cc8f23dbee55be34cc1494632a7ba30263951b5b2e8fc8286b95845f82c"}, + {file = 
"watchdog-5.0.3-py3-none-manylinux2014_i686.whl", hash = "sha256:1e9679245e3ea6498494b3028b90c7b25dbb2abe65c7d07423ecfc2d6218ff7c"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64.whl", hash = "sha256:9413384f26b5d050b6978e6fcd0c1e7f0539be7a4f1a885061473c5deaa57221"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:294b7a598974b8e2c6123d19ef15de9abcd282b0fbbdbc4d23dfa812959a9e05"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_s390x.whl", hash = "sha256:26dd201857d702bdf9d78c273cafcab5871dd29343748524695cecffa44a8d97"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:0f9332243355643d567697c3e3fa07330a1d1abf981611654a1f2bf2175612b7"}, + {file = "watchdog-5.0.3-py3-none-win32.whl", hash = "sha256:c66f80ee5b602a9c7ab66e3c9f36026590a0902db3aea414d59a2f55188c1f49"}, + {file = "watchdog-5.0.3-py3-none-win_amd64.whl", hash = "sha256:f00b4cf737f568be9665563347a910f8bdc76f88c2970121c86243c8cfdf90e9"}, + {file = "watchdog-5.0.3-py3-none-win_ia64.whl", hash = "sha256:49f4d36cb315c25ea0d946e018c01bb028048023b9e103d3d3943f58e109dd45"}, + {file = "watchdog-5.0.3.tar.gz", hash = "sha256:108f42a7f0345042a854d4d0ad0834b741d421330d5f575b81cb27b883500176"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [[package]] name = "wcwidth" version = "0.2.13" @@ -7095,6 +7563,17 @@ files = [ {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, ] +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +optional = false +python-versions = "*" +files = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] + [[package]] name = "wheel" version = "0.44.0" @@ -7462,4 +7941,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "536b2f199fe70180aa31e55e7ad47a75a0b64cd20bbe96caec294037966c7b00" +content-hash = "cae7819c1a144a8aa2b700d0399d7e9d78b55b3c743cfb0b118f4bd0baa2d34e" diff --git a/pyproject.toml b/pyproject.toml index 015cb384c..1d6c4ae3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,8 @@ pandas-stubs = "^2.1.4.231227" ipykernel = "^6.29.5" ipywidgets = "^8.1.5" nbqa = "^1.9.0" +mkdocs-material = "^9.5.40" +mkdocs-jupyter = "^0.25.0" [tool.poetry.group.examples.dependencies] datasets = "^2.21.0"