diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 80f719960..0a85a254b 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -21,4 +21,7 @@ build: - poetry config virtualenvs.create false post_install: # Install dependencies with 'docs' dependency group - - poetry install --with docs \ No newline at end of file + - poetry install --with docs + pre_build: + # Generate hub documentation + - python scripts/component_readme/generate_hub.py \ No newline at end of file diff --git a/docs/components/components.md b/docs/components/components.md index e6d348ca4..d79901648 100644 --- a/docs/components/components.md +++ b/docs/components/components.md @@ -2,7 +2,7 @@ Fondant makes it easy to build data preparation pipelines leveraging reusable components. Fondant provides a lot of components out of the box -([overview](https://github.com/ml6team/fondant/tree/main/components)), but you can also define your +([overview](hub.md)), but you can also define your own custom components. ## The anatomy of a component diff --git a/docs/components/hub.md b/docs/components/hub.md new file mode 100644 index 000000000..53dce2ecc --- /dev/null +++ b/docs/components/hub.md @@ -0,0 +1,88 @@ +--- +disable_toc: True +--- + +# Component Hub + +Below you can find the reusable components offered by Fondant. + +??? "caption_images" + + --8<-- "components/caption_images/README.md:1" + +??? "download_images" + + --8<-- "components/download_images/README.md:1" + +??? "embed_images" + + --8<-- "components/embed_images/README.md:1" + +??? "embedding_based_laion_retrieval" + + --8<-- "components/embedding_based_laion_retrieval/README.md:1" + +??? "filter_comments" + + --8<-- "components/filter_comments/README.md:1" + +??? "filter_image_resolution" + + --8<-- "components/filter_image_resolution/README.md:1" + +??? "filter_line_length" + + --8<-- "components/filter_line_length/README.md:1" + +??? "image_cropping" + + --8<-- "components/image_cropping/README.md:1" + +??? 
"image_resolution_extraction" + + --8<-- "components/image_resolution_extraction/README.md:1" + +??? "language_filter" + + --8<-- "components/language_filter/README.md:1" + +??? "load_from_files" + + --8<-- "components/load_from_files/README.md:1" + +??? "load_from_hf_hub" + + --8<-- "components/load_from_hf_hub/README.md:1" + +??? "load_from_parquet" + + --8<-- "components/load_from_parquet/README.md:1" + +??? "minhash_generator" + + --8<-- "components/minhash_generator/README.md:1" + +??? "pii_redaction" + + --8<-- "components/pii_redaction/README.md:1" + +??? "prompt_based_laion_retrieval" + + --8<-- "components/prompt_based_laion_retrieval/README.md:1" + +??? "segment_images" + + --8<-- "components/segment_images/README.md:1" + +??? "text_length_filter" + + --8<-- "components/text_length_filter/README.md:1" + +??? "text_normalization" + + --8<-- "components/text_normalization/README.md:1" + +??? "write_to_hf_hub" + + --8<-- "components/write_to_hf_hub/README.md:1" + diff --git a/docs/overrides/partials/toc.html b/docs/overrides/partials/toc.html new file mode 100644 index 000000000..df8c43203 --- /dev/null +++ b/docs/overrides/partials/toc.html @@ -0,0 +1,56 @@ + + + +{% set title = lang.t("toc") %} +{% if config.mdx_configs.toc and config.mdx_configs.toc.title %} + {% set title = config.mdx_configs.toc.title %} +{% endif %} + + + \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 050b06cdf..6c660daef 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -42,6 +42,7 @@ nav: - Creating custom components: components/custom_component.md - Read / write components: components/generic_component.md - Component spec: components/component_spec.md + - Hub: components/hub.md - Data explorer: data_explorer.md - Infrastructure: infrastructure.md - Manifest: manifest.md diff --git a/scripts/component_readme/generate_hub.py b/scripts/component_readme/generate_hub.py new file mode 100644 index 000000000..faffcd914 --- /dev/null +++ 
b/scripts/component_readme/generate_hub.py
@@ -0,0 +1,59 @@
+"""Generate the component hub page of the documentation.
+
+Renders `hub_template.md` with the components found under `components/` and writes the
+result to `docs/components/hub.md`. Both paths are relative to the current working
+directory, so the script is expected to be run from the repository root.
+"""
+import typing as t
+from glob import glob
+from pathlib import Path
+
+import jinja2
+
+
+def find_components() -> t.List[str]:
+    """Return the sorted names of the components that ship a README.
+
+    Only entries of `components/` that contain a `README.md` are listed, because the hub
+    template includes that file for every component. Stray files and directories
+    without a README would otherwise get a hub entry pointing at a missing file.
+    """
+    readmes = glob("components/*/README.md")
+    return sorted(Path(readme).parent.name for readme in readmes)
+
+
+def generate_hub(components: t.Iterable[str]) -> str:
+    """Render the hub page for the given components.
+
+    Args:
+        components: names of the component directories to list on the hub page
+
+    Returns:
+        The rendered markdown of the hub page.
+    """
+    env = jinja2.Environment(
+        loader=jinja2.FileSystemLoader(Path(__file__).parent),
+        # Remove the newline after block tags so the loop adds no extra blank lines
+        trim_blocks=True,
+    )
+    template = env.get_template("hub_template.md")
+
+    return template.render(components=components)
+
+
+def write_hub(hub: str) -> None:
+    """Write the rendered hub page to `docs/components/hub.md`."""
+    # Explicit encoding so the output does not depend on the platform locale
+    with open("docs/components/hub.md", "w", encoding="utf-8") as f:
+        f.write(hub)
+
+
+def main() -> None:
+    """Find the components, render the hub page and write it into the docs."""
+    components = find_components()
+    hub = generate_hub(components)
+    write_hub(hub)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/component_readme/hub_template.md b/scripts/component_readme/hub_template.md
new file mode 100644
index 000000000..76f664991
--- /dev/null
+++ b/scripts/component_readme/hub_template.md
@@ -0,0 +1,14 @@
+---
+disable_toc: True
+---
+
+# Component Hub
+
+Below you can find the reusable components offered by Fondant.
+
+{% for component in components %}
+??? "{{ component }}"
+
+    --8<-- "components/{{ component }}/README.md:1"
+
+{% endfor %}