From 0f19979a37023782e06ecde06247f1bbb9e48745 Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 13:47:50 +0200 Subject: [PATCH 1/9] Move docker registry to DockerHub --- .github/workflows/build.yaml | 10 ++++-- .github/workflows/prep-release.yaml | 10 ++++-- .github/workflows/release.yaml | 10 ++++-- .../caption_images/fondant_component.yaml | 2 +- .../download_images/fondant_component.yaml | 2 +- .../embed_images/fondant_component.yaml | 2 +- .../fondant_component.yaml | 2 +- .../filter_comments/fondant_component.yaml | 2 +- .../fondant_component.yaml | 2 +- .../filter_line_length/fondant_component.yaml | 2 +- .../image_cropping/fondant_component.yaml | 2 +- .../fondant_component.yaml | 2 +- .../language_filter/fondant_component.yaml | 2 +- .../load_from_files/fondant_component.yaml | 2 +- .../load_from_hf_hub/fondant_component.yaml | 2 +- .../load_from_parquet/fondant_component.yaml | 2 +- .../minhash_generator/fondant_component.yaml | 2 +- .../pii_redaction/fondant_component.yaml | 2 +- .../fondant_component.yaml | 2 +- .../segment_images/fondant_component.yaml | 2 +- .../text_length_filter/fondant_component.yaml | 2 +- .../text_normalization/fondant_component.yaml | 2 +- .../write_to_hf_hub/fondant_component.yaml | 2 +- docs/components/generic_component.md | 4 +-- docs/guides/build_a_simple_pipeline.md | 2 +- .../fondant_component.yaml | 2 +- .../read_warc_paths/fondant_component.yaml | 2 +- .../generate_prompts/fondant_component.yaml | 2 +- .../fondant_component.yaml | 2 +- .../add_clip_score/fondant_component.yaml | 2 +- .../clean_captions/fondant_component.yaml | 2 +- .../fondant_component.yaml | 2 +- .../detect_text/fondant_component.yaml | 2 +- .../filter_clip_score/fondant_component.yaml | 2 +- .../fondant_component.yaml | 2 +- .../load_from_hf_hub/fondant_component.yaml | 2 +- .../mask_images/fondant_component.yaml | 2 +- .../load_from_hf_hub/fondant_component.yaml | 2 +- .../load_from_hf_hub/fondant_component.yaml | 2 +- .../write_to_hf_hub/fondant_component.yaml | 2 +- .../load_from_hub/fondant_component.yaml | 2 +- scripts/build_components.sh | 33 ++++++++++++++----- src/fondant/cli.py | 4 +-- .../example_2/docker-compose.yml | 4 +-- .../example_2/kubeflow_pipeline.yml | 4 +-- .../example_2/vertex_pipeline.yml | 4 +-- tests/test_explorer.py | 2 +- tests/test_pipeline.py | 2 +- 48 files changed, 95 insertions(+), 66 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 54fa75a1b..d9cba60da 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -17,13 +17,17 @@ jobs: - name: Set buildx alias run: docker buildx install + + - name: Install docker pushrm + run: | + wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm + chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm - name: Login to GitHub Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN } - name: Build components run: ./scripts/build_components.sh --cache -t $GITHUB_SHA -t dev diff --git a/.github/workflows/prep-release.yaml b/.github/workflows/prep-release.yaml index e9b84416a..33175b3bc 100644 --- a/.github/workflows/prep-release.yaml +++ b/.github/workflows/prep-release.yaml @@ -24,12 +24,16 @@ jobs: - name: Set buildx alias run: docker buildx install + - name: Install docker pushrm + run: | + wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm + chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm + - name: Login to GitHub Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }} - name: Build components run: ./scripts/build_components.sh -t $GITHUB_REF_NAME diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a9b6552e8..7927b8290 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -24,12 +24,16 @@ jobs: - name: Set buildx alias run: docker buildx install + - name: Install docker pushrm + run: | + wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm + chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm + - name: Login to GitHub Container Registry uses: docker/login-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }} - name: Tag components run: ./scripts/tag_components.sh -o $GITHUB_REF_NAME -n latest diff --git a/components/caption_images/fondant_component.yaml b/components/caption_images/fondant_component.yaml index 9f12f6ef1..43380da15 100644 --- a/components/caption_images/fondant_component.yaml +++ b/components/caption_images/fondant_component.yaml @@ -1,6 +1,6 @@ name: Caption images description: This component captions images using a BLIP model from the Hugging Face hub -image: ghcr.io/ml6team/caption_images:dev +image: fndnt/caption_images:dev consumes: images: diff --git a/components/download_images/fondant_component.yaml b/components/download_images/fondant_component.yaml index fdd3e7f83..81f75225e 100644 --- a/components/download_images/fondant_component.yaml +++ b/components/download_images/fondant_component.yaml @@ -8,7 +8,7 @@ description: | [resizer](https://github.com/rom1504/img2dataset/blob/main/img2dataset/resizer.py) function from the img2dataset library. -image: ghcr.io/ml6team/download_images:dev +image: fndnt/download_images:e6501fb consumes: images: diff --git a/components/embed_images/fondant_component.yaml b/components/embed_images/fondant_component.yaml index 3c84e2e16..21ec43390 100644 --- a/components/embed_images/fondant_component.yaml +++ b/components/embed_images/fondant_component.yaml @@ -1,6 +1,6 @@ name: Embed images description: Component that generates CLIP embeddings from images -image: ghcr.io/ml6team/embed_images:dev +image: fndnt/embed_images:dev consumes: images: diff --git a/components/embedding_based_laion_retrieval/fondant_component.yaml b/components/embedding_based_laion_retrieval/fondant_component.yaml index 23c5d60da..a9e894d2a 100644 --- a/components/embedding_based_laion_retrieval/fondant_component.yaml +++ b/components/embedding_based_laion_retrieval/fondant_component.yaml @@ -2,7 +2,7 @@ name: Embedding based LAION retrieval description: | This component retrieves image URLs from LAION-5B based on a set of CLIP embeddings. It can be used to find images similar to the embedded images / captions. -image: ghcr.io/ml6team/embedding_based_laion_retrieval:dev +image: fndnt/embedding_based_laion_retrieval:dev consumes: embeddings: diff --git a/components/filter_comments/fondant_component.yaml b/components/filter_comments/fondant_component.yaml index 4368798b0..168444d71 100644 --- a/components/filter_comments/fondant_component.yaml +++ b/components/filter_comments/fondant_component.yaml @@ -1,6 +1,6 @@ name: Filter comments description: Component that filters code based on the code to comment ratio -image: ghcr.io/ml6team/filter_comments:dev +image: fndnt/filter_comments:dev consumes: code: diff --git a/components/filter_image_resolution/fondant_component.yaml b/components/filter_image_resolution/fondant_component.yaml index de0341c1d..eca292ad7 100644 --- a/components/filter_image_resolution/fondant_component.yaml +++ b/components/filter_image_resolution/fondant_component.yaml @@ -1,6 +1,6 @@ name: Filter image resolution description: Component that filters images based on minimum size and max aspect ratio -image: ghcr.io/ml6team/filter_image_resolution:dev +image: fndnt/filter_image_resolution:dev consumes: images: diff --git a/components/filter_line_length/fondant_component.yaml b/components/filter_line_length/fondant_component.yaml index d833ebaa7..e77d530fb 100644 --- a/components/filter_line_length/fondant_component.yaml +++ b/components/filter_line_length/fondant_component.yaml @@ -1,6 +1,6 @@ name: Filter line length description: Component that filters code based on line length -image: ghcr.io/ml6team/filter_line_length:dev +image: fndnt/filter_line_length:dev consumes: code: diff --git a/components/image_cropping/fondant_component.yaml b/components/image_cropping/fondant_component.yaml index 65072471d..89e2c398e 100644 --- a/components/image_cropping/fondant_component.yaml +++ b/components/image_cropping/fondant_component.yaml @@ -1,5 +1,5 @@ name: Image cropping -image: ghcr.io/ml6team/image_cropping:dev +image: fndnt/image_cropping:dev description: | This component crops out image borders. This is typically useful when working with graphical images that have single-color borders (e.g. logos, icons, etc.). diff --git a/components/image_resolution_extraction/fondant_component.yaml b/components/image_resolution_extraction/fondant_component.yaml index e3155ea6e..911de3990 100644 --- a/components/image_resolution_extraction/fondant_component.yaml +++ b/components/image_resolution_extraction/fondant_component.yaml @@ -1,6 +1,6 @@ name: Image resolution extraction description: Component that extracts image resolution data from the images -image: ghcr.io/ml6team/image_resolution_extraction:dev +image: fndnt/image_resolution_extraction:dev consumes: images: diff --git a/components/language_filter/fondant_component.yaml b/components/language_filter/fondant_component.yaml index b40f43a1f..0339138be 100644 --- a/components/language_filter/fondant_component.yaml +++ b/components/language_filter/fondant_component.yaml @@ -1,6 +1,6 @@ name: Filter languages description: A component that filters text based on the provided language. -image: ghcr.io/ml6team/filter_language:latest +image: fndnt/filter_language:latest consumes: text: diff --git a/components/load_from_files/fondant_component.yaml b/components/load_from_files/fondant_component.yaml index 6481c8e14..af8cb8e69 100644 --- a/components/load_from_files/fondant_component.yaml +++ b/components/load_from_files/fondant_component.yaml @@ -2,7 +2,7 @@ name: Load from files description: | This component loads data from files in a local or remote (AWS S3, Azure Blob storage, GCS) location. It supports the following formats: .zip, gzip, tar and tar.gz. -image: ghcr.io/ml6team/load_from_files:dev +image: fndnt/load_from_files:dev produces: file: diff --git a/components/load_from_hf_hub/fondant_component.yaml b/components/load_from_hf_hub/fondant_component.yaml index 64090a6f2..38ec90980 100644 --- a/components/load_from_hf_hub/fondant_component.yaml +++ b/components/load_from_hf_hub/fondant_component.yaml @@ -1,6 +1,6 @@ name: Load from hub description: Component that loads a dataset from the hub -image: ghcr.io/ml6team/load_from_hf_hub:dev +image: fndnt/load_from_hf_hub:dev produces: dummy_variable: #TODO: fill in here diff --git a/components/load_from_parquet/fondant_component.yaml b/components/load_from_parquet/fondant_component.yaml index 9f128a1cb..1e97e960e 100644 --- a/components/load_from_parquet/fondant_component.yaml +++ b/components/load_from_parquet/fondant_component.yaml @@ -1,6 +1,6 @@ name: Load from parquet description: Component that loads a dataset from a parquet uri -image: ghcr.io/ml6team/load_from_parquet:dev +image: fndnt/load_from_parquet:dev produces: dummy_variable: #TODO: fill in here diff --git a/components/minhash_generator/fondant_component.yaml b/components/minhash_generator/fondant_component.yaml index f1a83ae38..dbb0c6e17 100644 --- a/components/minhash_generator/fondant_component.yaml +++ b/components/minhash_generator/fondant_component.yaml @@ -1,6 +1,6 @@ name: MinHash generator description: A component that generates minhashes of text. -image: ghcr.io/ml6team/minhash_generator:latest +image: fndnt/minhash_generator:latest consumes: text: diff --git a/components/pii_redaction/fondant_component.yaml b/components/pii_redaction/fondant_component.yaml index b64f11fa3..eefb5038b 100644 --- a/components/pii_redaction/fondant_component.yaml +++ b/components/pii_redaction/fondant_component.yaml @@ -20,7 +20,7 @@ description: | PII is replaced by random data which is stored in the `replacements.json` file. A component that detects and redacts Personal Identifiable Information (PII) from code. -image: ghcr.io/ml6team/pii_redaction:dev +image: fndnt/pii_redaction:dev consumes: code: diff --git a/components/prompt_based_laion_retrieval/fondant_component.yaml b/components/prompt_based_laion_retrieval/fondant_component.yaml index 88f8d20dd..b10f62610 100644 --- a/components/prompt_based_laion_retrieval/fondant_component.yaml +++ b/components/prompt_based_laion_retrieval/fondant_component.yaml @@ -5,7 +5,7 @@ description: | the prompt sentences and the captions in the LAION dataset. This component doesn’t return the actual images, only URLs. -image: ghcr.io/ml6team/prompt_based_laion_retrieval:dev +image: fndnt/prompt_based_laion_retrieval:dev consumes: prompts: diff --git a/components/segment_images/fondant_component.yaml b/components/segment_images/fondant_component.yaml index 8f32d14f6..5da7a4167 100644 --- a/components/segment_images/fondant_component.yaml +++ b/components/segment_images/fondant_component.yaml @@ -1,6 +1,6 @@ name: Segment images description: Component that creates segmentation masks for images using a model from the Hugging Face hub -image: ghcr.io/ml6team/segment_images:dev +image: fndnt/segment_images:dev consumes: images: diff --git a/components/text_length_filter/fondant_component.yaml b/components/text_length_filter/fondant_component.yaml index bc43a34b9..8e27ac05d 100644 --- a/components/text_length_filter/fondant_component.yaml +++ b/components/text_length_filter/fondant_component.yaml @@ -1,6 +1,6 @@ name: Filter text length description: A component that filters out text based on their length -image: ghcr.io/ml6team/filter_text_length:latest +image: fndnt/filter_text_length:latest consumes: text: diff --git a/components/text_normalization/fondant_component.yaml b/components/text_normalization/fondant_component.yaml index b99625bcc..cff40afb0 100644 --- a/components/text_normalization/fondant_component.yaml +++ b/components/text_normalization/fondant_component.yaml @@ -1,5 +1,5 @@ name: Normalize text -image: ghcr.io/ml6team/text_normalization:latest +image: fndnt/text_normalization:latest description: | This component implements several text normalization techniques to clean and preprocess textual data: diff --git a/components/write_to_hf_hub/fondant_component.yaml b/components/write_to_hf_hub/fondant_component.yaml index 59c69a093..1173087ff 100644 --- a/components/write_to_hf_hub/fondant_component.yaml +++ b/components/write_to_hf_hub/fondant_component.yaml @@ -1,6 +1,6 @@ name: Write to hub description: Component that writes a dataset to the hub -image: ghcr.io/ml6team/write_to_hf_hub:dev +image: fndnt/write_to_hf_hub:dev consumes: dummy_variable: #TODO: fill in here diff --git a/docs/components/generic_component.md b/docs/components/generic_component.md index d5bf074f8..7e217c0f1 100644 --- a/docs/components/generic_component.md +++ b/docs/components/generic_component.md @@ -33,7 +33,7 @@ The component specification can be modified as follows ```yaml name: Load from hub description: Component that loads a dataset from the hub -image: ghcr.io/ml6team/load_from_hf_hub:latest +image: fndnt/load_from_hf_hub:latest consumes: images: @@ -100,7 +100,7 @@ If we want to write this dataset to a Hugging Face Hub location, we can use the ```yaml name: Write to hub description: Component that writes a dataset to the hub -image: ghcr.io/ml6team/write_to_hf_hub:latest +image: fndnt/write_to_hf_hub:latest consumes: images: diff --git a/docs/guides/build_a_simple_pipeline.md b/docs/guides/build_a_simple_pipeline.md index 1bf007268..5ebce4a67 100644 --- a/docs/guides/build_a_simple_pipeline.md +++ b/docs/guides/build_a_simple_pipeline.md @@ -57,7 +57,7 @@ Create a folder `component/load_from_hub` and create a `fondant_component.yaml` ```yaml name: Load from hub description: Component that loads a dataset from the hub -image: ghcr.io/ml6team/load_from_hf_hub:dev +image: fndnt/load_from_hf_hub:dev produces: images: diff --git a/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml b/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml index 175edf2ea..8df0f2457 100644 --- a/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml +++ b/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml @@ -1,6 +1,6 @@ name: Extract image licenses from warc description: A component that extracts images and their licenses from warc files -image: ghcr.io/ml6team/extract_images_from_warc:d4619b5 +image: fndnt/extract_images_from_warc:d4619b5 consumes: warc: diff --git a/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml b/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml index 8b774da57..9f890ee49 100644 --- a/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml +++ b/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml @@ -1,6 +1,6 @@ name: Common crawl download component description: A component that downloads parts of the common crawl -image: ghcr.io/ml6team/read_warc_paths:57404ff +image: fndnt/read_warc_paths:57404ff produces: warc: diff --git a/examples/pipelines/controlnet-interior-design/components/generate_prompts/fondant_component.yaml b/examples/pipelines/controlnet-interior-design/components/generate_prompts/fondant_component.yaml index b98226870..a1c4ede40 100644 --- a/examples/pipelines/controlnet-interior-design/components/generate_prompts/fondant_component.yaml +++ b/examples/pipelines/controlnet-interior-design/components/generate_prompts/fondant_component.yaml @@ -1,6 +1,6 @@ name: Generate prompts description: Component that generates a set of seed prompts -image: ghcr.io/ml6team/generate_prompts:dev +image: fndnt/generate_prompts:dev produces: prompts: diff --git a/examples/pipelines/controlnet-interior-design/components/write_to_hub_controlnet/fondant_component.yaml b/examples/pipelines/controlnet-interior-design/components/write_to_hub_controlnet/fondant_component.yaml index 62a7c8209..4a37bdd3d 100644 --- a/examples/pipelines/controlnet-interior-design/components/write_to_hub_controlnet/fondant_component.yaml +++ b/examples/pipelines/controlnet-interior-design/components/write_to_hub_controlnet/fondant_component.yaml @@ -1,6 +1,6 @@ name: Write to hub description: Component that writes a dataset to the hub -image: ghcr.io/ml6team/write_to_hf_hub:latest +image: fndnt/write_to_hf_hub:latest consumes: images: diff --git a/examples/pipelines/datacomp/components/add_clip_score/fondant_component.yaml b/examples/pipelines/datacomp/components/add_clip_score/fondant_component.yaml index eb973b865..1ae31d0b8 100644 --- a/examples/pipelines/datacomp/components/add_clip_score/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/add_clip_score/fondant_component.yaml @@ -1,6 +1,6 @@ name: Add CLIP score description: Component that adds the CLIP score -image: ghcr.io/ml6team/add_clip_score:dev +image: fndnt/add_clip_score:dev consumes: embeddings: diff --git a/examples/pipelines/datacomp/components/clean_captions/fondant_component.yaml b/examples/pipelines/datacomp/components/clean_captions/fondant_component.yaml index 019bdafba..d72541c74 100644 --- a/examples/pipelines/datacomp/components/clean_captions/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/clean_captions/fondant_component.yaml @@ -1,6 +1,6 @@ name: Clean captions description: Component that filters out bad captions (Empty captions, Captions with weird characters, Captions that are dates) -image: ghcr.io/ml6team/clean_captions:50f3a97878ac81670ebe624039ff0fcec0542e4f +image: fndnt/clean_captions:50f3a97878ac81670ebe624039ff0fcec0542e4f consumes: text: diff --git a/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml b/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml index d42ee5462..b18cb782c 100644 --- a/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/cluster_image_embeddings/fondant_component.yaml @@ -1,6 +1,6 @@ name: Cluster embeddings description: Component that applies k-means clustering on subsampled image embeddings -image: ghcr.io/ml6team/cluster_image_embeddings:latest +image: fndnt/cluster_image_embeddings:latest consumes: image: diff --git a/examples/pipelines/datacomp/components/detect_text/fondant_component.yaml b/examples/pipelines/datacomp/components/detect_text/fondant_component.yaml index b7593233b..bcdf822f9 100644 --- a/examples/pipelines/datacomp/components/detect_text/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/detect_text/fondant_component.yaml @@ -1,6 +1,6 @@ name: Detect text description: Component that detects text in images using an mmocr model -image: ghcr.io/ml6team/detect_text:dev +image: fndnt/detect_text:dev consumes: images: diff --git a/examples/pipelines/datacomp/components/filter_clip_score/fondant_component.yaml b/examples/pipelines/datacomp/components/filter_clip_score/fondant_component.yaml index 045af23ee..d5d71c363 100644 --- a/examples/pipelines/datacomp/components/filter_clip_score/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/filter_clip_score/fondant_component.yaml @@ -1,6 +1,6 @@ name: Filter CLIP score description: Component that filters out bad captions (Empty captions, Captions with weird characters, Captions that are dates) -image: ghcr.io/ml6team/filter_clip_score:dev +image: fndnt/filter_clip_score:dev consumes: imagetext: diff --git a/examples/pipelines/datacomp/components/filter_text_complexity/fondant_component.yaml b/examples/pipelines/datacomp/components/filter_text_complexity/fondant_component.yaml index aa7b3911e..e0bc9f444 100644 --- a/examples/pipelines/datacomp/components/filter_text_complexity/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/filter_text_complexity/fondant_component.yaml @@ -1,6 +1,6 @@ name: Filter text complexity description: Component that filters text based on their dependency parse complexity and number of actions -image: ghcr.io/ml6team/filter_text_complexity:dev +image: fndnt/filter_text_complexity:dev consumes: text: diff --git a/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml b/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml index 50f983acd..9698ff2be 100644 --- a/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/load_from_hf_hub/fondant_component.yaml @@ -1,6 +1,6 @@ name: Load from hub description: Component that loads a dataset from the hub -image: ghcr.io/ml6team/load_from_hf_hub:dev +image: fndnt/load_from_hf_hub:dev produces: images: diff --git a/examples/pipelines/datacomp/components/mask_images/fondant_component.yaml b/examples/pipelines/datacomp/components/mask_images/fondant_component.yaml index 4287b16b7..fd6f19a38 100644 --- a/examples/pipelines/datacomp/components/mask_images/fondant_component.yaml +++ b/examples/pipelines/datacomp/components/mask_images/fondant_component.yaml @@ -1,6 +1,6 @@ name: Mask images description: Component that masks images based on bounding boxes -image: ghcr.io/ml6team/mask_images:dev +image: fndnt/mask_images:dev consumes: images: diff --git a/examples/pipelines/filter-cc-25m/components/load_from_hf_hub/fondant_component.yaml b/examples/pipelines/filter-cc-25m/components/load_from_hf_hub/fondant_component.yaml index fda34b610..6d865a6a0 100644 --- a/examples/pipelines/filter-cc-25m/components/load_from_hf_hub/fondant_component.yaml +++ b/examples/pipelines/filter-cc-25m/components/load_from_hf_hub/fondant_component.yaml @@ -1,6 +1,6 @@ name: Load from hub description: Component that loads a dataset from the hub -image: ghcr.io/ml6team/load_from_hf_hub:latest +image: fndnt/load_from_hf_hub:latest produces: images: diff --git a/examples/pipelines/finetune_stable_diffusion/components/load_from_hf_hub/fondant_component.yaml b/examples/pipelines/finetune_stable_diffusion/components/load_from_hf_hub/fondant_component.yaml index aa92302e5..2f180b601 100644 --- a/examples/pipelines/finetune_stable_diffusion/components/load_from_hf_hub/fondant_component.yaml +++ b/examples/pipelines/finetune_stable_diffusion/components/load_from_hf_hub/fondant_component.yaml @@ -1,6 +1,6 @@ name: Load from hub description: Component that loads a dataset from the hub -image: ghcr.io/ml6team/load_from_hf_hub:latest +image: fndnt/load_from_hf_hub:latest produces: images: diff --git a/examples/pipelines/finetune_stable_diffusion/components/write_to_hf_hub/fondant_component.yaml b/examples/pipelines/finetune_stable_diffusion/components/write_to_hf_hub/fondant_component.yaml index 4e7119f2c..1c251ab61 100644 --- a/examples/pipelines/finetune_stable_diffusion/components/write_to_hf_hub/fondant_component.yaml +++ b/examples/pipelines/finetune_stable_diffusion/components/write_to_hf_hub/fondant_component.yaml @@ -1,6 +1,6 @@ name: Write to hub description: Component that writes a dataset to the hub -image: ghcr.io/ml6team/write_to_hf_hub:latest +image: fndnt/write_to_hf_hub:latest consumes: images: diff --git a/examples/pipelines/starcoder/components/load_from_hub/fondant_component.yaml b/examples/pipelines/starcoder/components/load_from_hub/fondant_component.yaml index 379d12f0c..2919227d7 100644 --- a/examples/pipelines/starcoder/components/load_from_hub/fondant_component.yaml +++ b/examples/pipelines/starcoder/components/load_from_hub/fondant_component.yaml @@ -1,6 +1,6 @@ name: Load code dataset from hub description: Component that loads the stack dataset from the hub -image: ghcr.io/ml6team/load_from_hf_hub:latest +image: fndnt/load_from_hf_hub:latest produces: code: diff --git a/scripts/build_components.sh b/scripts/build_components.sh index 265d08b83..91c101c26 100755 --- a/scripts/build_components.sh +++ b/scripts/build_components.sh @@ -9,16 +9,18 @@ function usage { echo " -c, --cache Use registry caching when building the components (default:false)" echo " -d, --components-dir Directory containing components to build as subdirectories. The path should be relative to the root directory (default:components)" - echo " -n, --namespace The namespace for the built images, should match the github organization (default: ml6team)" + echo " -r, --registry The docker registry prefix to use (default: null for DockerHub)" + echo " -n, --namespace The DockerHub namespace for the built images (default: fndnt)" echo " -co, --component Specific component to build. Pass the component subdirectory name(s) to build certain component(s) or 'all' to build all components in the components directory (default: all)" - echo " -r, --repo Set the repo (default: fondant)" + echo " -r, --repo Set the repo (default: ml6team/fondant)" echo " -h, --help Display this help message" } # Parse the arguments while [[ "$#" -gt 0 ]]; do case $1 in + -r |--registry) registry="$2"; shift;; -n |--namespace) namespace="$2"; shift;; -d |--components-dir ) components_dir="$2"; shift;; -r |--repo) repo="$2"; shift;; @@ -39,8 +41,8 @@ fi # Set default values for optional arguments if not passed component="${components:-all}" components_dir="${components_dir:-components}" -namespace="${namespace:-ml6team}" -repo="${repo:-fondant}" +namespace="${namespace:-fndnt}" +repo="${repo:-ml6team/fondant}" # Get the component directory scripts_dir=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) @@ -65,13 +67,16 @@ for dir in "${components_to_build[@]}"; do full_image_names=() echo "Tagging image with following tags:" for tag in "${tags[@]}"; do - full_image_name=ghcr.io/${namespace}/${BASENAME}:${tag} + full_image_name=${namespace}/${BASENAME}:${tag} + if [ -n "${registry}" ] ; then + full_image_name=${registry}/${full_image_name} + fi echo "$full_image_name" full_image_names+=("$full_image_name") done # Prevent this from mistakenly being used below - unset full_image_name +# unset full_image_name echo "Updating the image version in the fondant_component.yaml with:" echo "${full_image_names[0]}" @@ -87,7 +92,7 @@ for dir in "${components_to_build[@]}"; do # Add cache arguments if caching is enabled if [ "$caching" = true ] ; then - cache_name=ghcr.io/${namespace}/${BASENAME}:build-cache + cache_name=${registry}/${namespace}/${BASENAME}:build-cache echo "Caching from/to ${cache_name}" args+=(--cache-to "type=registry,ref=${cache_name}") args+=(--cache-from "type=registry,ref=${cache_name}") @@ -96,9 +101,21 @@ for dir in "${components_to_build[@]}"; do echo "Freezing Fondant dependency version to ${tags[0]}" docker build --push "${args[@]}" \ --build-arg="FONDANT_VERSION=${tags[0]}" \ - --label org.opencontainers.image.source=https://github.com/${namespace}/${repo} \ + --label org.opencontainers.image.source=https://github.com/${repo}/components/{BASENAME} \ . + docker fake-command ${full_image_name} | echo " + README was not pushed. + + \`docker pushrm\` might not be installed. + + To install, run: + \`wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm\` + \`chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm\` + And validate by running: + \`docker pushrm --help\` + " + popd done diff --git a/src/fondant/cli.py b/src/fondant/cli.py index 610bd6bb9..0ff06e729 100644 --- a/src/fondant/cli.py +++ b/src/fondant/cli.py @@ -111,8 +111,8 @@ def register_explore(parent_parser): "--container", "-r", type=str, - default="ghcr.io/ml6team/data_explorer", - help="Docker container to use. Defaults to ghcr.io/ml6team/data_explorer.", + default="fndnt/data_explorer", + help="Docker container to use. Defaults to fndnt/data_explorer.", ) parser.add_argument( "--tag", diff --git a/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml b/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml index bffa065c3..dd4bb1a5b 100644 --- a/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml +++ b/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml @@ -26,7 +26,7 @@ services: - 8787:8787 volumes: [] image_cropping: - image: ghcr.io/ml6team/image_cropping:dev + image: fndnt/image_cropping:dev command: - --metadata - '{"base_path": "/foo/bar", "pipeline_name": "testpipeline", "run_id": "testpipeline-20230101000000", @@ -42,7 +42,7 @@ services: - --cluster_type - default - --component_spec - - '{"name": "Image cropping", "image": "ghcr.io/ml6team/image_cropping:dev", "description": + - '{"name": "Image cropping", "image": "fndnt/image_cropping:dev", "description": "This component crops out image borders. This is typically useful when working with graphical \nimages that have single-color borders (e.g. logos, icons, etc.).\n\nThe component takes an image and calculates which color is most present in the border. diff --git a/tests/example_pipelines/compiled_pipeline/example_2/kubeflow_pipeline.yml b/tests/example_pipelines/compiled_pipeline/example_2/kubeflow_pipeline.yml index 1fe3922b3..c6c9200e6 100644 --- a/tests/example_pipelines/compiled_pipeline/example_2/kubeflow_pipeline.yml +++ b/tests/example_pipelines/compiled_pipeline/example_2/kubeflow_pipeline.yml @@ -114,7 +114,7 @@ deploymentSpec: - fondant - execute - main - image: ghcr.io/ml6team/image_cropping:dev + image: fndnt/image_cropping:dev pipelineInfo: description: description of the test pipeline name: testpipeline @@ -214,7 +214,7 @@ root: is original, right side is cropped image](../../docs/art/components/image_cropping/component_border_crop_1.png)\n![Example of image cropping by removing the single-color border. Left side is original, right side is cropped image](../../docs/art/components/image_cropping/component_border_crop_0.png)\n" - image: ghcr.io/ml6team/image_cropping:dev + image: fndnt/image_cropping:dev name: Image cropping produces: images: diff --git a/tests/example_pipelines/compiled_pipeline/example_2/vertex_pipeline.yml b/tests/example_pipelines/compiled_pipeline/example_2/vertex_pipeline.yml index 882728792..d31e119fc 100644 --- a/tests/example_pipelines/compiled_pipeline/example_2/vertex_pipeline.yml +++ b/tests/example_pipelines/compiled_pipeline/example_2/vertex_pipeline.yml @@ -114,7 +114,7 @@ deploymentSpec: - fondant - execute - main - image: ghcr.io/ml6team/image_cropping:dev + image: fndnt/image_cropping:dev pipelineInfo: description: description of the test pipeline name: testpipeline @@ -214,7 +214,7 @@ root: is original, right side is cropped image](../../docs/art/components/image_cropping/component_border_crop_1.png)\n![Example of image cropping by removing the single-color border. Left side is original, right side is cropped image](../../docs/art/components/image_cropping/component_border_crop_0.png)\n" - image: ghcr.io/ml6team/image_cropping:dev + image: fndnt/image_cropping:dev name: Image cropping produces: images: diff --git a/tests/test_explorer.py b/tests/test_explorer.py index 2187a30ac..6b9057490 100644 --- a/tests/test_explorer.py +++ b/tests/test_explorer.py @@ -4,7 +4,7 @@ import pytest from fondant.explorer import run_explorer_app -DEFAULT_CONTAINER = "ghcr.io/ml6team/data_explorer" +DEFAULT_CONTAINER = "fndnt/data_explorer" DEFAULT_TAG = "latest" DEFAULT_PORT = 8501 diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 0acad2aa2..f5d355ded 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -114,7 +114,7 @@ def test_component_op_caching_strategy(monkeypatch): monkeypatch.setattr( ComponentSpec, "image", - f"ghcr.io/component/test_component:{tag}", + f"fndnt/test_component:{tag}", ) comp_0_op_spec_0 = ComponentOp( components_path, From d95e7345811c6471ee7a8342f932ef9a693ba98a Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 13:54:22 +0200 Subject: [PATCH 2/9] Add manual trigger to build Action --- .github/workflows/build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d9cba60da..0f9100ef3 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -4,6 +4,7 @@ on: push: branches: - main + workflow_dispatch: jobs: docker: From 2497f6445f6c4aff142b84f080ba42a9d33454c1 Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 13:59:37 +0200 Subject: [PATCH 3/9] Fix build workflow --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0f9100ef3..c6abfcb28 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ jobs: uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN } + password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }} - name: Build components run: ./scripts/build_components.sh --cache -t $GITHUB_SHA -t dev From 31d1b60915a65ff23fc934b47fa017f845630801 Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 14:03:26 +0200 Subject: [PATCH 4/9] Install docker-pushrm using sudo --- .github/workflows/build.yaml | 4 ++-- .github/workflows/prep-release.yaml | 4 ++-- .github/workflows/release.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c6abfcb28..63c0f48a9 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -21,8 +21,8 @@ jobs: - name: Install docker pushrm run: | - wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm - chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm + sudo wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm + sudo chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm - name: Login to GitHub Container Registry uses: docker/login-action@v2 diff --git a/.github/workflows/prep-release.yaml b/.github/workflows/prep-release.yaml index 33175b3bc..294d13d0e 100644 --- a/.github/workflows/prep-release.yaml +++ b/.github/workflows/prep-release.yaml @@ -26,8 +26,8 @@ jobs: - name: Install docker pushrm run: | - wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm - chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm + sudo wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm + sudo chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm - name: Login to GitHub Container Registry uses: docker/login-action@v2 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7927b8290..1b2e1e61d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -26,8 +26,8 @@ jobs: - name: Install docker pushrm run: | - wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm - chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm + sudo wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm + sudo chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm - name: Login to GitHub Container Registry uses: docker/login-action@v2 From da5dedfbf8c6b1662d6fa4bec6f4363208306660 Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 14:13:08 +0200 Subject: [PATCH 5/9] Fix cache name in build script --- scripts/build_components.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/build_components.sh b/scripts/build_components.sh index 91c101c26..78393107f 100755 --- a/scripts/build_components.sh +++ b/scripts/build_components.sh @@ -92,7 +92,10 @@ for dir in "${components_to_build[@]}"; do # Add cache arguments if caching is enabled if [ "$caching" = true ] ; then - cache_name=${registry}/${namespace}/${BASENAME}:build-cache + cache_name=${namespace}/${BASENAME}:build-cache + if [ -n "${registry}" ] ; then + cache_name=${registry}/${cache_name} + fi echo "Caching from/to ${cache_name}" args+=(--cache-to "type=registry,ref=${cache_name}") args+=(--cache-from "type=registry,ref=${cache_name}") From 67ee03a910470735350dfb48212b95bf8df5a990 Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 14:16:00 +0200 Subject: [PATCH 6/9] Update data explorer scripts --- scripts/build_explorer.sh | 2 +- scripts/tag_components.sh | 4 ++-- scripts/tag_explorer.sh | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/build_explorer.sh b/scripts/build_explorer.sh index 1a1af24c7..8e401ddc5 100755 --- a/scripts/build_explorer.sh +++ b/scripts/build_explorer.sh @@ -37,7 +37,7 @@ pushd "$explorer_dir" BASENAME=${explorer_dir%/} BASENAME=${BASENAME##*/} -full_image_name=ghcr.io/${namespace}/${BASENAME}:${tag} +full_image_name=${namespace}/${BASENAME}:${tag} echo "building $full_image_name" diff --git a/scripts/tag_components.sh b/scripts/tag_components.sh index 1a3af9144..84d66b193 100755 --- a/scripts/tag_components.sh +++ b/scripts/tag_components.sh @@ -38,8 +38,8 @@ for dir in "$component_dir"/*/; do BASENAME=${dir%/} BASENAME=${BASENAME##*/} - old_image_name=ghcr.io/${namespace}/${BASENAME}:${old_tag} - new_image_name=ghcr.io/${namespace}/${BASENAME}:${new_tag} + old_image_name=${namespace}/${BASENAME}:${old_tag} + new_image_name=${namespace}/${BASENAME}:${new_tag} echo "$old_image_name" echo "$new_image_name" diff --git a/scripts/tag_explorer.sh b/scripts/tag_explorer.sh index a706383fb..e8665cd64 100755 --- a/scripts/tag_explorer.sh +++ b/scripts/tag_explorer.sh @@ -37,8 +37,8 @@ pushd "$explorer_dir" BASENAME=${explorer_dir%/} BASENAME=${BASENAME##*/} -old_image_name=ghcr.io/${namespace}/${BASENAME}:${old_tag} -new_image_name=ghcr.io/${namespace}/${BASENAME}:${new_tag} +old_image_name=${namespace}/${BASENAME}:${old_tag} +new_image_name=${namespace}/${BASENAME}:${new_tag} echo "$old_image_name" echo "$new_image_name" From 66c46d8cfb5f00ccdc22e033a1e1309289a60e53 Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 15:26:17 +0200 Subject: [PATCH 7/9] Check pushrm installation in build action --- .github/workflows/build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 63c0f48a9..8b1c47f62 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -23,6 +23,7 @@ jobs: run: | sudo wget https://github.com/christian-korneck/docker-pushrm/releases/download/v1.9.0/docker-pushrm_linux_amd64 -O /usr/libexec/docker/cli-plugins/docker-pushrm sudo chmod +x /usr/libexec/docker/cli-plugins/docker-pushrm + docker pushrm --help - name: Login to GitHub Container Registry uses: docker/login-action@v2 From 78c7f8e9b5c9548d03fb16792341a5a81a21628f Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 15:28:19 +0200 Subject: [PATCH 8/9] Remove testing artifacts from build script --- scripts/build_components.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/build_components.sh b/scripts/build_components.sh index 78393107f..396950119 100755 --- a/scripts/build_components.sh +++ b/scripts/build_components.sh @@ -75,9 +75,6 @@ for dir in "${components_to_build[@]}"; do full_image_names+=("$full_image_name") done - # Prevent this from mistakenly being used below -# unset full_image_name - echo "Updating the image version in the fondant_component.yaml with:" echo "${full_image_names[0]}" sed -i -e "s|^image: .*|image: ${full_image_names[0]}|" fondant_component.yaml @@ -107,7 +104,7 @@ for dir in "${components_to_build[@]}"; do --label org.opencontainers.image.source=https://github.com/${repo}/components/{BASENAME} \ . - docker fake-command ${full_image_name} | echo " + docker pushrm ${full_image_name} | echo " README was not pushed. \`docker pushrm\` might not be installed. From d424e3225f07c9cc0f733c3d51681ad84d8ec395 Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Thu, 12 Oct 2023 16:57:04 +0200 Subject: [PATCH 9/9] Change commit tags in component specs to dev --- components/download_images/fondant_component.yaml | 2 +- .../components/extract_images_from_warc/fondant_component.yaml | 2 +- .../components/read_warc_paths/fondant_component.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/components/download_images/fondant_component.yaml b/components/download_images/fondant_component.yaml index 81f75225e..18a52c4fb 100644 --- a/components/download_images/fondant_component.yaml +++ b/components/download_images/fondant_component.yaml @@ -8,7 +8,7 @@ description: | [resizer](https://github.com/rom1504/img2dataset/blob/main/img2dataset/resizer.py) function from the img2dataset library. -image: fndnt/download_images:e6501fb +image: fndnt/download_images:dev consumes: images: diff --git a/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml b/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml index 8df0f2457..d308126b5 100644 --- a/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml +++ b/examples/pipelines/commoncrawl/components/extract_images_from_warc/fondant_component.yaml @@ -1,6 +1,6 @@ name: Extract image licenses from warc description: A component that extracts images and their licenses from warc files -image: fndnt/extract_images_from_warc:d4619b5 +image: fndnt/extract_images_from_warc:dev consumes: warc: diff --git a/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml b/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml index 9f890ee49..10c1825d4 100644 --- a/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml +++ b/examples/pipelines/commoncrawl/components/read_warc_paths/fondant_component.yaml @@ -1,6 +1,6 @@ name: Common crawl download component description: A component that downloads parts of the common crawl -image: fndnt/read_warc_paths:57404ff +image: fndnt/read_warc_paths:dev produces: warc: