Add defaults to components (#289)
PR that adds default arguments to the component YAMLs, making it easier to get started with reusable components by providing sensible default values.
PhilippeMoussalli authored Jul 12, 2023
1 parent b76957e commit 520da12
Showing 13 changed files with 43 additions and 21 deletions.
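For context, a minimal sketch of what the change enables (the resolve_arguments helper is hypothetical, not Fondant's actual loader): each argument in a fondant_component.yaml may now carry a `default`, so a caller only has to override the values it cares about.

# Minimal sketch, assuming a dict parsed from a component YAML; the helper
# below is illustrative, not Fondant's implementation.
spec_args = {
    # from components/caption_images/fondant_component.yaml after this commit
    "model_id": {"type": "str", "default": "Salesforce/blip-image-captioning-base"},
    "batch_size": {"type": "int", "default": 8},
    "max_new_tokens": {"type": "int", "default": 50},
}

def resolve_arguments(user_args: dict) -> dict:
    """Fall back to the spec default for any argument the user omits."""
    resolved = {}
    for name, meta in spec_args.items():
        if name in user_args:
            resolved[name] = user_args[name]
        elif "default" in meta:
            resolved[name] = meta["default"]
        else:
            raise ValueError(f"missing required argument: {name}")
    return resolved

print(resolve_arguments({"batch_size": 16}))
# {'model_id': 'Salesforce/blip-image-captioning-base', 'batch_size': 16, 'max_new_tokens': 50}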
5 changes: 4 additions & 1 deletion components/caption_images/fondant_component.yaml
@@ -18,9 +18,12 @@ args:
   model_id:
     description: id of the model on the Hugging Face hub
     type: str
+    default: "Salesforce/blip-image-captioning-base"
   batch_size:
     description: batch size to use
     type: int
+    default: 8
   max_new_tokens:
     description: maximum token length of each caption
-    type: int
+    type: int
+    default: 50
@@ -24,6 +24,8 @@ args:
   aesthetic_score:
     description: Aesthetic embedding to add to the query embedding, between 0 and 9 (higher is prettier).
     type: int
+    default: 9
   aesthetic_weight:
     description: Weight of the aesthetic embedding when added to the query, between 0 and 1
-    type: float
+    type: float
+    default: 0.5
4 changes: 3 additions & 1 deletion components/filter_comments/fondant_component.yaml
@@ -13,6 +13,8 @@ args:
   min_comments_ratio:
     description: The minimum code to comment ratio
     type: float
+    default: 0.1
   max_comments_ratio:
     description: The maximum code to comment ratio
-    type: float
+    type: float
+    default: 0.9
2 changes: 2 additions & 0 deletions components/image_cropping/fondant_component.yaml
@@ -22,6 +22,8 @@ args:
   cropping_threshold:
     description: Threshold parameter used for detecting borders. A lower (negative) parameter results in a more performant border detection, but can cause overcropping. Default is -30
     type: int
+    default: -30
   padding:
     description: Padding for the image cropping. The padding is added to all borders of the image.
     type: int
+    default: 10
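The cropping_threshold description is worth unpacking. A hedged illustration of the trade-off it names, using a generic corner-colour comparison (not this component's actual algorithm; all names hypothetical): a lower (more negative) threshold means a larger cutoff, so more rows count as border and the crop becomes more aggressive.

# Illustrative only -- generic border detection, not the component's code.
import numpy as np

def detect_top_border(img: np.ndarray, cropping_threshold: int = -30) -> int:
    """Return the first row that differs from the border colour (0 if none)."""
    border_colour = img[0, 0].astype(int)
    cutoff = abs(cropping_threshold)          # lower threshold -> larger cutoff
    for y in range(img.shape[0]):
        diff = np.abs(img[y].astype(int) - border_colour).max()
        if diff > cutoff:                     # row differs enough from border
            return y
    return 0

img = np.zeros((100, 100, 3), dtype=np.uint8)
img[40:] = 200                                # content starts at row 40
print(detect_top_border(img))                 # -> 40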
4 changes: 2 additions & 2 deletions components/image_cropping/src/main.py
@@ -34,8 +34,8 @@ def transform(
         self,
         *,
         dataframe: dd.DataFrame,
-        cropping_threshold: int = -30,
-        padding: int = 10,
+        cropping_threshold: int,
+        padding: int,
     ) -> dd.DataFrame:
         """
         Args:
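Note the direction of this change: the defaults move out of the Python signature and into the YAML spec, leaving one source of truth. A sketch of the assumed call flow (hypothetical runner, not Fondant's internals): the runtime resolves spec defaults before invoking transform, so a duplicate `= -30` in Python could only drift out of sync with the spec.

# Sketch with assumed names; shows why the signature defaults became redundant.
yaml_defaults = {"cropping_threshold": -30, "padding": 10}  # now lives in the spec

def transform(*, cropping_threshold: int, padding: int) -> None:
    print(cropping_threshold, padding)

def run_component(user_args: dict) -> None:
    transform(**{**yaml_defaults, **user_args})  # user values override defaults

run_component({"padding": 0})  # -> -30 0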
4 changes: 3 additions & 1 deletion components/image_embedding/fondant_component.yaml
@@ -20,6 +20,8 @@ args:
   model_id:
     description: Model id on the Hugging Face hub (e.g. "openai/clip-vit-large-patch14")
     type: str
+    default: "openai/clip-vit-large-patch14"
   batch_size:
     description: Batch size to use when embedding
-    type: int
+    type: int
+    default: 8
3 changes: 2 additions & 1 deletion components/language_filter/fondant_component.yaml
@@ -11,4 +11,5 @@ consumes:
 args:
   language:
     description: A valid language code or identifier (e.g., "en", "fr", "de").
-    type: str
+    type: str
+    default: "en"
10 changes: 5 additions & 5 deletions components/language_filter/src/main.py
@@ -7,21 +7,22 @@

 logger = logging.getLogger(__name__)

+MODEL_PATH = "lid.176.ftz"
+

 class LanguageIdentification:
     """A class for language detection using FastText."""

-    def __init__(self, language, model_path: str = "lid.176.ftz"):
+    def __init__(self,
+                 language: str):
         """
         Initializes the LanguageDetect class.
         Args:
             language (str): language to filter on
-            model_path (str): The path to the FastText language identification model.
         """
-        pretrained_lang_model_weight_path = model_path
         self.language = language
-        self.model = fasttext.load_model(pretrained_lang_model_weight_path)
+        self.model = fasttext.load_model(MODEL_PATH)

     def predict_lang(self, text: str):
         """
@@ -52,7 +53,6 @@ def setup(self, *, language):
         """
         self.lang_detector = LanguageIdentification(language)

-
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
         """
         Args:
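The body of predict_lang is collapsed in this view. As a rough sketch (an assumption, not the file's actual code), a FastText language check matching the constructor above could look like this; fasttext's predict(text, k=1) returns labels of the form "__label__en":

import fasttext

model = fasttext.load_model("lid.176.ftz")  # same weights as MODEL_PATH above

def is_language(text: str, language: str) -> bool:
    # fasttext rejects newlines, so collapse the text to one line first
    labels, _scores = model.predict(text.replace("\n", " "), k=1)
    return labels[0] == f"__label__{language}"

print(is_language("Bonjour tout le monde", "fr"))  # True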
@@ -22,10 +22,13 @@ args:
   aesthetic_score:
     description: Aesthetic embedding to add to the query embedding, between 0 and 9 (higher is prettier).
     type: int
+    default: 9
   aesthetic_weight:
     description: Weight of the aesthetic embedding when added to the query, between 0 and 1
     type: float
+    default: 0.5
   url:
     description: The url of the backend clip retrieval service, defaults to the public service
     type: str
-    default: https://knn.laion.ai/knn-service
+    default: https://knn.laion.ai/knn-service
4 changes: 3 additions & 1 deletion components/segment_images/fondant_component.yaml
@@ -18,6 +18,8 @@ args:
   model_id:
     description: id of the model on the Hugging Face hub
     type: str
+    default: "openmmlab/upernet-convnext-small"
   batch_size:
     description: batch size to use
-    type: int
+    type: int
+    default: 8
9 changes: 7 additions & 2 deletions components/segment_images/src/main.py
@@ -40,7 +40,10 @@ def convert_to_rgb(seg: np.array) -> bytes:
     return crop_bytes.getvalue()


-def process_image(image: bytes, *, processor: SegformerImageProcessor, device: str) -> torch.Tensor:
+def process_image(image: bytes,
+                  *,
+                  processor: SegformerImageProcessor,
+                  device: str) -> torch.Tensor:
     """
     Process the image to a tensor.
@@ -65,7 +68,9 @@ def transform(img: Image) -> BatchFeature:


 @torch.no_grad()
-def segment_image_batch(image_batch: pd.DataFrame, *, model: AutoModelForSemanticSegmentation,
+def segment_image_batch(image_batch: pd.DataFrame,
+                        *,
+                        model: AutoModelForSemanticSegmentation,
                         processor: SegformerImageProcessor) -> pd.Series:
     """Embed a batch of images."""
     input_batch = torch.cat(image_batch.tolist())
@@ -63,5 +63,4 @@ services:
       second_component:
         condition: service_completed_successfully
     volumes: []
-version: '3.8'
-
+version: '3.8'
@@ -34,13 +34,14 @@ services:
         "height": {"type": "int16"}}}}, "args": {"cropping_threshold": {"description":
         "Threshold parameter used for detecting borders. A lower (negative) parameter
         results in a more performant border detection, but can cause overcropping. Default
-        is -30", "type": "int"}, "padding": {"description": "Padding for the image cropping.
-        The padding is added to all borders of the image.", "type": "int"}}}'
+        is -30", "type": "int", "default": -30}, "padding": {"description": "Padding
+        for the image cropping. The padding is added to all borders of the image.",
+        "type": "int", "default": 10}}}'
       - --input_manifest_path
       - /foo/bar/first_component/manifest.json
       depends_on:
         first_component:
           condition: service_completed_successfully
       image: ghcr.io/ml6team/image_cropping:dev
     volumes: []
-version: '3.8'
+version: '3.8'
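The compiled docker-compose files pass the whole component spec as a single JSON string on the command line, so the new defaults travel with it. A small sketch of reading them back (spec trimmed to the changed args, illustrative only):

import json

# Trimmed version of the embedded spec above, reduced to the changed args.
spec_json = (
    '{"args": {"cropping_threshold": {"type": "int", "default": -30}, '
    '"padding": {"type": "int", "default": 10}}}'
)

spec = json.loads(spec_json)
defaults = {name: meta["default"] for name, meta in spec["args"].items()}
print(defaults)  # {'cropping_threshold': -30, 'padding': 10}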
