Skip to content

Commit

Permalink
Fix Readme script generation (#821)
Browse files Browse the repository at this point in the history
This fixes the failing builds but is it what we want ?
  • Loading branch information
GeorgesLorre committed Jan 30, 2024
1 parent d87efb9 commit 8b9713a
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 36 deletions.
2 changes: 1 addition & 1 deletion components/caption_images/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pipeline = Pipeline(...)
dataset = pipeline.read(...)

dataset = dataset.apply(
"caption_images",
"",
arguments={
# Add arguments
# "model_id": "Salesforce/blip-image-captioning-base",
Expand Down
24 changes: 14 additions & 10 deletions scripts/component_readme/generate_readme.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path

import jinja2

from fondant.core.component_spec import ComponentSpec


Expand All @@ -11,24 +12,25 @@ def read_component_spec(component_spec_path: Path) -> ComponentSpec:

def generate_readme(component_spec: ComponentSpec, *, component_dir: Path) -> str:
env = jinja2.Environment(
loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent),
trim_blocks=True
loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent), trim_blocks=True
)
env.filters["eval"] = eval

template = env.get_template("readme_template.md")

return template.render(
id=component_dir.name,
component_id=component_spec.safe_name,
name=component_spec.name,
component_folder_name=component_spec.component_folder_name,
description=component_spec.description,
consumes=component_spec.consumes,
produces=component_spec.produces,
is_consumes_generic=component_spec.is_generic("consumes"),
is_produces_generic=component_spec.is_generic("produces"),
arguments=[arg for arg in component_spec.args.values()
if arg.name not in component_spec.default_arguments],
arguments=[
arg
for arg in component_spec.args.values()
if arg.name not in component_spec.default_arguments
],
tests=(component_dir / "tests").exists(),
tags=component_spec.tags,
)
Expand All @@ -48,10 +50,12 @@ def main(component_spec_path: Path):

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("component_specs",
nargs="+",
type=Path,
help="Path to the component spec to generate a readme from")
parser.add_argument(
"component_specs",
nargs="+",
type=Path,
help="Path to the component spec to generate a readme from",
)
args = parser.parse_args()

for spec in args.component_specs:
Expand Down
16 changes: 8 additions & 8 deletions scripts/component_readme/readme_template.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# {{ name }}

<a id="{{ component_folder_name }}#description"></a>
<a id="{{ component_id }}#description"></a>
## Description
{{ description }}

<a id="{{ component_folder_name }}#inputs_outputs"></a>
<a id="{{ component_id }}#inputs_outputs"></a>
## Inputs / outputs

<a id="{{ component_folder_name }}#consumes"></a>
<a id="{{ component_id }}#consumes"></a>
### Consumes
{% if consumes %}
**This component consumes:**
Expand All @@ -33,7 +33,7 @@ See the usage example below on how to define a field name for additional fields.
{% endif %}


<a id="{{ component_folder_name }}#produces"></a>
<a id="{{ component_id }}#produces"></a>
### Produces
{% if produces %}
**This component produces:**
Expand All @@ -55,7 +55,7 @@ the type of the field that should be used to write the output dataset.
**This component does not produce data.**
{% endif %}

<a id="{{ component_folder_name }}#arguments"></a>
<a id="{{ component_id }}#arguments"></a>
## Arguments

{% if arguments %}
Expand All @@ -70,7 +70,7 @@ The component takes the following arguments to alter its behavior:
This component takes no arguments.
{% endif %}

<a id="{{ component_folder_name }}#usage"></a>
<a id="{{ component_id }}#usage"></a>
## Usage

You can add this component to your pipeline using the following code:
Expand All @@ -94,7 +94,7 @@ dataset = dataset.apply(...)
dataset.write(
{% endif %}
{% endif %}
"{{ id }}",
"{{ component_id }}",
arguments={
# Add arguments
{% for argument in arguments %}
Expand All @@ -121,7 +121,7 @@ dataset.write(
```

{% if tests %}
<a id="{{ component_folder_name }}#testing"></a>
<a id="{{ component_id }}#testing"></a>
## Testing

You can run the tests using docker with BuildKit. From this directory, run:
Expand Down
2 changes: 1 addition & 1 deletion src/fondant/component/data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def _write_dataframe(self, dataframe: dd.DataFrame) -> dd.core.Scalar:
"""Create dataframe writing task."""
location = (
f"{self.manifest.base_path}/{self.manifest.pipeline_name}/"
f"{self.manifest.run_id}/{self.operation_spec.component_folder_name}"
f"{self.manifest.run_id}/{self.operation_spec.component_name}"
)

schema = {
Expand Down
12 changes: 8 additions & 4 deletions src/fondant/core/component_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def __init__(
tags: t.Optional[t.List[str]] = None,
):
spec_dict: t.Dict[str, t.Any] = {
"name": self.sanitized_component_name(name),
"name": name,
"image": image,
}

Expand Down Expand Up @@ -179,6 +179,10 @@ def from_dict(cls, component_spec_dict: t.Dict[str, t.Any]) -> "ComponentSpec":
def name(self):
return self._specification["name"]

@property
def safe_name(self):
return self.sanitized_component_name(self._specification["name"])

def sanitized_component_name(self, name) -> str:
"""Cleans and converts a component name."""
return name.lower().replace(" ", "_")
Expand Down Expand Up @@ -516,9 +520,9 @@ def outer_produces(self) -> t.Mapping[str, Field]:
return self._outer_produces

@property
def component_folder_name(self) -> str:
"""Get the component folder name."""
return self._component_spec.name
def component_name(self) -> str:
"""Get the component name."""
return self._component_spec.safe_name

@property
def previous_index(self) -> t.Optional[str]:
Expand Down
2 changes: 1 addition & 1 deletion src/fondant/core/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def evolve( # : PLR0912 (too many branches)
evolved_manifest = self.copy()

# Update `run_id` and `component_id` in the metadata
component_id = operation_spec.component_folder_name
component_id = operation_spec.component_name
evolved_manifest.update_metadata(key="component_id", value=component_id)
evolved_manifest.update_metadata(key="run_id", value=run_id)

Expand Down
2 changes: 1 addition & 1 deletion src/fondant/pipeline/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def from_fondant_component_spec(
re.sub(
"-+",
"-",
re.sub("[^-0-9a-z]+", "-", fondant_component.name.lower()),
re.sub("[^-0-9a-z]+", "-", fondant_component.safe_name.lower()),
)
.lstrip("-")
.rstrip("-")
Expand Down
2 changes: 1 addition & 1 deletion src/fondant/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def _get_registry_path(name: str) -> Path:

@property
def component_name(self) -> str:
return self.component_spec.name
return self.component_spec.safe_name

def get_component_cache_key(
self,
Expand Down
7 changes: 5 additions & 2 deletions tests/component/test_data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ def test_write_dataset(
data_writer.write_dataframe(dataframe, dask_client)
# read written data and assert
dataframe = dd.read_parquet(
temp_dir / manifest.pipeline_name / manifest.run_id / component_spec.name,
temp_dir
/ manifest.pipeline_name
/ manifest.run_id
/ component_spec.safe_name,
)
assert len(dataframe) == NUMBER_OF_TEST_ROWS
assert list(dataframe.columns) == columns
Expand Down Expand Up @@ -178,7 +181,7 @@ def test_write_dataset_custom_produces(
temp_dir
/ manifest.pipeline_name
/ manifest.run_id
/ component_spec_produces.name,
/ component_spec_produces.safe_name,
)
assert len(dataframe) == NUMBER_OF_TEST_ROWS
assert list(dataframe.columns) == expected_columns
Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_component_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_component_spec_no_args(valid_fondant_schema_no_args):
"""Test that a component spec without args is supported."""
fondant_component = ComponentSpec.from_dict(valid_fondant_schema_no_args)

assert fondant_component.name == "example_component"
assert fondant_component.name == "Example component"
assert fondant_component.description == "This is an example component"
assert fondant_component.args == fondant_component.default_arguments

Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_manifest_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,5 +135,5 @@ def test_component_spec_location_update():
)

assert evolved_manifest.index.location.endswith(
component_spec.name,
component_spec.safe_name,
)
2 changes: 1 addition & 1 deletion tests/pipeline/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def load(self) -> dd.DataFrame:

component = ComponentOp.from_ref(Foo, produces={"bar": pa.string()})
assert component.component_spec._specification == {
"name": "foo",
"name": "Foo",
"image": fondant_image_name,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down
8 changes: 4 additions & 4 deletions tests/pipeline/test_python_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def load(self) -> dd.DataFrame:
].operation_spec.to_dict()
assert operation_spec_dict == {
"specification": {
"name": "createdata",
"name": "CreateData",
"image": "python:3.8-slim-buster",
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down Expand Up @@ -138,7 +138,7 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
operation_spec_dict = pipeline._graph["addn"]["operation"].operation_spec.to_dict()
assert operation_spec_dict == {
"specification": {
"name": "addn",
"name": "AddN",
"image": default_fondant_image,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down Expand Up @@ -200,7 +200,7 @@ def load(self) -> dd.DataFrame:

assert operation_spec_without_image == {
"specification": {
"name": "createdata",
"name": "CreateData",
"image": "python:3.8-slim-buster",
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down Expand Up @@ -289,7 +289,7 @@ def load(self) -> dd.DataFrame:

assert operation_spec_without_image == {
"specification": {
"name": "createdata",
"name": "CreateData",
"image": default_fondant_image,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down

0 comments on commit 8b9713a

Please sign in to comment.