Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Readme script generation #821

Merged
merged 6 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/caption_images/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pipeline = Pipeline(...)
dataset = pipeline.read(...)

dataset = dataset.apply(
"caption_images",
"",
arguments={
# Add arguments
# "model_id": "Salesforce/blip-image-captioning-base",
Expand Down
24 changes: 14 additions & 10 deletions scripts/component_readme/generate_readme.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path

import jinja2

from fondant.core.component_spec import ComponentSpec


Expand All @@ -11,24 +12,25 @@ def read_component_spec(component_spec_path: Path) -> ComponentSpec:

def generate_readme(component_spec: ComponentSpec, *, component_dir: Path) -> str:
env = jinja2.Environment(
loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent),
trim_blocks=True
loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent), trim_blocks=True
)
env.filters["eval"] = eval

template = env.get_template("readme_template.md")

return template.render(
id=component_dir.name,
component_id=component_spec.safe_name,
name=component_spec.name,
component_folder_name=component_spec.component_folder_name,
description=component_spec.description,
consumes=component_spec.consumes,
produces=component_spec.produces,
is_consumes_generic=component_spec.is_generic("consumes"),
is_produces_generic=component_spec.is_generic("produces"),
arguments=[arg for arg in component_spec.args.values()
if arg.name not in component_spec.default_arguments],
arguments=[
arg
for arg in component_spec.args.values()
if arg.name not in component_spec.default_arguments
],
tests=(component_dir / "tests").exists(),
tags=component_spec.tags,
)
Expand All @@ -48,10 +50,12 @@ def main(component_spec_path: Path):

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("component_specs",
nargs="+",
type=Path,
help="Path to the component spec to generate a readme from")
parser.add_argument(
"component_specs",
nargs="+",
type=Path,
help="Path to the component spec to generate a readme from",
)
args = parser.parse_args()

for spec in args.component_specs:
Expand Down
14 changes: 7 additions & 7 deletions scripts/component_readme/readme_template.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# {{ name }}

<a id="{{ component_folder_name }}#description"></a>
<a id="{{ component_id }}#description"></a>
## Description
{{ description }}

<a id="{{ component_folder_name }}#inputs_outputs"></a>
<a id="{{ component_id }}#inputs_outputs"></a>
## Inputs / outputs

<a id="{{ component_folder_name }}#consumes"></a>
<a id="{{ component_id }}#consumes"></a>
### Consumes
{% if consumes %}
**This component consumes:**
Expand All @@ -33,7 +33,7 @@ See the usage example below on how to define a field name for additional fields.
{% endif %}


<a id="{{ component_folder_name }}#produces"></a>
<a id="{{ component_id }}#produces"></a>
### Produces
{% if produces %}
**This component produces:**
Expand All @@ -55,7 +55,7 @@ the type of the field that should be used to write the output dataset.
**This component does not produce data.**
{% endif %}

<a id="{{ component_folder_name }}#arguments"></a>
<a id="{{ component_id }}#arguments"></a>
## Arguments

{% if arguments %}
Expand All @@ -70,7 +70,7 @@ The component takes the following arguments to alter its behavior:
This component takes no arguments.
{% endif %}

<a id="{{ component_folder_name }}#usage"></a>
<a id="{{ component_id }}#usage"></a>
## Usage

You can add this component to your pipeline using the following code:
Expand Down Expand Up @@ -121,7 +121,7 @@ dataset.write(
```

{% if tests %}
<a id="{{ component_folder_name }}#testing"></a>
<a id="{{ component_id }}#testing"></a>
## Testing

You can run the tests using docker with BuildKit. From this directory, run:
Expand Down
2 changes: 1 addition & 1 deletion src/fondant/component/data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def _write_dataframe(self, dataframe: dd.DataFrame) -> dd.core.Scalar:
"""Create dataframe writing task."""
location = (
f"{self.manifest.base_path}/{self.manifest.pipeline_name}/"
f"{self.manifest.run_id}/{self.operation_spec.component_folder_name}"
f"{self.manifest.run_id}/{self.operation_spec.component_name}"
)

schema = {
Expand Down
12 changes: 8 additions & 4 deletions src/fondant/core/component_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def __init__(
tags: t.Optional[t.List[str]] = None,
):
spec_dict: t.Dict[str, t.Any] = {
"name": self.sanitized_component_name(name),
"name": name,
"image": image,
}

Expand Down Expand Up @@ -179,6 +179,10 @@ def from_dict(cls, component_spec_dict: t.Dict[str, t.Any]) -> "ComponentSpec":
def name(self):
return self._specification["name"]

@property
def safe_name(self):
return self.sanitized_component_name(self._specification["name"])

def sanitized_component_name(self, name) -> str:
"""Cleans and converts a component name."""
return name.lower().replace(" ", "_")
Expand Down Expand Up @@ -516,9 +520,9 @@ def outer_produces(self) -> t.Mapping[str, Field]:
return self._outer_produces

@property
def component_folder_name(self) -> str:
"""Get the component folder name."""
return self._component_spec.name
def component_name(self) -> str:
"""Get the component name."""
return self._component_spec.safe_name

@property
def previous_index(self) -> t.Optional[str]:
Expand Down
2 changes: 1 addition & 1 deletion src/fondant/core/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def evolve( # : PLR0912 (too many branches)
evolved_manifest = self.copy()

# Update `run_id` and `component_id` in the metadata
component_id = operation_spec.component_folder_name
component_id = operation_spec.component_name
evolved_manifest.update_metadata(key="component_id", value=component_id)
evolved_manifest.update_metadata(key="run_id", value=run_id)

Expand Down
2 changes: 1 addition & 1 deletion src/fondant/pipeline/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def from_fondant_component_spec(
re.sub(
"-+",
"-",
re.sub("[^-0-9a-z]+", "-", fondant_component.name.lower()),
re.sub("[^-0-9a-z]+", "-", fondant_component.safe_name.lower()),
)
.lstrip("-")
.rstrip("-")
Expand Down
2 changes: 1 addition & 1 deletion src/fondant/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def _get_registry_path(name: str) -> Path:

@property
def component_name(self) -> str:
return self.component_spec.name
return self.component_spec.safe_name

def get_component_cache_key(
self,
Expand Down
7 changes: 5 additions & 2 deletions tests/component/test_data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ def test_write_dataset(
data_writer.write_dataframe(dataframe, dask_client)
# read written data and assert
dataframe = dd.read_parquet(
temp_dir / manifest.pipeline_name / manifest.run_id / component_spec.name,
temp_dir
/ manifest.pipeline_name
/ manifest.run_id
/ component_spec.safe_name,
)
assert len(dataframe) == NUMBER_OF_TEST_ROWS
assert list(dataframe.columns) == columns
Expand Down Expand Up @@ -178,7 +181,7 @@ def test_write_dataset_custom_produces(
temp_dir
/ manifest.pipeline_name
/ manifest.run_id
/ component_spec_produces.name,
/ component_spec_produces.safe_name,
)
assert len(dataframe) == NUMBER_OF_TEST_ROWS
assert list(dataframe.columns) == expected_columns
Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_component_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_component_spec_no_args(valid_fondant_schema_no_args):
"""Test that a component spec without args is supported."""
fondant_component = ComponentSpec.from_dict(valid_fondant_schema_no_args)

assert fondant_component.name == "example_component"
assert fondant_component.name == "Example component"
assert fondant_component.description == "This is an example component"
assert fondant_component.args == fondant_component.default_arguments

Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_manifest_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,5 +135,5 @@ def test_component_spec_location_update():
)

assert evolved_manifest.index.location.endswith(
component_spec.name,
component_spec.safe_name,
)
2 changes: 1 addition & 1 deletion tests/pipeline/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def load(self) -> dd.DataFrame:

component = ComponentOp.from_ref(Foo, produces={"bar": pa.string()})
assert component.component_spec._specification == {
"name": "foo",
"name": "Foo",
"image": fondant_image_name,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down
8 changes: 4 additions & 4 deletions tests/pipeline/test_python_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def load(self) -> dd.DataFrame:
].operation_spec.to_dict()
assert operation_spec_dict == {
"specification": {
"name": "createdata",
"name": "CreateData",
"image": "python:3.8-slim-buster",
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down Expand Up @@ -138,7 +138,7 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
operation_spec_dict = pipeline._graph["addn"]["operation"].operation_spec.to_dict()
assert operation_spec_dict == {
"specification": {
"name": "addn",
"name": "AddN",
"image": default_fondant_image,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down Expand Up @@ -200,7 +200,7 @@ def load(self) -> dd.DataFrame:

assert operation_spec_without_image == {
"specification": {
"name": "createdata",
"name": "CreateData",
"image": "python:3.8-slim-buster",
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down Expand Up @@ -289,7 +289,7 @@ def load(self) -> dd.DataFrame:

assert operation_spec_without_image == {
"specification": {
"name": "createdata",
"name": "CreateData",
"image": default_fondant_image,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
Expand Down
Loading