Fix Readme script generation (#821)

This fixes the failing builds, but is it what we want?
ml6team · Jan 30, 2024 · 8b9713a
1 parent d87efb9
commit 8b9713a
Show file tree

Hide file tree

Showing 13 changed files with 47 additions and 36 deletions.
diff --git a/components/caption_images/README.md b/components/caption_images/README.md
@@ -49,7 +49,7 @@ pipeline = Pipeline(...)
 dataset = pipeline.read(...)
 
 dataset = dataset.apply(
- "caption_images",
+ "",
  arguments={
  # Add arguments
  # "model_id": "Salesforce/blip-image-captioning-base",

diff --git a/scripts/component_readme/generate_readme.py b/scripts/component_readme/generate_readme.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 
 import jinja2
+
 from fondant.core.component_spec import ComponentSpec
 
 
@@ -11,24 +12,25 @@ def read_component_spec(component_spec_path: Path) -> ComponentSpec:
 
 def generate_readme(component_spec: ComponentSpec, *, component_dir: Path) -> str:
  env = jinja2.Environment(
- loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent),
- trim_blocks=True
+ loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent), trim_blocks=True
  )
  env.filters["eval"] = eval
 
  template = env.get_template("readme_template.md")
 
  return template.render(
- id=component_dir.name,
+ component_id=component_spec.safe_name,
  name=component_spec.name,
- component_folder_name=component_spec.component_folder_name,
  description=component_spec.description,
  consumes=component_spec.consumes,
  produces=component_spec.produces,
  is_consumes_generic=component_spec.is_generic("consumes"),
  is_produces_generic=component_spec.is_generic("produces"),
- arguments=[arg for arg in component_spec.args.values()
- if arg.name not in component_spec.default_arguments],
+ arguments=[
+ arg
+ for arg in component_spec.args.values()
+ if arg.name not in component_spec.default_arguments
+ ],
  tests=(component_dir / "tests").exists(),
  tags=component_spec.tags,
  )
@@ -48,10 +50,12 @@ def main(component_spec_path: Path):
 
 if __name__ == "__main__":
  parser = argparse.ArgumentParser()
- parser.add_argument("component_specs",
- nargs="+",
- type=Path,
- help="Path to the component spec to generate a readme from")
+ parser.add_argument(
+ "component_specs",
+ nargs="+",
+ type=Path,
+ help="Path to the component spec to generate a readme from",
+ )
  args = parser.parse_args()
 
  for spec in args.component_specs:

diff --git a/scripts/component_readme/readme_template.md b/scripts/component_readme/readme_template.md
@@ -1,13 +1,13 @@
 # {{ name }}
 
-<a id="{{ component_folder_name }}#description"></a>
+<a id="{{ component_id }}#description"></a>
 ## Description
 {{ description }}
 
-<a id="{{ component_folder_name }}#inputs_outputs"></a>
+<a id="{{ component_id }}#inputs_outputs"></a>
 ## Inputs / outputs 
 
-<a id="{{ component_folder_name }}#consumes"></a>
+<a id="{{ component_id }}#consumes"></a>
 ### Consumes 
 {% if consumes %}
 **This component consumes:**
@@ -33,7 +33,7 @@ See the usage example below on how to define a field name for additional fields.
 {% endif %}
 
 
-<a id="{{ component_folder_name }}#produces"></a> 
+<a id="{{ component_id }}#produces"></a> 
 ### Produces 
 {% if produces %}
 **This component produces:**
@@ -55,7 +55,7 @@ the type of the field that should be used to write the output dataset.
 **This component does not produce data.**
 {% endif %}
 
-<a id="{{ component_folder_name }}#arguments"></a>
+<a id="{{ component_id }}#arguments"></a>
 ## Arguments
 
 {% if arguments %}
@@ -70,7 +70,7 @@ The component takes the following arguments to alter its behavior:
 This component takes no arguments.
 {% endif %}
 
-<a id="{{ component_folder_name }}#usage"></a>
+<a id="{{ component_id }}#usage"></a>
 ## Usage 
 
 You can add this component to your pipeline using the following code:
@@ -94,7 +94,7 @@ dataset = dataset.apply(...)
 dataset.write(
 {% endif %}
 {% endif %}
- "{{ id }}",
+ "{{ component_id }}",
  arguments={
  # Add arguments
 {% for argument in arguments %}
@@ -121,7 +121,7 @@ dataset.write(
 ```
 
 {% if tests %}
-<a id="{{ component_folder_name }}#testing"></a>
+<a id="{{ component_id }}#testing"></a>
 ## Testing
 
 You can run the tests using docker with BuildKit. From this directory, run:

diff --git a/src/fondant/component/data_io.py b/src/fondant/component/data_io.py
@@ -199,7 +199,7 @@ def _write_dataframe(self, dataframe: dd.DataFrame) -> dd.core.Scalar:
  """Create dataframe writing task."""
  location = (
  f"{self.manifest.base_path}/{self.manifest.pipeline_name}/"
- f"{self.manifest.run_id}/{self.operation_spec.component_folder_name}"
+ f"{self.manifest.run_id}/{self.operation_spec.component_name}"
  )
 
  schema = {

diff --git a/src/fondant/core/component_spec.py b/src/fondant/core/component_spec.py
@@ -100,7 +100,7 @@ def __init__(
  tags: t.Optional[t.List[str]] = None,
  ):
  spec_dict: t.Dict[str, t.Any] = {
- "name": self.sanitized_component_name(name),
+ "name": name,
  "image": image,
  }
 
@@ -179,6 +179,10 @@ def from_dict(cls, component_spec_dict: t.Dict[str, t.Any]) -> "ComponentSpec":
  def name(self):
  return self._specification["name"]
 
+ @property
+ def safe_name(self):
+ return self.sanitized_component_name(self._specification["name"])
+
  def sanitized_component_name(self, name) -> str:
  """Cleans and converts a component name."""
  return name.lower().replace(" ", "_")
@@ -516,9 +520,9 @@ def outer_produces(self) -> t.Mapping[str, Field]:
  return self._outer_produces
 
  @property
- def component_folder_name(self) -> str:
- """Get the component folder name."""
- return self._component_spec.name
+ def component_name(self) -> str:
+ """Get the component name."""
+ return self._component_spec.safe_name
 
  @property
  def previous_index(self) -> t.Optional[str]:

diff --git a/src/fondant/core/manifest.py b/src/fondant/core/manifest.py
@@ -256,7 +256,7 @@ def evolve( # noqa: PLR0912 (too many branches)
  evolved_manifest = self.copy()
 
  # Update `run_id` and `component_id` in the metadata
- component_id = operation_spec.component_folder_name
+ component_id = operation_spec.component_name
  evolved_manifest.update_metadata(key="component_id", value=component_id)
  evolved_manifest.update_metadata(key="run_id", value=run_id)
 

diff --git a/src/fondant/pipeline/compiler.py b/src/fondant/pipeline/compiler.py
@@ -358,7 +358,7 @@ def from_fondant_component_spec(
  re.sub(
  "-+",
  "-",
- re.sub("[^-0-9a-z]+", "-", fondant_component.name.lower()),
+ re.sub("[^-0-9a-z]+", "-", fondant_component.safe_name.lower()),
  )
  .lstrip("-")
  .rstrip("-")

diff --git a/src/fondant/pipeline/pipeline.py b/src/fondant/pipeline/pipeline.py
@@ -305,7 +305,7 @@ def _get_registry_path(name: str) -> Path:
 
  @property
  def component_name(self) -> str:
- return self.component_spec.name
+ return self.component_spec.safe_name
 
  def get_component_cache_key(
  self,

diff --git a/tests/component/test_data_io.py b/tests/component/test_data_io.py
@@ -137,7 +137,10 @@ def test_write_dataset(
  data_writer.write_dataframe(dataframe, dask_client)
  # read written data and assert
  dataframe = dd.read_parquet(
- temp_dir / manifest.pipeline_name / manifest.run_id / component_spec.name,
+ temp_dir
+ / manifest.pipeline_name
+ / manifest.run_id
+ / component_spec.safe_name,
  )
  assert len(dataframe) == NUMBER_OF_TEST_ROWS
  assert list(dataframe.columns) == columns
@@ -178,7 +181,7 @@ def test_write_dataset_custom_produces(
  temp_dir
  / manifest.pipeline_name
  / manifest.run_id
- / component_spec_produces.name,
+ / component_spec_produces.safe_name,
  )
  assert len(dataframe) == NUMBER_OF_TEST_ROWS
  assert list(dataframe.columns) == expected_columns

diff --git a/tests/core/test_component_specs.py b/tests/core/test_component_specs.py
@@ -91,7 +91,7 @@ def test_component_spec_no_args(valid_fondant_schema_no_args):
  """Test that a component spec without args is supported."""
  fondant_component = ComponentSpec.from_dict(valid_fondant_schema_no_args)
 
- assert fondant_component.name == "example_component"
+ assert fondant_component.name == "Example component"
  assert fondant_component.description == "This is an example component"
  assert fondant_component.args == fondant_component.default_arguments
 

diff --git a/tests/core/test_manifest_evolution.py b/tests/core/test_manifest_evolution.py
@@ -135,5 +135,5 @@ def test_component_spec_location_update():
  )
 
  assert evolved_manifest.index.location.endswith(
- component_spec.name,
+ component_spec.safe_name,
  )
diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
@@ -93,7 +93,7 @@ def load(self) -> dd.DataFrame:
 
  component = ComponentOp.from_ref(Foo, produces={"bar": pa.string()})
  assert component.component_spec._specification == {
- "name": "foo",
+ "name": "Foo",
  "image": fondant_image_name,
  "description": "lightweight component",
  "consumes": {"additionalProperties": True},

diff --git a/tests/pipeline/test_python_component.py b/tests/pipeline/test_python_component.py
@@ -94,7 +94,7 @@ def load(self) -> dd.DataFrame:
  ].operation_spec.to_dict()
  assert operation_spec_dict == {
  "specification": {
- "name": "createdata",
+ "name": "CreateData",
  "image": "python:3.8-slim-buster",
  "description": "lightweight component",
  "consumes": {"additionalProperties": True},
@@ -138,7 +138,7 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
  operation_spec_dict = pipeline._graph["addn"]["operation"].operation_spec.to_dict()
  assert operation_spec_dict == {
  "specification": {
- "name": "addn",
+ "name": "AddN",
  "image": default_fondant_image,
  "description": "lightweight component",
  "consumes": {"additionalProperties": True},
@@ -200,7 +200,7 @@ def load(self) -> dd.DataFrame:
 
  assert operation_spec_without_image == {
  "specification": {
- "name": "createdata",
+ "name": "CreateData",
  "image": "python:3.8-slim-buster",
  "description": "lightweight component",
  "consumes": {"additionalProperties": True},
@@ -289,7 +289,7 @@ def load(self) -> dd.DataFrame:
 
  assert operation_spec_without_image == {
  "specification": {
- "name": "createdata",
+ "name": "CreateData",
  "image": default_fondant_image,
  "description": "lightweight component",
  "consumes": {"additionalProperties": True},