From d127f411d1c4895523346968b62bb01f87fdf409 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Tue, 7 Oct 2025 17:27:28 +0200 Subject: [PATCH 001/115] ok --- pipelex/client/client.py | 3 ++- pipelex/tools/config/config_root.py | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/pipelex/client/client.py b/pipelex/client/client.py index a199776b6..c81401d22 100644 --- a/pipelex/client/client.py +++ b/pipelex/client/client.py @@ -30,7 +30,7 @@ def __init__( api_token: str | None = None, api_base_url: str | None = None, ): - self.api_token = api_token or get_required_env("PIPELEX_API_TOKEN") + self.api_token = api_token or get_required_env("PIPELEX_API_KEY") if not self.api_token: msg = "API token is required for API execution" @@ -92,6 +92,7 @@ async def execute_pipeline( if input_memory is not None: working_memory = WorkingMemoryFactory.make_from_compact_memory(input_memory) + pipeline_request = PipelineRequestFactory.make_from_working_memory( working_memory=working_memory, output_name=output_name, diff --git a/pipelex/tools/config/config_root.py b/pipelex/tools/config/config_root.py index 65096c4cf..bde4d57cd 100644 --- a/pipelex/tools/config/config_root.py +++ b/pipelex/tools/config/config_root.py @@ -10,13 +10,6 @@ CONFIG_BASE_OVERRIDES_BEFORE_ENV = ["local"] CONFIG_BASE_OVERRIDES_AFTER_ENV = ["super"] - -class SecretMethod(StrEnum): - NONE = "none" - ENV_VAR = "env_var" - SECRET_PROVIDER = "secret_provider" - - class ConfigRoot(ConfigModel): """Main configuration class for the project. 
From a9c4f8cd20385df48b2e67ef276a6ac8445ce4d8 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 9 Oct 2025 10:58:40 +0200 Subject: [PATCH 002/115] fix specialDOmain --- pipelex/core/stuffs/stuff_factory.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pipelex/core/stuffs/stuff_factory.py b/pipelex/core/stuffs/stuff_factory.py index c11ba9d3c..cf3e74d95 100644 --- a/pipelex/core/stuffs/stuff_factory.py +++ b/pipelex/core/stuffs/stuff_factory.py @@ -8,6 +8,7 @@ from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode +from pipelex.core.domains.domain import SpecialDomain from pipelex.core.stuffs.list_content import ListContent from pipelex.core.stuffs.stuff import Stuff from pipelex.core.stuffs.stuff_content import StuffContent @@ -225,9 +226,10 @@ def make_stuff_from_stuff_content_using_search_domains( if not concept_code: msg = "Stuff content data dict is badly formed: no concept code" raise StuffFactoryError(msg) + domain_and_concept_code = ConceptFactory.make_domain_and_concept_code_from_concept_string_or_code(domain=SpecialDomain.NATIVE, concept_string_or_code=concept_code) content_value = stuff_content_dict["content"] - if NativeConceptCode.get_validated_native_concept_string(concept_string_or_code=concept_code): - concept = ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode(concept_code)) + if NativeConceptCode.get_validated_native_concept_string(concept_string_or_code=domain_and_concept_code.concept_code): + concept = ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode(domain_and_concept_code.concept_code)) content = StuffContentFactory.make_stuff_content_from_concept_with_fallback( concept=concept, value=content_value, From 62e65198c04913d364b6d7d44f2894726c470378 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Thu, 9 Oct 2025 15:57:46 
+0200 Subject: [PATCH 003/115] WIP Better explain migration errors --- .pipelex/pipelex.toml | 5 -- pipelex/cogt/config_cogt.py | 2 +- .../content_generation/content_generator.py | 14 ++--- .../content_generator_dry.py | 14 ++--- .../content_generator_protocol.py | 36 ++++++------ pipelex/cogt/exceptions.py | 20 ++++++- .../cogt/model_backends/backend_library.py | 11 +++- .../model_routing/routing_profile_library.py | 6 +- pipelex/cogt/models/model_deck.py | 2 +- pipelex/cogt/models/model_manager.py | 35 ++++++----- pipelex/config.py | 11 ++++ pipelex/core/domains/domain_factory.py | 4 +- pipelex/core/interpreter.py | 26 ++++----- pipelex/core/validation.py | 31 ++++++++++ pipelex/libraries/library_manager.py | 32 +++++----- pipelex/pipelex.py | 54 ++++++++++++----- pipelex/pipelex.toml | 13 +++++ .../plugins/mistral/mistral_extract_worker.py | 4 +- pipelex/tools/typing/pydantic_utils.py | 58 +++++++++++++++---- pipelex/urls.py | 2 + 20 files changed, 257 insertions(+), 123 deletions(-) create mode 100644 pipelex/core/validation.py create mode 100644 pipelex/urls.py diff --git a/.pipelex/pipelex.toml b/.pipelex/pipelex.toml index dddcbf095..0f6eaf0a1 100644 --- a/.pipelex/pipelex.toml +++ b/.pipelex/pipelex.toml @@ -10,10 +10,6 @@ api_key_method = "env" [cogt] -#################################################################################################### -# OCR config -#################################################################################################### - [cogt.extract_config] page_output_text_file_name = "page_text.md" @@ -22,4 +18,3 @@ page_output_text_file_name = "page_text.md" is_pipeline_tracking_enabled = false is_activity_tracking_enabled = false is_reporting_enabled = true - diff --git a/pipelex/cogt/config_cogt.py b/pipelex/cogt/config_cogt.py index eb6a2e8c9..b0b194355 100644 --- a/pipelex/cogt/config_cogt.py +++ b/pipelex/cogt/config_cogt.py @@ -65,7 +65,7 @@ def model_specs_path(self, backend_name: str) -> str: return 
f"{self.inference_config_path}/backends/{backend_name}.toml" def get_model_deck_paths(self) -> list[str]: - """Get all LLM deck TOML file paths sorted alphabetically.""" + """Get all Model deck TOML file paths sorted alphabetically.""" model_deck_paths = [ str(path) for path in find_files_in_dir( diff --git a/pipelex/cogt/content_generation/content_generator.py b/pipelex/cogt/content_generation/content_generator.py index 1d5b87c8d..fffba6d52 100644 --- a/pipelex/cogt/content_generation/content_generator.py +++ b/pipelex/cogt/content_generation/content_generator.py @@ -37,7 +37,7 @@ class ContentGenerator(ContentGeneratorProtocol): @override @update_job_metadata - async def make_llm_text( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_llm_text( self, job_metadata: JobMetadata, llm_setting_main: LLMSetting, @@ -57,7 +57,7 @@ async def make_llm_text( # pyright: ignore[reportIncompatibleMethodOverride] @override @update_job_metadata - async def make_object_direct( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_object_direct( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -80,7 +80,7 @@ async def make_object_direct( # pyright: ignore[reportIncompatibleMethodOverrid @override @update_job_metadata - async def make_text_then_object( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_text_then_object( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -127,7 +127,7 @@ async def make_text_then_object( # pyright: ignore[reportIncompatibleMethodOver @override @update_job_metadata - async def make_object_list_direct( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_object_list_direct( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -150,7 +150,7 @@ async def make_object_list_direct( # pyright: ignore[reportIncompatibleMethodOv @override @update_job_metadata - async def make_text_then_object_list( # pyright: 
ignore[reportIncompatibleMethodOverride] + async def make_text_then_object_list( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -196,7 +196,7 @@ async def make_text_then_object_list( # pyright: ignore[reportIncompatibleMetho @override @update_job_metadata - async def make_single_image( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_single_image( self, job_metadata: JobMetadata, img_gen_handle: str, @@ -219,7 +219,7 @@ async def make_single_image( # pyright: ignore[reportIncompatibleMethodOverride @override @update_job_metadata - async def make_image_list( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_image_list( self, job_metadata: JobMetadata, img_gen_handle: str, diff --git a/pipelex/cogt/content_generation/content_generator_dry.py b/pipelex/cogt/content_generation/content_generator_dry.py index 578fd30b3..9aa8359c4 100644 --- a/pipelex/cogt/content_generation/content_generator_dry.py +++ b/pipelex/cogt/content_generation/content_generator_dry.py @@ -33,7 +33,7 @@ def _text_gen_truncate_length(self) -> int: @override @update_job_metadata - async def make_llm_text( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_llm_text( self, job_metadata: JobMetadata, llm_setting_main: LLMSetting, @@ -46,7 +46,7 @@ async def make_llm_text( # pyright: ignore[reportIncompatibleMethodOverride] @override @update_job_metadata - async def make_object_direct( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_object_direct( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -70,7 +70,7 @@ class ObjectFactory(ModelFactory[object_class]): # type: ignore[valid-type] @override @update_job_metadata - async def make_text_then_object( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_text_then_object( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -91,7 +91,7 @@ async def make_text_then_object( # pyright: 
ignore[reportIncompatibleMethodOver @override @update_job_metadata - async def make_object_list_direct( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_object_list_direct( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -114,7 +114,7 @@ async def make_object_list_direct( # pyright: ignore[reportIncompatibleMethodOv @override @update_job_metadata - async def make_text_then_object_list( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_text_then_object_list( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -136,7 +136,7 @@ async def make_text_then_object_list( # pyright: ignore[reportIncompatibleMetho @override @update_job_metadata - async def make_single_image( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_single_image( self, job_metadata: JobMetadata, img_gen_handle: str, @@ -156,7 +156,7 @@ async def make_single_image( # pyright: ignore[reportIncompatibleMethodOverride @override @update_job_metadata - async def make_image_list( # pyright: ignore[reportIncompatibleMethodOverride] + async def make_image_list( self, job_metadata: JobMetadata, img_gen_handle: str, diff --git a/pipelex/cogt/content_generation/content_generator_protocol.py b/pipelex/cogt/content_generation/content_generator_protocol.py index 4de591978..fe19411ae 100644 --- a/pipelex/cogt/content_generation/content_generator_protocol.py +++ b/pipelex/cogt/content_generation/content_generator_protocol.py @@ -45,22 +45,22 @@ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: class ContentGeneratorProtocol(Protocol): - async def make_llm_text( + def make_llm_text( self, job_metadata: JobMetadata, llm_setting_main: LLMSetting, llm_prompt_for_text: LLMPrompt, - ) -> str: ... + ) -> Coroutine[Any, Any, str]: ... 
- async def make_object_direct( + def make_object_direct( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], llm_setting_for_object: LLMSetting, llm_prompt_for_object: LLMPrompt, - ) -> BaseModelTypeVar: ... + ) -> Coroutine[Any, Any, BaseModelTypeVar]: ... - async def make_text_then_object( + def make_text_then_object( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -68,18 +68,18 @@ async def make_text_then_object( llm_setting_for_object: LLMSetting, llm_prompt_for_text: LLMPrompt, llm_prompt_factory_for_object: LLMPromptFactoryAbstract | None = None, - ) -> BaseModelTypeVar: ... + ) -> Coroutine[Any, Any, BaseModelTypeVar]: ... - async def make_object_list_direct( + def make_object_list_direct( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], llm_setting_for_object_list: LLMSetting, llm_prompt_for_object_list: LLMPrompt, nb_items: int | None = None, - ) -> list[BaseModelTypeVar]: ... + ) -> Coroutine[Any, Any, list[BaseModelTypeVar]]: ... - async def make_text_then_object_list( + def make_text_then_object_list( self, job_metadata: JobMetadata, object_class: type[BaseModelTypeVar], @@ -88,18 +88,18 @@ async def make_text_then_object_list( llm_prompt_for_text: LLMPrompt, llm_prompt_factory_for_object_list: LLMPromptFactoryAbstract | None = None, nb_items: int | None = None, - ) -> list[BaseModelTypeVar]: ... + ) -> Coroutine[Any, Any, list[BaseModelTypeVar]]: ... - async def make_single_image( + def make_single_image( self, job_metadata: JobMetadata, img_gen_handle: str, img_gen_prompt: ImgGenPrompt, img_gen_job_params: ImgGenJobParams | None = None, img_gen_job_config: ImgGenJobConfig | None = None, - ) -> GeneratedImage: ... + ) -> Coroutine[Any, Any, GeneratedImage]: ... 
- async def make_image_list( + def make_image_list( self, job_metadata: JobMetadata, img_gen_handle: str, @@ -107,21 +107,21 @@ async def make_image_list( nb_images: int, img_gen_job_params: ImgGenJobParams | None = None, img_gen_job_config: ImgGenJobConfig | None = None, - ) -> list[GeneratedImage]: ... + ) -> Coroutine[Any, Any, list[GeneratedImage]]: ... - async def make_templated_text( + def make_templated_text( self, context: dict[str, Any], template: str, templating_style: TemplatingStyle | None = None, template_category: TemplateCategory | None = None, - ) -> str: ... + ) -> Coroutine[Any, Any, str]: ... - async def make_extract_pages( + def make_extract_pages( self, job_metadata: JobMetadata, extract_input: ExtractInput, extract_handle: str, extract_job_params: ExtractJobParams, extract_job_config: ExtractJobConfig, - ) -> ExtractOutput: ... + ) -> Coroutine[Any, Any, ExtractOutput]: ... diff --git a/pipelex/cogt/exceptions.py b/pipelex/cogt/exceptions.py index 22c7c4dfb..3897bc43e 100644 --- a/pipelex/cogt/exceptions.py +++ b/pipelex/cogt/exceptions.py @@ -143,7 +143,7 @@ class MissingPluginError(CogtError): pass -class OcrCapabilityError(CogtError): +class ExtractCapabilityError(CogtError): pass @@ -151,6 +151,10 @@ class RoutingProfileLibraryNotFoundError(CogtError): pass +class RoutingProfileValidationError(CogtError): + pass + + class RoutingProfileLibraryError(CogtError): pass @@ -163,6 +167,14 @@ class InferenceBackendError(CogtError): pass +class InferenceBackendLibraryNotFoundError(CogtError): + pass + + +class InferenceBackendLibraryValidationError(CogtError): + pass + + class InferenceBackendCredentialsErrorType(StrEnum): VAR_NOT_FOUND = "var_not_found" UNKNOWN_VAR_PREFIX = "unknown_var_prefix" @@ -191,9 +203,13 @@ class RoutingProfileError(CogtError): pass -class ModelsManagerError(CogtError): +class ModelManagerError(CogtError): pass class ModelDeckNotFoundError(CogtError): pass + + +class ModelDeckValidationError(CogtError): + pass diff --git 
a/pipelex/cogt/model_backends/backend_library.py b/pipelex/cogt/model_backends/backend_library.py index 085dd599d..7e2482a4e 100644 --- a/pipelex/cogt/model_backends/backend_library.py +++ b/pipelex/cogt/model_backends/backend_library.py @@ -6,6 +6,8 @@ InferenceBackendCredentialsError, InferenceBackendCredentialsErrorType, InferenceBackendLibraryError, + InferenceBackendLibraryNotFoundError, + InferenceBackendLibraryValidationError, InferenceModelSpecError, ) from pipelex.cogt.model_backends.backend import InferenceBackend @@ -38,9 +40,12 @@ def load(self): backends_library_path = get_config().cogt.inference_config.backends_library_path try: backends_dict = load_toml_from_path(path=backends_library_path) - except (FileNotFoundError, InferenceBackendLibraryError) as exc: - msg = f"Failed to load inference backend library from file '{backends_library_path}': {exc}" - raise InferenceBackendLibraryError(msg) from exc + except FileNotFoundError as file_not_found_exc: + msg = f"Could not find inference backend library at '{backends_library_path}': {file_not_found_exc}" + raise InferenceBackendLibraryNotFoundError(msg) from file_not_found_exc + except ValidationError as exc: + msg = f"Invalid inference backend library configuration in '{backends_library_path}': {exc}" + raise InferenceBackendLibraryValidationError(msg) from exc for backend_name, backend_dict in backends_dict.items(): # We'll split the read settings into standard fields and extra config standard_fields = InferenceBackendBlueprint.model_fields.keys() diff --git a/pipelex/cogt/model_routing/routing_profile_library.py b/pipelex/cogt/model_routing/routing_profile_library.py index 1fb265580..c2b27d257 100644 --- a/pipelex/cogt/model_routing/routing_profile_library.py +++ b/pipelex/cogt/model_routing/routing_profile_library.py @@ -1,7 +1,7 @@ from pydantic import Field, RootModel, ValidationError from pipelex import log -from pipelex.cogt.exceptions import RoutingProfileLibraryError, 
RoutingProfileLibraryNotFoundError +from pipelex.cogt.exceptions import RoutingProfileLibraryError, RoutingProfileLibraryNotFoundError, RoutingProfileValidationError from pipelex.cogt.model_routing.routing_models import BackendMatchForModel from pipelex.cogt.model_routing.routing_profile import RoutingProfile from pipelex.cogt.model_routing.routing_profile_factory import ( @@ -45,14 +45,14 @@ def load(self) -> None: try: catalog_dict = load_toml_from_path(path=routing_profile_library_path) except FileNotFoundError as not_found_exc: - msg = f"Failed to load routing profile library from file '{routing_profile_library_path}': {not_found_exc}" + msg = f"Could not find routing profile library at '{routing_profile_library_path}': {not_found_exc}" raise RoutingProfileLibraryNotFoundError(msg) from not_found_exc try: catalog_blueprint = RoutingProfileLibraryBlueprint.model_validate(catalog_dict) except ValidationError as exc: msg = f"Invalid routing profile library configuration in '{routing_profile_library_path}': {exc}" - raise RoutingProfileLibraryError(msg) from exc + raise RoutingProfileValidationError(msg) from exc # Validate that the active config exists if catalog_blueprint.active not in catalog_blueprint.profiles: diff --git a/pipelex/cogt/models/model_deck.py b/pipelex/cogt/models/model_deck.py index 8c2932bbb..9b7108f35 100644 --- a/pipelex/cogt/models/model_deck.py +++ b/pipelex/cogt/models/model_deck.py @@ -114,7 +114,7 @@ def get_img_gen_setting(self, img_gen_choice: ImgGenModelChoice) -> ImgGenSettin raise ImgGenChoiceNotFoundError(msg) @classmethod - def final_validate(cls, deck: Self): # pyright: ignore[reportIncompatibleMethodOverride] + def final_validate(cls, deck: Self): for llm_preset_id, llm_setting in deck.llm_presets.items(): inference_model = deck.get_required_inference_model(model_handle=llm_setting.model) try: diff --git a/pipelex/cogt/models/model_manager.py b/pipelex/cogt/models/model_manager.py index e237868e6..1d14f3dbe 100644 --- 
a/pipelex/cogt/models/model_manager.py +++ b/pipelex/cogt/models/model_manager.py @@ -1,10 +1,10 @@ -import os from typing import Any +from pydantic import ValidationError from typing_extensions import override from pipelex import log -from pipelex.cogt.exceptions import ModelDeckNotFoundError, ModelsManagerError +from pipelex.cogt.exceptions import ModelDeckNotFoundError, ModelDeckValidationError, ModelManagerError from pipelex.cogt.model_backends.backend import InferenceBackend from pipelex.cogt.model_backends.backend_library import InferenceBackendLibrary from pipelex.cogt.model_backends.model_spec import InferenceModelSpec @@ -47,23 +47,22 @@ def load_deck_blueprint(cls) -> ModelDeckBlueprint: deck_paths = get_config().cogt.inference_config.get_model_deck_paths() full_deck_dict: dict[str, Any] = {} if not deck_paths: - msg = "No LLM deck paths found. Please run `pipelex init-libraries` to create it." + msg = "No Model deck paths found. Please run `pipelex init-libraries` to create the set up the base deck." raise ModelDeckNotFoundError(msg) for deck_path in deck_paths: - if not os.path.exists(deck_path): - msg = f"LLM deck path `{deck_path}` not found. Please run `pipelex init-libraries` to create it." 
- raise ModelDeckNotFoundError(msg) try: deck_dict = load_toml_from_path(path=deck_path) - log.debug(f"Loaded LLM deck from {deck_path}") - deep_update(full_deck_dict, deck_dict) - except Exception as exc: - msg = f"Failed to load LLM deck file '{deck_path}': {exc}" - log.error(msg) - raise + except FileNotFoundError as not_found_exc: + msg = f"Could not find Model Deck file at '{deck_path}': {not_found_exc}" + raise ModelDeckNotFoundError(msg) from not_found_exc + deep_update(full_deck_dict, deck_dict) - return ModelDeckBlueprint.model_validate(full_deck_dict) + try: + return ModelDeckBlueprint.model_validate(full_deck_dict) + except ValidationError as exc: + msg = f"Invalid Model Deck configuration in {deck_paths}: {exc}" + raise ModelDeckValidationError(msg) from exc def build_deck(self, model_deck_blueprint: ModelDeckBlueprint) -> ModelDeck: all_models_and_possible_backends = self.inference_backend_library.get_all_models_and_possible_backends() @@ -80,7 +79,7 @@ def build_deck(self, model_deck_blueprint: ModelDeckBlueprint) -> ModelDeck: backend = self.inference_backend_library.get_inference_backend(backend_name=matched_backend_name) if backend is None: msg = f"Backend '{matched_backend_name}', requested for model '{model_name}', could not be found" - raise ModelsManagerError(msg) + raise ModelManagerError(msg) model_spec = backend.get_model_spec(model_name) if model_spec is None: # Not finding the model spec can be an error or not according to the matching method @@ -90,7 +89,7 @@ def build_deck(self, model_deck_blueprint: ModelDeckBlueprint) -> ModelDeck: f"Model spec '{model_name}' not found in backend '{matched_backend_name}' " f"which was matched exactly in routing profile '{backend_match_for_model.routing_profile_name}'" ) - raise ModelsManagerError(msg) + raise ModelManagerError(msg) case BackendMatchingMethod.PATTERN_MATCH: log.verbose( f"Model spec '{model_name}' not found in backend '{matched_backend_name}' but it's OK because " @@ -109,7 +108,7 @@ 
def build_deck(self, model_deck_blueprint: ModelDeckBlueprint) -> ModelDeck: backend = self.inference_backend_library.get_inference_backend(backend_name=available_backend) if backend is None: msg = f"Backend '{available_backend}' not found for model '{model_name}'" - raise ModelsManagerError(msg) + raise ModelManagerError(msg) model_spec = backend.get_model_spec(model_name) if model_spec is not None: break @@ -118,7 +117,7 @@ def build_deck(self, model_deck_blueprint: ModelDeckBlueprint) -> ModelDeck: f"Model spec '{model_name}' not found in any of the available backends '{available_backends}' " f"which was set as default in routing profile '{backend_match_for_model.routing_profile_name}'" ) - raise ModelsManagerError(msg) + raise ModelManagerError(msg) inference_models[model_name] = model_spec return ModelDeck( @@ -145,5 +144,5 @@ def get_required_inference_backend(self, backend_name: str) -> InferenceBackend: backend = self.inference_backend_library.get_inference_backend(backend_name) if backend is None: msg = f"Inference backend '{backend_name}' not found" - raise ModelsManagerError(msg) + raise ModelManagerError(msg) return backend diff --git a/pipelex/config.py b/pipelex/config.py index fb6a63757..39e4a0bc5 100644 --- a/pipelex/config.py +++ b/pipelex/config.py @@ -119,10 +119,21 @@ class Pipelex(ConfigModel): observer_config: ObserverConfig +class MigrationConfig(ConfigModel): + renaming_map: dict[str, str] + + def text_in_renaming_keys(self, text: str) -> list[tuple[str, str]]: + return [(key, value) for key, value in self.renaming_map.items() if text in key] + + def text_in_renaming_values(self, text: str) -> list[tuple[str, str]]: + return [(key, value) for key, value in self.renaming_map.items() if text in value] + + class PipelexConfig(ConfigRoot): session_id: str = shortuuid.uuid() cogt: Cogt pipelex: Pipelex + migration: MigrationConfig def get_config() -> PipelexConfig: diff --git a/pipelex/core/domains/domain_factory.py 
b/pipelex/core/domains/domain_factory.py index 0875040a1..fa8f69353 100644 --- a/pipelex/core/domains/domain_factory.py +++ b/pipelex/core/domains/domain_factory.py @@ -18,6 +18,6 @@ def make_from_blueprint(cls, blueprint: DomainBlueprint) -> Domain: prompt_template_to_structure=blueprint.prompt_template_to_structure, ) except ValidationError as exc: - formatted_error_msg = format_pydantic_validation_error(exc) - msg = f"Could not make domain from blueprint: {formatted_error_msg}" + validation_error_msg = format_pydantic_validation_error(exc) + msg = f"Could not make domain from blueprint: {validation_error_msg}" raise DomainDefinitionError(message=msg, domain_code=blueprint.code, description=blueprint.description) from exc diff --git a/pipelex/core/interpreter.py b/pipelex/core/interpreter.py index 65cd9321c..0912f8ec0 100644 --- a/pipelex/core/interpreter.py +++ b/pipelex/core/interpreter.py @@ -22,19 +22,19 @@ class PipelexInterpreter(BaseModel): file_path: Path | None = None file_content: str | None = None - @staticmethod - def escape_plx_string(value: str | None) -> str: - """Escape a string for plx serialization.""" - if value is None: - return "" - # Escape backslashes first (must be done first) - value = value.replace("\\", "\\\\") - # Escape quotes - value = value.replace('"', '\\"') - # Replace actual newlines with escaped newlines - value = value.replace("\n", "\\n") - value = value.replace("\r", "\\r") - return value.replace("\t", "\\t") + # @staticmethod + # def escape_plx_string(value: str | None) -> str: + # """Escape a string for plx serialization.""" + # if value is None: + # return "" + # # Escape backslashes first (must be done first) + # value = value.replace("\\", "\\\\") + # # Escape quotes + # value = value.replace('"', '\\"') + # # Replace actual newlines with escaped newlines + # value = value.replace("\n", "\\n") + # value = value.replace("\r", "\\r") + # return value.replace("\t", "\\t") @model_validator(mode="after") def 
check_file_path_or_file_content(self) -> Self: diff --git a/pipelex/core/validation.py b/pipelex/core/validation.py new file mode 100644 index 000000000..5d464991d --- /dev/null +++ b/pipelex/core/validation.py @@ -0,0 +1,31 @@ +from pydantic import ValidationError + +from pipelex.config import get_config +from pipelex.tools.typing.pydantic_utils import analyze_pydantic_validation_error + + +def report_validation_error(validation_error: ValidationError) -> str: + validation_error_analysis = analyze_pydantic_validation_error(validation_error) + + migration_config = get_config().migration + + migration_reports: list[str] = [] + for missing_field in validation_error_analysis.missing_fields: + text = missing_field.split(".")[-1] + if renamings := migration_config.text_in_renaming_values(text=text): + renamings_str = "\n".join(f"• '{key}' -> '{value}'" for key, value in renamings) + migration_reports.append(f"Missing field '{missing_field}' is possibly a new name related to one of these renamings:\n{renamings_str}") + + for extra_field in validation_error_analysis.extra_fields: + text = extra_field.split(".")[-1] + if renamings := migration_config.text_in_renaming_keys(text=text): + renamings_str = "\n".join(f"• '{key}' -> '{value}'" for key, value in renamings) + migration_reports.append( + f"Extra field '{extra_field}' is possibly an old deprecated name related to one of these renamings:\n{renamings_str}" + ) + + report_msg = validation_error_analysis.error_msg + if migration_reports: + migration_reports_str = "\n".join(migration_reports) + report_msg += "\n\nThe following fields have been renamed in the new version of Pipelex:\n\n" + migration_reports_str + return report_msg diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 174ac4e53..74bc70c16 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -18,6 +18,7 @@ from pipelex.core.pipes.pipe_abstract import PipeAbstract from 
pipelex.core.pipes.pipe_factory import PipeFactory from pipelex.core.pipes.pipe_library import PipeLibrary +from pipelex.core.validation import report_validation_error from pipelex.exceptions import ( ConceptDefinitionError, ConceptLibraryError, @@ -36,7 +37,6 @@ from pipelex.tools.func_registry_utils import FuncRegistryUtils from pipelex.tools.misc.file_utils import find_files_in_dir from pipelex.tools.runtime_manager import runtime_manager -from pipelex.tools.typing.pydantic_utils import format_pydantic_validation_error from pipelex.types import StrEnum @@ -243,16 +243,16 @@ def load_libraries( for plx_file_path in valid_plx_paths: try: blueprint = PipelexInterpreter(file_path=plx_file_path).make_pipelex_bundle_blueprint() - except FileNotFoundError as domain_def_error: + except FileNotFoundError as file_not_found_error: msg = f"Could not find PLX blueprint at '{plx_file_path}'" - raise LibraryLoadingError(msg) from domain_def_error - except PipeDefinitionError as domain_def_error: - msg = f"Could not load PLX blueprint from '{plx_file_path}': {domain_def_error}" - raise LibraryLoadingError(msg) from domain_def_error - except ValidationError as domain_def_error: - formatted_error_msg = format_pydantic_validation_error(domain_def_error) - msg = f"Could not load PLX blueprint from '{plx_file_path}' because of: {formatted_error_msg}" - raise LibraryLoadingError(msg) from domain_def_error + raise LibraryLoadingError(msg) from file_not_found_error + except PipeDefinitionError as pipe_def_error: + msg = f"Could not load PLX blueprint from '{plx_file_path}': {pipe_def_error}" + raise LibraryLoadingError(msg) from pipe_def_error + except ValidationError as validation_error: + validation_error_msg = report_validation_error(validation_error=validation_error) + msg = f"Could not load PLX blueprint from '{plx_file_path}' because of: {validation_error_msg}" + raise LibraryLoadingError(msg) from validation_error blueprint.source = str(plx_file_path) 
blueprints.append(blueprint) @@ -265,8 +265,8 @@ def load_libraries( msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {domain_def_error}" raise LibraryLoadingError(msg) from domain_def_error except ValidationError as validation_error: - formatted_error_msg = format_pydantic_validation_error(validation_error) - msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {formatted_error_msg}" + validation_error_msg = report_validation_error(validation_error=validation_error) + msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_domains.append(domain) self.domain_library.add_domains(domains=all_domains) @@ -280,8 +280,8 @@ def load_libraries( msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {concept_def_error}" raise LibraryLoadingError(msg) from concept_def_error except ValidationError as validation_error: - formatted_error_msg = format_pydantic_validation_error(validation_error) - msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {formatted_error_msg}" + validation_error_msg = report_validation_error(validation_error=validation_error) + msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_concepts.extend(concepts) self.concept_library.add_concepts(concepts=all_concepts) @@ -295,8 +295,8 @@ def load_libraries( msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {pipe_def_error}" raise LibraryLoadingError(msg) from pipe_def_error except ValidationError as validation_error: - formatted_error_msg = 
format_pydantic_validation_error(validation_error) - msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {formatted_error_msg}" + validation_error_msg = report_validation_error(validation_error=validation_error) + msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_pipes.extend(pipes) self.pipe_library.add_pipes(pipes=all_pipes) diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index f6aa4d883..baeec5cff 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -14,7 +14,15 @@ from pipelex.cogt.content_generation.content_generator_protocol import ( ContentGeneratorProtocol, ) -from pipelex.cogt.exceptions import InferenceBackendCredentialsError, RoutingProfileLibraryNotFoundError +from pipelex.cogt.exceptions import ( + InferenceBackendCredentialsError, + InferenceBackendLibraryNotFoundError, + InferenceBackendLibraryValidationError, + ModelDeckNotFoundError, + ModelDeckValidationError, + RoutingProfileLibraryNotFoundError, + RoutingProfileValidationError, +) from pipelex.cogt.inference.inference_manager import InferenceManager from pipelex.cogt.models.model_manager import ModelManager from pipelex.cogt.models.model_manager_abstract import ModelManagerAbstract @@ -23,6 +31,7 @@ from pipelex.core.domains.domain_library import DomainLibrary from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.core.registry_models import PipelexRegistryModels +from pipelex.core.validation import report_validation_error from pipelex.exceptions import PipelexConfigError, PipelexSetupError from pipelex.hub import PipelexHub, set_pipelex_hub from pipelex.libraries.library_manager_factory import LibraryManagerFactory @@ -51,8 +60,8 @@ from pipelex.tools.secrets.env_secrets_provider import EnvSecretsProvider from pipelex.tools.secrets.secrets_provider_abstract import 
SecretsProviderAbstract from pipelex.tools.storage.storage_provider_abstract import StorageProviderAbstract -from pipelex.tools.typing.pydantic_utils import format_pydantic_validation_error from pipelex.types import Self +from pipelex.urls import URLs PACKAGE_NAME = __name__.split(".", maxsplit=1)[0] PACKAGE_VERSION = metadata(PACKAGE_NAME)["Version"] @@ -80,10 +89,10 @@ def __init__( # tools try: self.pipelex_hub.setup_config(config_cls=config_cls or PipelexConfig) - except ValidationError as exc: - formatted_error_msg = format_pydantic_validation_error(exc) - msg = f"Could not setup config because of: {formatted_error_msg}" - raise PipelexConfigError(msg) from exc + except ValidationError as validation_error: + validation_error_msg = report_validation_error(validation_error=validation_error) + msg = f"Could not setup config because of: {validation_error_msg}" + raise PipelexConfigError(msg) from validation_error log.configure( project_name=get_config().project_name or "unknown_project", @@ -167,9 +176,28 @@ def setup( self.plugin_manager.setup() try: self.models_manager.setup() - except RoutingProfileLibraryNotFoundError as routing_profile_library_exc: - msg = "The routing library could not be found, please call `pipelex init config` to create it" - raise PipelexSetupError(msg) from routing_profile_library_exc + except (RoutingProfileLibraryNotFoundError, InferenceBackendLibraryNotFoundError, ModelDeckNotFoundError) as backends_not_found_exc: + msg = ( + "Some config files are missing for the inference backend library, routing profile library, or model deck. " + "Run `pipelex init config` to generate the missing files." + ) + raise PipelexSetupError(msg) from backends_not_found_exc + except (RoutingProfileValidationError, InferenceBackendLibraryValidationError, ModelDeckValidationError) as backends_validation_exc: + msg = ( + "Some config files are invalid for the inference backend library, routing profile library, or model deck. 
" + "You can fix them manually, or run `pipelex init config --reset` to regenerate them. " + "Note that this command resets all config files to their default values.\n" + f"If you need help, drop by our Discord: we're happy to assist: {URLs.discord}.\n" + ) + cause_exc = backends_validation_exc.__cause__ + if cause_exc is None: + msg += f"\nUnxpexted cause:{cause_exc}" + raise PipelexSetupError(msg) from cause_exc + if not isinstance(cause_exc, ValidationError): + msg += f"\nUnxpexted cause:{cause_exc}" + raise PipelexSetupError(msg) from cause_exc + validation_error_msg = msg + "\n" + report_validation_error(validation_error=cause_exc) + raise PipelexSetupError(validation_error_msg) from backends_validation_exc except InferenceBackendCredentialsError as credentials_exc: backend_name = credentials_exc.backend_name var_name = credentials_exc.key_name @@ -219,10 +247,10 @@ def setup_libraries(self): def validate_libraries(self): try: self.library_manager.validate_libraries() - except ValidationError as exc: - formatted_error_msg = format_pydantic_validation_error(exc) - msg = f"Could not validate libraries because of: {formatted_error_msg}" - raise PipelexSetupError(msg) from exc + except ValidationError as validation_error: + validation_error_msg = report_validation_error(validation_error=validation_error) + msg = f"Could not validate libraries because of: {validation_error_msg}" + raise PipelexSetupError(msg) from validation_error log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}") def teardown(self): diff --git a/pipelex/pipelex.toml b/pipelex/pipelex.toml index 8d86c747b..8cb8098c3 100644 --- a/pipelex/pipelex.toml +++ b/pipelex/pipelex.toml @@ -276,3 +276,16 @@ structure_field_ordering = ["type", "description", "choices", "required"] [pipelex.plx_config.pipes] field_ordering = ["type", "description", "inputs", "output"] + 
+#################################################################################################### +# Migration config +#################################################################################################### + +[migration.renaming_map] +definition = "description" +ocr_config = "extract_config" +ocr = "extract" +# ocr = "model" +llm_handle = "model" +llm = "model" +llm_to_structure = "model_to_structure" diff --git a/pipelex/plugins/mistral/mistral_extract_worker.py b/pipelex/plugins/mistral/mistral_extract_worker.py index e7d88e751..f3bf088f2 100644 --- a/pipelex/plugins/mistral/mistral_extract_worker.py +++ b/pipelex/plugins/mistral/mistral_extract_worker.py @@ -4,7 +4,7 @@ from typing_extensions import override from pipelex import log -from pipelex.cogt.exceptions import OcrCapabilityError, SdkTypeError +from pipelex.cogt.exceptions import ExtractCapabilityError, SdkTypeError from pipelex.cogt.extract.extract_input import ExtractInputError from pipelex.cogt.extract.extract_job import ExtractJob from pipelex.cogt.extract.extract_output import ExtractOutput @@ -88,7 +88,7 @@ async def _make_extract_output_from_pdf( ) -> ExtractOutput: if should_caption_images: msg = "Captioning is not implemented for Mistral OCR." - raise OcrCapabilityError(msg) + raise ExtractCapabilityError(msg) if should_include_page_views: log.debug("Page views are not implemented for Mistral OCR.") # TODO: use a model capability flag to check possibility before asking for it diff --git a/pipelex/tools/typing/pydantic_utils.py b/pipelex/tools/typing/pydantic_utils.py index 411f40150..9cc4f9705 100644 --- a/pipelex/tools/typing/pydantic_utils.py +++ b/pipelex/tools/typing/pydantic_utils.py @@ -20,14 +20,26 @@ def _factory() -> list[T]: return _factory -def format_pydantic_validation_error(exc: ValidationError) -> str: - """Format a Pydantic ValidationError into a readable string with detailed error information. 
+class PydanticValidationErrorAnalysis(BaseModel): + error_msg: str + + missing_fields: list[str] + extra_fields: list[str] + type_errors: list[str] + value_errors: list[str] + enum_errors: list[str] + union_tag_errors: list[str] + model_type_errors: list[str] + + +def analyze_pydantic_validation_error(exc: ValidationError) -> PydanticValidationErrorAnalysis: + """Analyze a Pydantic ValidationError into a readable string with detailed error information. Args: exc: The Pydantic ValidationError exception Returns: - A formatted string containing categorized validation errors + A PydanticValidationErrorAnalysis object containing categorized validation errors """ error_msg = "Validation error(s):" @@ -60,27 +72,49 @@ def format_pydantic_validation_error(exc: ValidationError) -> str: # Add each type of error to the message if present if missing_fields: - error_msg += f"\nMissing required fields: {missing_fields}" + error_msg += f"\n\nMissing required fields: {missing_fields}" if extra_fields: - error_msg += f"\nExtra forbidden fields: {extra_fields}" + error_msg += f"\n\nExtra forbidden fields: {extra_fields}" if type_errors: - error_msg += f"\nType errors: {type_errors}" + error_msg += f"\n\nType errors: {type_errors}" if value_errors: - error_msg += f"\nValue errors: {value_errors}" + error_msg += f"\n\nValue errors: {value_errors}" if enum_errors: - error_msg += f"\nEnum errors: {enum_errors}" + error_msg += f"\n\nEnum errors: {enum_errors}" if union_tag_errors: - error_msg += f"\nUnion discriminator errors: {union_tag_errors}" + error_msg += f"\n\nUnion discriminator errors: {union_tag_errors}" if model_type_errors: - error_msg += f"\nModel type errors: {model_type_errors}" + error_msg += f"\n\nModel type errors: {model_type_errors}" # If none of the specific error types were found, add the raw error messages if not any([missing_fields, extra_fields, type_errors, value_errors, enum_errors, union_tag_errors, model_type_errors]): - error_msg += "\nOther validation 
errors:" + error_msg += "\n\nOther validation errors:" for err in exc.errors(): error_msg += f"\n{'.'.join(map(str, err['loc']))}: {err['type']}: {err['msg']}" - return error_msg + return PydanticValidationErrorAnalysis( + error_msg=error_msg, + missing_fields=missing_fields, + extra_fields=extra_fields, + type_errors=type_errors, + value_errors=value_errors, + enum_errors=enum_errors, + union_tag_errors=union_tag_errors, + model_type_errors=model_type_errors, + ) + + +def format_pydantic_validation_error(exc: ValidationError) -> str: + """Format a Pydantic ValidationError into a readable string with detailed error information. + + Args: + exc: The Pydantic ValidationError exception + + Returns: + A formatted string containing categorized validation errors + + """ + return analyze_pydantic_validation_error(exc).error_msg def convert_strenum_to_str( diff --git a/pipelex/urls.py b/pipelex/urls.py new file mode 100644 index 000000000..d57662830 --- /dev/null +++ b/pipelex/urls.py @@ -0,0 +1,2 @@ +class URLs: + discord = "https://go.pipelex.com/discord" From aca2c6afb7d9ce68ee5b67110725e050e0f6b5dd Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 9 Oct 2025 16:10:25 +0200 Subject: [PATCH 004/115] fix PipeExtract --- pipelex/client/client.py | 2 +- pipelex/core/stuffs/stuff_factory.py | 4 +++- .../libraries/pipelines/builder/pipe/pipe_extract_spec.py | 6 ++++++ pipelex/tools/config/config_root.py | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pipelex/client/client.py b/pipelex/client/client.py index c81401d22..cc1eb1bfb 100644 --- a/pipelex/client/client.py +++ b/pipelex/client/client.py @@ -92,7 +92,7 @@ async def execute_pipeline( if input_memory is not None: working_memory = WorkingMemoryFactory.make_from_compact_memory(input_memory) - + pipeline_request = PipelineRequestFactory.make_from_working_memory( working_memory=working_memory, output_name=output_name, diff --git a/pipelex/core/stuffs/stuff_factory.py 
b/pipelex/core/stuffs/stuff_factory.py index cf3e74d95..d7e0f820a 100644 --- a/pipelex/core/stuffs/stuff_factory.py +++ b/pipelex/core/stuffs/stuff_factory.py @@ -226,7 +226,9 @@ def make_stuff_from_stuff_content_using_search_domains( if not concept_code: msg = "Stuff content data dict is badly formed: no concept code" raise StuffFactoryError(msg) - domain_and_concept_code = ConceptFactory.make_domain_and_concept_code_from_concept_string_or_code(domain=SpecialDomain.NATIVE, concept_string_or_code=concept_code) + domain_and_concept_code = ConceptFactory.make_domain_and_concept_code_from_concept_string_or_code( + domain=SpecialDomain.NATIVE, concept_string_or_code=concept_code + ) content_value = stuff_content_dict["content"] if NativeConceptCode.get_validated_native_concept_string(concept_string_or_code=domain_and_concept_code.concept_code): concept = ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode(domain_and_concept_code.concept_code)) diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py index 09fc7237b..d33beaad8 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py @@ -41,6 +41,7 @@ class PipeExtractSpec(PipeSpec): Validation Rules: - inputs dict must have exactly one input entry, and the value must be either `Image` or `PDF`. 
+ - output must be "Page" """ type: SkipJsonSchema[Literal["PipeExtract"]] = "PipeExtract" @@ -50,6 +51,11 @@ class PipeExtractSpec(PipeSpec): page_image_captions: bool | None = Field(default=None, description="Whether to generate captions for detected images using AI.") page_views: bool | None = Field(default=None, description="Whether to include rendered page views in the output.") + @field_validator("output", mode="before") + @classmethod + def validate_output(cls, _: str) -> str: + return "Page" + @field_validator("extract_skill", mode="before") @classmethod def validate_extract_skill(cls, extract_skill_value: str) -> ExtractSkill: diff --git a/pipelex/tools/config/config_root.py b/pipelex/tools/config/config_root.py index bde4d57cd..b30244b6d 100644 --- a/pipelex/tools/config/config_root.py +++ b/pipelex/tools/config/config_root.py @@ -5,11 +5,11 @@ from pipelex.tools.config.config_model import ConfigModel from pipelex.tools.exceptions import ConfigValidationError from pipelex.tools.typing.pydantic_utils import format_pydantic_validation_error -from pipelex.types import StrEnum CONFIG_BASE_OVERRIDES_BEFORE_ENV = ["local"] CONFIG_BASE_OVERRIDES_AFTER_ENV = ["super"] + class ConfigRoot(ConfigModel): """Main configuration class for the project. 
From e6bfa9d3d7963882762f41b776ef6799acf505a1 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Thu, 9 Oct 2025 16:20:33 +0200 Subject: [PATCH 005/115] WIP Distinct migration maps --- pipelex/config.py | 20 +++++++++++++------- pipelex/core/validation.py | 9 ++++++--- pipelex/libraries/library_manager.py | 8 ++++---- pipelex/pipelex.py | 6 +++--- pipelex/pipelex.toml | 7 +++++-- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/pipelex/config.py b/pipelex/config.py index 39e4a0bc5..db7572ef3 100644 --- a/pipelex/config.py +++ b/pipelex/config.py @@ -120,13 +120,19 @@ class Pipelex(ConfigModel): class MigrationConfig(ConfigModel): - renaming_map: dict[str, str] - - def text_in_renaming_keys(self, text: str) -> list[tuple[str, str]]: - return [(key, value) for key, value in self.renaming_map.items() if text in key] - - def text_in_renaming_values(self, text: str) -> list[tuple[str, str]]: - return [(key, value) for key, value in self.renaming_map.items() if text in value] + migration_maps: dict[str, dict[str, str]] + + def text_in_renaming_keys(self, category: str, text: str) -> list[tuple[str, str]]: + renaming_map = self.migration_maps.get(category) + if not renaming_map: + return [] + return [(key, value) for key, value in renaming_map.items() if text in key] + + def text_in_renaming_values(self, category: str, text: str) -> list[tuple[str, str]]: + renaming_map = self.migration_maps.get(category) + if not renaming_map: + return [] + return [(key, value) for key, value in renaming_map.items() if text in value] class PipelexConfig(ConfigRoot): diff --git a/pipelex/core/validation.py b/pipelex/core/validation.py index 5d464991d..eb8350e53 100644 --- a/pipelex/core/validation.py +++ b/pipelex/core/validation.py @@ -1,24 +1,27 @@ from pydantic import ValidationError +from pipelex import log from pipelex.config import get_config from pipelex.tools.typing.pydantic_utils import analyze_pydantic_validation_error -def 
report_validation_error(validation_error: ValidationError) -> str: +def report_validation_error(category: str, validation_error: ValidationError) -> str: validation_error_analysis = analyze_pydantic_validation_error(validation_error) migration_config = get_config().migration migration_reports: list[str] = [] + log.debug(validation_error_analysis.missing_fields, title="Missing fields") for missing_field in validation_error_analysis.missing_fields: text = missing_field.split(".")[-1] - if renamings := migration_config.text_in_renaming_values(text=text): + if renamings := migration_config.text_in_renaming_values(category=category, text=text): renamings_str = "\n".join(f"• '{key}' -> '{value}'" for key, value in renamings) migration_reports.append(f"Missing field '{missing_field}' is possibly a new name related to one of these renamings:\n{renamings_str}") + log.debug(validation_error_analysis.extra_fields, title="Extra fields") for extra_field in validation_error_analysis.extra_fields: text = extra_field.split(".")[-1] - if renamings := migration_config.text_in_renaming_keys(text=text): + if renamings := migration_config.text_in_renaming_keys(category=category, text=text): renamings_str = "\n".join(f"• '{key}' -> '{value}'" for key, value in renamings) migration_reports.append( f"Extra field '{extra_field}' is possibly an old deprecated name related to one of these renamings:\n{renamings_str}" diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 74bc70c16..48a409cbb 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -250,7 +250,7 @@ def load_libraries( msg = f"Could not load PLX blueprint from '{plx_file_path}': {pipe_def_error}" raise LibraryLoadingError(msg) from pipe_def_error except ValidationError as validation_error: - validation_error_msg = report_validation_error(validation_error=validation_error) + validation_error_msg = report_validation_error(category="plx", 
validation_error=validation_error) msg = f"Could not load PLX blueprint from '{plx_file_path}' because of: {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error blueprint.source = str(plx_file_path) @@ -265,7 +265,7 @@ def load_libraries( msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {domain_def_error}" raise LibraryLoadingError(msg) from domain_def_error except ValidationError as validation_error: - validation_error_msg = report_validation_error(validation_error=validation_error) + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_domains.append(domain) @@ -280,7 +280,7 @@ def load_libraries( msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {concept_def_error}" raise LibraryLoadingError(msg) from concept_def_error except ValidationError as validation_error: - validation_error_msg = report_validation_error(validation_error=validation_error) + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_concepts.extend(concepts) @@ -295,7 +295,7 @@ def load_libraries( msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {pipe_def_error}" raise LibraryLoadingError(msg) from pipe_def_error except ValidationError as validation_error: - validation_error_msg = report_validation_error(validation_error=validation_error) + validation_error_msg = report_validation_error(category="plx", 
validation_error=validation_error) msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_pipes.extend(pipes) diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index baeec5cff..71f9cfccd 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -90,7 +90,7 @@ def __init__( try: self.pipelex_hub.setup_config(config_cls=config_cls or PipelexConfig) except ValidationError as validation_error: - validation_error_msg = report_validation_error(validation_error=validation_error) + validation_error_msg = report_validation_error(category="config", validation_error=validation_error) msg = f"Could not setup config because of: {validation_error_msg}" raise PipelexConfigError(msg) from validation_error @@ -196,7 +196,7 @@ def setup( if not isinstance(cause_exc, ValidationError): msg += f"\nUnxpexted cause:{cause_exc}" raise PipelexSetupError(msg) from cause_exc - validation_error_msg = msg + "\n" + report_validation_error(validation_error=cause_exc) + validation_error_msg = msg + "\n" + report_validation_error(category="config", validation_error=cause_exc) raise PipelexSetupError(validation_error_msg) from backends_validation_exc except InferenceBackendCredentialsError as credentials_exc: backend_name = credentials_exc.backend_name @@ -248,7 +248,7 @@ def validate_libraries(self): try: self.library_manager.validate_libraries() except ValidationError as validation_error: - validation_error_msg = report_validation_error(validation_error=validation_error) + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) msg = f"Could not validate libraries because of: {validation_error_msg}" raise PipelexSetupError(msg) from validation_error log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}") diff --git a/pipelex/pipelex.toml b/pipelex/pipelex.toml 
index 8cb8098c3..c080bf9ed 100644 --- a/pipelex/pipelex.toml +++ b/pipelex/pipelex.toml @@ -281,11 +281,14 @@ field_ordering = ["type", "description", "inputs", "output"] # Migration config #################################################################################################### -[migration.renaming_map] +[migration.migration_maps.config] definition = "description" ocr_config = "extract_config" ocr = "extract" -# ocr = "model" + +[migration.migration_maps.plx] +img_gen = "model" +ocr = "model" llm_handle = "model" llm = "model" llm_to_structure = "model_to_structure" From 80322745d8649336e9fe32f4db89a6ff3dc3e8f4 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 9 Oct 2025 18:08:47 +0200 Subject: [PATCH 006/115] conver info to debug log --- pipelex/pipe_operators/llm/llm_prompt_blueprint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py index 2df0dc2da..07a4cf0e4 100644 --- a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py +++ b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py @@ -163,8 +163,8 @@ async def _unravel_text( jinja2_blueprint.templating_style = templating_style log.verbose(f"Setting prompting style to {templating_style}") - log.info(f"extra_params: {extra_params}") - log.info(f"jinja2_blueprint.extra_context: {jinja2_blueprint.extra_context}") + log.debug(f"extra_params in _unravel_text: {extra_params}") + log.debug(f"jinja2_blueprint.extra_context in _unravel_text: {jinja2_blueprint.extra_context}") context: dict[str, Any] = context_provider.generate_jinja2_context() if extra_params: From 86616a7b33c22c4ea3429c5b0b176f0f014f20be Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 9 Oct 2025 23:32:17 +0200 Subject: [PATCH 007/115] add pipeline_run_id --- pipelex/pipe_run/pipe_router_protocol.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pipelex/pipe_run/pipe_router_protocol.py 
b/pipelex/pipe_run/pipe_router_protocol.py index 9cf72eeac..bc98b6788 100644 --- a/pipelex/pipe_run/pipe_router_protocol.py +++ b/pipelex/pipe_run/pipe_router_protocol.py @@ -13,6 +13,7 @@ async def _before_run( pipe_job: PipeJob, ) -> None: payload: PayloadType = { + "pipeline_run_id": pipe_job.job_metadata.pipeline_run_id, "pipe_job": pipe_job, } await self.observer_provider.observe_before_run(payload) @@ -23,6 +24,7 @@ async def _after_successful_run( pipe_output: PipeOutput, ) -> None: payload: PayloadType = { + "pipeline_run_id": pipe_job.job_metadata.pipeline_run_id, "pipe_job": pipe_job, "pipe_output": pipe_output, } @@ -34,6 +36,7 @@ async def _after_failing_run( error: Exception, ) -> None: payload: PayloadType = { + "pipeline_run_id": pipe_job.job_metadata.pipeline_run_id, "pipe_job": pipe_job, "error": error, } From 2d4ecc06877388e1e7a2a286d1d2c43fe39586b0 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 10:18:34 +0200 Subject: [PATCH 008/115] Better detect migrations, regroup hints --- pipelex/core/validation.py | 44 ++++++++++++++++++++++++++++++++------ pipelex/pipelex.py | 4 +++- pipelex/pipelex.toml | 4 ++++ 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/pipelex/core/validation.py b/pipelex/core/validation.py index eb8350e53..75d7727db 100644 --- a/pipelex/core/validation.py +++ b/pipelex/core/validation.py @@ -11,21 +11,51 @@ def report_validation_error(category: str, validation_error: ValidationError) -> migration_config = get_config().migration migration_reports: list[str] = [] + + # Build field-to-renamings mapping for missing fields log.debug(validation_error_analysis.missing_fields, title="Missing fields") + missing_field_renamings: dict[tuple[tuple[str, str], ...], list[str]] = {} for missing_field in validation_error_analysis.missing_fields: text = missing_field.split(".")[-1] if renamings := migration_config.text_in_renaming_values(category=category, text=text): - renamings_str = "\n".join(f"• '{key}' -> 
'{value}'" for key, value in renamings) - migration_reports.append(f"Missing field '{missing_field}' is possibly a new name related to one of these renamings:\n{renamings_str}") + # Use tuple of renamings as key for grouping + renamings_key = tuple(renamings) + if renamings_key not in missing_field_renamings: + missing_field_renamings[renamings_key] = [] + missing_field_renamings[renamings_key].append(missing_field) + # Build field-to-renamings mapping for extra fields log.debug(validation_error_analysis.extra_fields, title="Extra fields") + extra_field_renamings: dict[tuple[tuple[str, str], ...], list[str]] = {} for extra_field in validation_error_analysis.extra_fields: - text = extra_field.split(".")[-1] + # Extract field path before the colon (extra fields include ": value") + field_path = extra_field.split(":")[0].strip() + text = field_path.split(".")[-1] if renamings := migration_config.text_in_renaming_keys(category=category, text=text): - renamings_str = "\n".join(f"• '{key}' -> '{value}'" for key, value in renamings) - migration_reports.append( - f"Extra field '{extra_field}' is possibly an old deprecated name related to one of these renamings:\n{renamings_str}" - ) + renamings_key = tuple(renamings) + if renamings_key not in extra_field_renamings: + extra_field_renamings[renamings_key] = [] + extra_field_renamings[renamings_key].append(extra_field) + + # Format grouped output for missing fields + for renamings_tuple, fields in missing_field_renamings.items(): + renamings_str = "\n".join(f"• '{key}' -> '{value}'" for key, value in renamings_tuple) + if len(fields) == 1: + msg = f"Missing field '{fields[0]}' is possibly a new name related to one of these renamings:\n{renamings_str}" + else: + fields_str = ", ".join(f"'{f}'" for f in fields) + msg = f"Missing fields [{fields_str}] are possibly new names related to one of these renamings:\n{renamings_str}" + migration_reports.append(msg) + + # Format grouped output for extra fields + for renamings_tuple, 
fields in extra_field_renamings.items(): + renamings_str = "\n".join(f"• '{key}' -> '{value}'" for key, value in renamings_tuple) + if len(fields) == 1: + msg = f"Extra field '{fields[0]}' is possibly an old deprecated name related to one of these renamings:\n{renamings_str}" + else: + fields_str = ", ".join(f"'{f}'" for f in fields) + msg = f"Extra fields [{fields_str}] are possibly old deprecated names related to one of these renamings:\n{renamings_str}" + migration_reports.append(msg) report_msg = validation_error_analysis.error_msg if migration_reports: diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index 71f9cfccd..0da460e98 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -183,12 +183,13 @@ def setup( ) raise PipelexSetupError(msg) from backends_not_found_exc except (RoutingProfileValidationError, InferenceBackendLibraryValidationError, ModelDeckValidationError) as backends_validation_exc: - msg = ( + comment_msg = ( "Some config files are invalid for the inference backend library, routing profile library, or model deck. " "You can fix them manually, or run `pipelex init config --reset` to regenerate them. 
" "Note that this command resets all config files to their default values.\n" f"If you need help, drop by our Discord: we're happy to assist: {URLs.discord}.\n" ) + msg = "" cause_exc = backends_validation_exc.__cause__ if cause_exc is None: msg += f"\nUnxpexted cause:{cause_exc}" @@ -197,6 +198,7 @@ def setup( msg += f"\nUnxpexted cause:{cause_exc}" raise PipelexSetupError(msg) from cause_exc validation_error_msg = msg + "\n" + report_validation_error(category="config", validation_error=cause_exc) + validation_error_msg += "\n\n" + comment_msg raise PipelexSetupError(validation_error_msg) from backends_validation_exc except InferenceBackendCredentialsError as credentials_exc: backend_name = credentials_exc.backend_name diff --git a/pipelex/pipelex.toml b/pipelex/pipelex.toml index c080bf9ed..bfc75a161 100644 --- a/pipelex/pipelex.toml +++ b/pipelex/pipelex.toml @@ -285,6 +285,10 @@ field_ordering = ["type", "description", "inputs", "output"] definition = "description" ocr_config = "extract_config" ocr = "extract" +img_gen = "model" +llm_handle = "model" +llm = "model" +llm_to_structure = "model_to_structure" [migration.migration_maps.plx] img_gen = "model" From ba8126527f98a716108c66f089e98d29ea94e26b Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 10:32:17 +0200 Subject: [PATCH 009/115] Cleaner and more specific setup errors --- pipelex/core/validation.py | 2 +- pipelex/pipelex.py | 68 ++++++++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/pipelex/core/validation.py b/pipelex/core/validation.py index 75d7727db..b43cf4a35 100644 --- a/pipelex/core/validation.py +++ b/pipelex/core/validation.py @@ -60,5 +60,5 @@ def report_validation_error(category: str, validation_error: ValidationError) -> report_msg = validation_error_analysis.error_msg if migration_reports: migration_reports_str = "\n".join(migration_reports) - report_msg += "\n\nThe following fields have been renamed in the new version of 
Pipelex:\n\n" + migration_reports_str + report_msg += "\n\nNote that some fields have been renamed in the new version of Pipelex.\n\n" + migration_reports_str return report_msg diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index 0da460e98..add5570eb 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -161,6 +161,32 @@ def __init__( log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} init done") + @staticmethod + def _get_config_not_found_error_msg(component_name: str) -> str: + """Generate error message for missing config files.""" + return f"Config files are missing for the {component_name}. Run `pipelex init config` to generate the missing files." + + @staticmethod + def _get_validation_error_msg(component_name: str, validation_exc: Exception) -> str: + """Generate error message for invalid config files.""" + msg = "" + cause_exc = validation_exc.__cause__ + if cause_exc is None: + msg += f"\nUnxpexted cause:{cause_exc}" + raise PipelexSetupError(msg) from cause_exc + if not isinstance(cause_exc, ValidationError): + msg += f"\nUnxpexted cause:{cause_exc}" + raise PipelexSetupError(msg) from cause_exc + report = report_validation_error(category="config", validation_error=cause_exc) + return f"""{msg} +{report} + +Config files are invalid for the {component_name}. +You can fix them manually, or run `pipelex init config --reset` to regenerate them. +Note that this command resets all config files to their default values. +If you need help, drop by our Discord: we're happy to assist: {URLs.discord}. +""" + def setup( self, secrets_provider: SecretsProviderAbstract | None = None, @@ -176,30 +202,24 @@ def setup( self.plugin_manager.setup() try: self.models_manager.setup() - except (RoutingProfileLibraryNotFoundError, InferenceBackendLibraryNotFoundError, ModelDeckNotFoundError) as backends_not_found_exc: - msg = ( - "Some config files are missing for the inference backend library, routing profile library, or model deck. 
" - "Run `pipelex init config` to generate the missing files." - ) - raise PipelexSetupError(msg) from backends_not_found_exc - except (RoutingProfileValidationError, InferenceBackendLibraryValidationError, ModelDeckValidationError) as backends_validation_exc: - comment_msg = ( - "Some config files are invalid for the inference backend library, routing profile library, or model deck. " - "You can fix them manually, or run `pipelex init config --reset` to regenerate them. " - "Note that this command resets all config files to their default values.\n" - f"If you need help, drop by our Discord: we're happy to assist: {URLs.discord}.\n" - ) - msg = "" - cause_exc = backends_validation_exc.__cause__ - if cause_exc is None: - msg += f"\nUnxpexted cause:{cause_exc}" - raise PipelexSetupError(msg) from cause_exc - if not isinstance(cause_exc, ValidationError): - msg += f"\nUnxpexted cause:{cause_exc}" - raise PipelexSetupError(msg) from cause_exc - validation_error_msg = msg + "\n" + report_validation_error(category="config", validation_error=cause_exc) - validation_error_msg += "\n\n" + comment_msg - raise PipelexSetupError(validation_error_msg) from backends_validation_exc + except RoutingProfileLibraryNotFoundError as routing_not_found_exc: + msg = self._get_config_not_found_error_msg("routing profile library") + raise PipelexSetupError(msg) from routing_not_found_exc + except InferenceBackendLibraryNotFoundError as backend_not_found_exc: + msg = self._get_config_not_found_error_msg("inference backend library") + raise PipelexSetupError(msg) from backend_not_found_exc + except ModelDeckNotFoundError as deck_not_found_exc: + msg = self._get_config_not_found_error_msg("model deck") + raise PipelexSetupError(msg) from deck_not_found_exc + except RoutingProfileValidationError as routing_validation_exc: + msg = self._get_validation_error_msg("routing profile library", routing_validation_exc) + raise PipelexSetupError(msg) from routing_validation_exc + except 
InferenceBackendLibraryValidationError as backend_validation_exc: + msg = self._get_validation_error_msg("inference backend library", backend_validation_exc) + raise PipelexSetupError(msg) from backend_validation_exc + except ModelDeckValidationError as deck_validation_exc: + msg = self._get_validation_error_msg("model deck", deck_validation_exc) + raise PipelexSetupError(msg) from deck_validation_exc except InferenceBackendCredentialsError as credentials_exc: backend_name = credentials_exc.backend_name var_name = credentials_exc.key_name From b2c48c6a73abf07aef6c20f4368ba25a4f8657db Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 11:07:56 +0200 Subject: [PATCH 010/115] Improve error messages --- pipelex/libraries/pipelines/builder/pipe/pipe_design.plx | 4 ++-- pipelex/pipelex.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_design.plx b/pipelex/libraries/pipelines/builder/pipe/pipe_design.plx index a0c0c601f..83754ff1a 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_design.plx +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_design.plx @@ -117,8 +117,8 @@ You will specifically generate the PipeLLM related to this signature: @pipe_signature If it's a structured generation, indicate it in the system_prompt to clarify the task. -If it's to generate free form text, the prompt_template should indicate to be concise. -If it's to generate an image generation, the prompt_template should indicate to be VERY concise and focus and apply the best practice for image generation. +If it's to generate free form text, the prompt should indicate to be concise. +If it's to generate an image generation prompt, the prompt should indicate to be VERY concise and focus and apply the best practice for image generation. 
Here are the concepts you can use for inputs/outputs: @concept_specs diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index add5570eb..d568828b1 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -219,6 +219,10 @@ def setup( raise PipelexSetupError(msg) from backend_validation_exc except ModelDeckValidationError as deck_validation_exc: msg = self._get_validation_error_msg("model deck", deck_validation_exc) + msg += ( + "\n\nIf you added your own config files to the model deck then they won't be fixed automatically, " + "you'll have to change them manually, sorry about that." + ) raise PipelexSetupError(msg) from deck_validation_exc except InferenceBackendCredentialsError as credentials_exc: backend_name = credentials_exc.backend_name From 459b684f57284ebe3615a5bdebb80c4151a7beb7 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 11:45:43 +0200 Subject: [PATCH 011/115] Rename pipe category --- pipelex/core/pipes/pipe_blueprint.py | 6 +- pipelex/language/plx_factory.py | 20 ++++- .../pipelines/builder/builder_loop.py | 2 +- .../pipelines/builder/flow_factory.py | 4 +- .../pipelines/builder/pipe/pipe_batch_spec.py | 4 +- .../builder/pipe/pipe_compose_spec.py | 4 +- .../builder/pipe/pipe_condition_spec.py | 4 +- .../builder/pipe/pipe_extract_spec.py | 4 +- .../pipelines/builder/pipe/pipe_func_spec.py | 4 +- .../builder/pipe/pipe_img_gen_spec.py | 4 +- .../pipelines/builder/pipe/pipe_llm_spec.py | 4 +- .../builder/pipe/pipe_parallel_spec.py | 4 +- .../builder/pipe/pipe_sequence_spec.py | 4 +- .../pipelines/builder/pipe/pipe_signature.py | 6 +- .../batch/pipe_batch_blueprint.py | 2 +- .../condition/pipe_condition_blueprint.py | 2 +- .../parallel/pipe_parallel_blueprint.py | 2 +- .../sequence/pipe_sequence_blueprint.py | 2 +- .../compose/pipe_compose_blueprint.py | 2 +- .../extract/pipe_extract_blueprint.py | 2 +- .../func/pipe_func_blueprint.py | 2 +- .../img_gen/pipe_img_gen_blueprint.py | 2 +- .../pipe_operators/llm/pipe_llm_blueprint.py | 
2 +- .../pipelex/pipes/test_flow_factory.py | 6 +- .../core/bundles/test_data_pipe_sorter.py | 78 +++++++++---------- .../unit/pipelex/language/test_plx_factory.py | 6 +- .../pipe_controller/pipe_batch/test_data.py | 4 +- .../pipe_condition/test_data.py | 2 +- .../pipe_parallel/test_data.py | 6 +- .../pipe_sequence/test_data.py | 4 +- .../pipe_operator/pipe_compose/test_data.py | 2 +- .../pipe_operator/pipe_extract/test_data.py | 4 +- .../pipe/pipe_operator/pipe_func/test_data.py | 4 +- .../pipe_operator/pipe_img_gen/test_data.py | 4 +- .../pipelines/builder/pipe/test_data_pipe.py | 12 +-- 35 files changed, 119 insertions(+), 105 deletions(-) diff --git a/pipelex/core/pipes/pipe_blueprint.py b/pipelex/core/pipes/pipe_blueprint.py index a825fad06..2e803dafb 100644 --- a/pipelex/core/pipes/pipe_blueprint.py +++ b/pipelex/core/pipes/pipe_blueprint.py @@ -1,6 +1,6 @@ from typing import Any -from pydantic import BaseModel, ConfigDict, field_validator +from pydantic import BaseModel, ConfigDict, Field, field_validator from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.pipes.exceptions import PipeBlueprintError @@ -55,7 +55,7 @@ def value_list(cls) -> list[str]: class PipeBlueprint(BaseModel): model_config = ConfigDict(extra="forbid") source: str | None = None - category: Any + pipe_category: Any = Field(exclude=True) # Technical field for Union discrimination, not user-facing type: Any # TODO: Find a better way to handle this. 
description: str | None = None inputs: dict[str, str | InputRequirementBlueprint] | None = None @@ -94,7 +94,7 @@ def validate_pipe_type(value: Any) -> Any: raise PipeBlueprintError(msg) return value - @field_validator("category", mode="after") + @field_validator("pipe_category", mode="after") @staticmethod def validate_pipe_category(value: Any) -> Any: """Validate that the pipe category is one of the allowed values.""" diff --git a/pipelex/language/plx_factory.py b/pipelex/language/plx_factory.py index 2d9fd666e..02b34518b 100644 --- a/pipelex/language/plx_factory.py +++ b/pipelex/language/plx_factory.py @@ -22,6 +22,7 @@ class SectionKey(StrEnum): CONCEPT_STRUCTURE_FIELD_KEY = "structure" PIPE_TEMPLATE_FIELD_KEY = "template" +PIPE_CATEGORY_FIELD_KEY = "pipe_category" class PlxFactory: @@ -102,7 +103,7 @@ def convert_mapping_to_table( # If field ordering is provided, add fields in the specified order first if field_ordering: for field_key in field_ordering: - if field_key in mapping and field_key != "category": # Skip category field (pipe metadata) + if field_key in mapping and field_key != PIPE_CATEGORY_FIELD_KEY: # Skip category field (pipe metadata) field_value = mapping[field_key] if isinstance(field_value, Mapping): # Special handling for template field - create nested table instead of inline @@ -117,7 +118,7 @@ def convert_mapping_to_table( # Add any remaining fields not in the ordering for field_key, field_value in mapping.items(): - if field_key not in field_ordering and field_key != "category": + if field_key not in field_ordering and field_key != PIPE_CATEGORY_FIELD_KEY: if isinstance(field_value, Mapping): # Special handling for template field - create nested table instead of inline if field_key == PIPE_TEMPLATE_FIELD_KEY: @@ -132,7 +133,7 @@ def convert_mapping_to_table( # No field ordering provided, use original logic for field_key, field_value in mapping.items(): # Skip the category field as it's not needed in PLX output (pipe metadata) - if 
field_key == "category": + if field_key == PIPE_CATEGORY_FIELD_KEY: continue if isinstance(field_value, Mapping): @@ -328,7 +329,20 @@ def dict_to_plx_styled_toml(cls, data: Mapping[str, Any]) -> str: return cls.add_spaces_to_inline_tables(toml_output) return toml_output + # @classmethod + # def _remove_pipe_category_from_pipes(cls, data: dict[str, Any]) -> dict[str, Any]: + # """Remove the technical pipe_category field from all pipe definitions.""" + # if "pipe" in data and isinstance(data["pipe"], dict): + # pipe_section = cast("dict[str, Any]", data["pipe"]) + # for pipe_def in pipe_section.values(): + # if isinstance(pipe_def, dict): + # pipe_def_dict = cast("dict[str, Any]", pipe_def) + # if "pipe_category" in pipe_def_dict: + # del pipe_def_dict["pipe_category"] + # return data + @classmethod def make_plx_content(cls, blueprint: PipelexBundleBlueprint) -> str: blueprint_dict = blueprint.model_dump(serialize_as_any=True) + # blueprint_dict = cls._remove_pipe_category_from_pipes(blueprint_dict) return cls.dict_to_plx_styled_toml(data=blueprint_dict) diff --git a/pipelex/libraries/pipelines/builder/builder_loop.py b/pipelex/libraries/pipelines/builder/builder_loop.py index 0ccca249b..fe0c47d4c 100644 --- a/pipelex/libraries/pipelines/builder/builder_loop.py +++ b/pipelex/libraries/pipelines/builder/builder_loop.py @@ -59,7 +59,7 @@ def _fix_bundle_error(self, bundle_error: PipelexBundleError, pipelex_bundle_spe raise PipelexBundleUnexpectedError(message=msg) from bundle_error match static_error.error_type: case StaticValidationErrorType.MISSING_INPUT_VARIABLE | StaticValidationErrorType.EXTRANEOUS_INPUT_VARIABLE: - if not AllowedPipeCategories.is_controller_by_str(category_str=pipe_spec.category): + if not AllowedPipeCategories.is_controller_by_str(category_str=pipe_spec.pipe_category): msg = ( f"Static validation error: pipelex_bundle_spec had an input requirement error for a pipe spec of type " f"{pipe_spec.type} for considered pipe code: 
'{static_error.pipe_code}' but it was not a PipeController. " diff --git a/pipelex/libraries/pipelines/builder/flow_factory.py b/pipelex/libraries/pipelines/builder/flow_factory.py index f70701270..917e7164d 100644 --- a/pipelex/libraries/pipelines/builder/flow_factory.py +++ b/pipelex/libraries/pipelines/builder/flow_factory.py @@ -48,7 +48,7 @@ def make_from_bundle_blueprint(bundle_blueprint: PipelexBundleBlueprint) -> Flow if bundle_blueprint.pipe: for pipe_code, pipe_blueprint in bundle_blueprint.pipe.items(): - if pipe_blueprint.category == AllowedPipeCategories.PIPE_CONTROLLER: + if pipe_blueprint.pipe_category == AllowedPipeCategories.PIPE_CONTROLLER: # Keep controllers as-is (they are already blueprints which match spec structure) # Type check to ensure we only assign controller blueprints if isinstance( @@ -88,7 +88,7 @@ def _convert_blueprint_to_signature(pipe_code: str, pipe_blueprint: Any) -> Pipe return PipeSignature( code=pipe_code, - category="PipeSignature", + pipe_category="PipeSignature", type=pipe_blueprint.type, description=pipe_blueprint.description or "", inputs=inputs, diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_batch_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_batch_spec.py index 56b8127ca..e2770bb5f 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_batch_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_batch_spec.py @@ -26,7 +26,7 @@ class PipeBatchSpec(PipeSpec): """ type: Literal["PipeBatch"] = "PipeBatch" - category: Literal["PipeController"] = "PipeController" + pipe_category: Literal["PipeController"] = "PipeController" branch_pipe_code: str = Field( description="The pipe code to execute for each item in the input list. This pipe is instantiated once per item in parallel." 
) @@ -44,7 +44,7 @@ def to_blueprint(self) -> PipeBatchBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, branch_pipe_code=self.branch_pipe_code, input_list_name=self.input_list_name, input_item_name=self.input_item_name, diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py index d578720bd..52adf3c6a 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py @@ -77,7 +77,7 @@ class PipeComposeSpec(PipeSpec): """PipeComposeSpec defines a templating operation based on a Jinja2 template.""" type: SkipJsonSchema[Literal["PipeCompose"]] = "PipeCompose" - category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" + pipe_category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" template: str = Field(description="Jinja2 template string") target_format: TargetFormat | str = Field(description="Target format for the output") @@ -106,6 +106,6 @@ def to_blueprint(self) -> PipeComposeBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, template=template_blueprint, ) diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_condition_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_condition_spec.py index 44d6dd7c9..12321c67c 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_condition_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_condition_spec.py @@ -21,7 +21,7 @@ class PipeConditionSpec(PipeSpec): """ type: SkipJsonSchema[Literal["PipeCondition"]] = "PipeCondition" - category: SkipJsonSchema[Literal["PipeController"]] = "PipeController" + pipe_category: SkipJsonSchema[Literal["PipeController"]] = "PipeController" jinja2_expression_template: str = Field(description="Jinja2 
expression to evaluate.") outcomes: dict[str, str] = Field(..., description="Mapping `dict[str, str]` of condition to outcomes.") default_outcome: str | SpecialOutcome = Field(description="The fallback outcome if the expression result does not match any key in outcome map.") @@ -34,7 +34,7 @@ def to_blueprint(self) -> PipeConditionBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, expression_template=self.jinja2_expression_template, expression=None, outcomes=self.outcomes, diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py index 7dd4016f4..c28ae447f 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py @@ -44,7 +44,7 @@ class PipeExtractSpec(PipeSpec): """ type: SkipJsonSchema[Literal["PipeExtract"]] = "PipeExtract" - category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" + pipe_category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" extract_skill: ExtractSkill | str = Field(description="Select the most adequate extraction model skill according to the task to be performed.") page_images: bool | None = Field(default=None, description="Whether to include detected images in the Extract output.") page_image_captions: bool | None = Field(default=None, description="Whether to generate captions for detected images using AI.") @@ -83,7 +83,7 @@ def to_blueprint(self) -> PipeExtractBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, model=extract_model_choice, page_images=self.page_images, page_image_captions=self.page_image_captions, diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_func_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_func_spec.py index 
16e190ae0..7f569c478 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_func_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_func_spec.py @@ -12,7 +12,7 @@ class PipeFuncSpec(PipeSpec): """PipeFunc enables calling custom functions in the Pipelex framework.""" type: SkipJsonSchema[Literal["PipeFunc"]] = "PipeFunc" - category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" + pipe_category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" function_name: str = Field(description="The name of the function to call.") @override @@ -23,6 +23,6 @@ def to_blueprint(self) -> PipeFuncBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, function_name=self.function_name, ) diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_img_gen_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_img_gen_spec.py index 365744eb5..669e4bf89 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_img_gen_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_img_gen_spec.py @@ -39,7 +39,7 @@ class PipeImgGenSpec(PipeSpec): """ type: Literal["PipeImgGen"] = "PipeImgGen" - category: Literal["PipeOperator"] = "PipeOperator" + pipe_category: Literal["PipeOperator"] = "PipeOperator" img_gen_skill: ImgGenSkill | None = None nb_output: int | None = Field(default=None, ge=1) @@ -60,7 +60,7 @@ def to_blueprint(self) -> PipeImgGenBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, img_gen_prompt=None, img_gen_prompt_var_name=None, model=self.img_gen_skill.model_recommendation if self.img_gen_skill else None, diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_llm_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_llm_spec.py index c9aa5b442..21cca5d56 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_llm_spec.py +++ 
b/pipelex/libraries/pipelines/builder/pipe/pipe_llm_spec.py @@ -82,7 +82,7 @@ class PipeLLMSpec(PipeSpec): """ type: SkipJsonSchema[Literal["PipeLLM"]] = "PipeLLM" - category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" + pipe_category: SkipJsonSchema[Literal["PipeOperator"]] = "PipeOperator" llm: LLMSkill | str = Field(description="Select the most adequate LLM model skill according to the task to be performed.") temperature: float | None = Field(default=None, ge=0, le=1) system_prompt: str | None = Field(default=None, description="A system prompt to guide the LLM's behavior, style and skills. Can be a template.") @@ -143,7 +143,7 @@ def to_blueprint(self) -> PipeLLMBlueprint: return PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description=base_blueprint.description, inputs=base_blueprint.inputs, output=base_blueprint.output, diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_parallel_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_parallel_spec.py index c147fdec2..9e09ba112 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_parallel_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_parallel_spec.py @@ -27,7 +27,7 @@ class PipeParallelSpec(PipeSpec): """ type: Literal["PipeParallel"] = "PipeParallel" - category: Literal["PipeController"] = "PipeController" + pipe_category: Literal["PipeController"] = "PipeController" parallels: list[SubPipeSpec] = Field(description="List of SubPipeSpec instances to execute concurrently.") add_each_output: bool = Field(description="Whether to include individual pipe outputs in the combined result.") combined_output: str | None = Field(default=None, description="Optional ConceptCode in PascalCasefor the combined output structure.") @@ -62,7 +62,7 @@ def to_blueprint(self) -> PipeParallelBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, 
parallels=core_parallels, add_each_output=self.add_each_output, combined_output=self.combined_output, diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_sequence_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_sequence_spec.py index 92856ad32..c7ece8140 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_sequence_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_sequence_spec.py @@ -16,7 +16,7 @@ class PipeSequenceSpec(PipeSpec): """ type: SkipJsonSchema[Literal["PipeSequence"]] = "PipeSequence" - category: SkipJsonSchema[Literal["PipeController"]] = "PipeController" + pipe_category: SkipJsonSchema[Literal["PipeController"]] = "PipeController" steps: list[SubPipeSpec] = Field( description=("List of SubPipeSpec instances to execute sequentially. Each step runs after the previous one completes.") ) @@ -30,6 +30,6 @@ def to_blueprint(self) -> PipeSequenceBlueprint: inputs=base_blueprint.inputs, output=base_blueprint.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, steps=core_steps, ) diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_signature.py b/pipelex/libraries/pipelines/builder/pipe/pipe_signature.py index 2ad9c9bbb..864f3538e 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_signature.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_signature.py @@ -18,8 +18,8 @@ class PipeSignature(StructuredContent): """ code: str = Field(description="Pipe code identifying the pipe. Must be snake_case.") - category: Literal["PipeSignature"] = "PipeSignature" type: AllowedPipeTypes = Field(description="Pipe type.") + pipe_category: Literal["PipeSignature"] = "PipeSignature" description: str = Field(description="What the pipe does") inputs: dict[str, str] = Field( description="Pipe inputs: keys are the input variable_names in snake_case, values are the ConceptCodes in PascalCase." @@ -39,7 +39,7 @@ class PipeSpec(StructuredContent): pipe_code: str = Field(description="Pipe code. 
Must be snake_case.") type: Any = Field(description=f"Pipe type. It is defined with type `Any` but validated at runtime and it must be one of: {AllowedPipeTypes}") - category: Any = Field( + pipe_category: Any = Field( description=f"Pipe category. It is defined with type `Any` but validated at runtime and it must be one of: {AllowedPipeCategories}" ) description: str | None = Field(description="Natural language description of what the pipe does.") @@ -106,5 +106,5 @@ def to_blueprint(self) -> PipeBlueprint: inputs=converted_inputs, output=self.output, type=self.type, - category=self.category, + pipe_category=self.pipe_category, ) diff --git a/pipelex/pipe_controllers/batch/pipe_batch_blueprint.py b/pipelex/pipe_controllers/batch/pipe_batch_blueprint.py index 7fefdb2cf..e65090aac 100644 --- a/pipelex/pipe_controllers/batch/pipe_batch_blueprint.py +++ b/pipelex/pipe_controllers/batch/pipe_batch_blueprint.py @@ -7,7 +7,7 @@ class PipeBatchBlueprint(PipeBlueprint): type: Literal["PipeBatch"] = "PipeBatch" - category: Literal["PipeController"] = "PipeController" + pipe_category: Literal["PipeController"] = "PipeController" branch_pipe_code: str input_list_name: str | None = None input_item_name: str | None = None diff --git a/pipelex/pipe_controllers/condition/pipe_condition_blueprint.py b/pipelex/pipe_controllers/condition/pipe_condition_blueprint.py index e03136cc8..e8940d992 100644 --- a/pipelex/pipe_controllers/condition/pipe_condition_blueprint.py +++ b/pipelex/pipe_controllers/condition/pipe_condition_blueprint.py @@ -11,7 +11,7 @@ class PipeConditionBlueprint(PipeBlueprint): type: Literal["PipeCondition"] = "PipeCondition" - category: Literal["PipeController"] = "PipeController" + pipe_category: Literal["PipeController"] = "PipeController" expression_template: str | None = None expression: str | None = None outcomes: OutcomeMap = Field(default_factory=OutcomeMap) diff --git a/pipelex/pipe_controllers/parallel/pipe_parallel_blueprint.py 
b/pipelex/pipe_controllers/parallel/pipe_parallel_blueprint.py index 8fba98166..5c498cf0a 100644 --- a/pipelex/pipe_controllers/parallel/pipe_parallel_blueprint.py +++ b/pipelex/pipe_controllers/parallel/pipe_parallel_blueprint.py @@ -12,7 +12,7 @@ class PipeParallelBlueprint(PipeBlueprint): type: Literal["PipeParallel"] = "PipeParallel" - category: Literal["PipeController"] = "PipeController" + pipe_category: Literal["PipeController"] = "PipeController" parallels: list[SubPipeBlueprint] add_each_output: bool = False combined_output: str | None = None diff --git a/pipelex/pipe_controllers/sequence/pipe_sequence_blueprint.py b/pipelex/pipe_controllers/sequence/pipe_sequence_blueprint.py index 58a498e6c..4df7f4344 100644 --- a/pipelex/pipe_controllers/sequence/pipe_sequence_blueprint.py +++ b/pipelex/pipe_controllers/sequence/pipe_sequence_blueprint.py @@ -8,7 +8,7 @@ class PipeSequenceBlueprint(PipeBlueprint): type: Literal["PipeSequence"] = "PipeSequence" - category: Literal["PipeController"] = "PipeController" + pipe_category: Literal["PipeController"] = "PipeController" steps: list[SubPipeBlueprint] @property diff --git a/pipelex/pipe_operators/compose/pipe_compose_blueprint.py b/pipelex/pipe_operators/compose/pipe_compose_blueprint.py index 22af36c1b..ef2bf87f6 100644 --- a/pipelex/pipe_operators/compose/pipe_compose_blueprint.py +++ b/pipelex/pipe_operators/compose/pipe_compose_blueprint.py @@ -8,7 +8,7 @@ class PipeComposeBlueprint(PipeBlueprint): type: Literal["PipeCompose"] = "PipeCompose" - category: Literal["PipeOperator"] = "PipeOperator" + pipe_category: Literal["PipeOperator"] = "PipeOperator" template: str | TemplateBlueprint @property diff --git a/pipelex/pipe_operators/extract/pipe_extract_blueprint.py b/pipelex/pipe_operators/extract/pipe_extract_blueprint.py index 55a23c72d..518b56296 100644 --- a/pipelex/pipe_operators/extract/pipe_extract_blueprint.py +++ b/pipelex/pipe_operators/extract/pipe_extract_blueprint.py @@ -6,7 +6,7 @@ class 
PipeExtractBlueprint(PipeBlueprint): type: Literal["PipeExtract"] = "PipeExtract" - category: Literal["PipeOperator"] = "PipeOperator" + pipe_category: Literal["PipeOperator"] = "PipeOperator" model: ExtractModelChoice | None = None page_images: bool | None = None page_image_captions: bool | None = None diff --git a/pipelex/pipe_operators/func/pipe_func_blueprint.py b/pipelex/pipe_operators/func/pipe_func_blueprint.py index 5d9af5be2..723d9a657 100644 --- a/pipelex/pipe_operators/func/pipe_func_blueprint.py +++ b/pipelex/pipe_operators/func/pipe_func_blueprint.py @@ -7,5 +7,5 @@ class PipeFuncBlueprint(PipeBlueprint): type: Literal["PipeFunc"] = "PipeFunc" - category: Literal["PipeOperator"] = "PipeOperator" + pipe_category: Literal["PipeOperator"] = "PipeOperator" function_name: str = Field(description="The name of the function to call.") diff --git a/pipelex/pipe_operators/img_gen/pipe_img_gen_blueprint.py b/pipelex/pipe_operators/img_gen/pipe_img_gen_blueprint.py index 6d88e2357..734e45bd4 100644 --- a/pipelex/pipe_operators/img_gen/pipe_img_gen_blueprint.py +++ b/pipelex/pipe_operators/img_gen/pipe_img_gen_blueprint.py @@ -9,7 +9,7 @@ class PipeImgGenBlueprint(PipeBlueprint): type: Literal["PipeImgGen"] = "PipeImgGen" - category: Literal["PipeOperator"] = "PipeOperator" + pipe_category: Literal["PipeOperator"] = "PipeOperator" img_gen_prompt: str | None = None img_gen_prompt_var_name: str | None = None diff --git a/pipelex/pipe_operators/llm/pipe_llm_blueprint.py b/pipelex/pipe_operators/llm/pipe_llm_blueprint.py index 19ce6aa13..d438e73f4 100644 --- a/pipelex/pipe_operators/llm/pipe_llm_blueprint.py +++ b/pipelex/pipe_operators/llm/pipe_llm_blueprint.py @@ -16,7 +16,7 @@ class StructuringMethod(StrEnum): class PipeLLMBlueprint(PipeBlueprint): type: Literal["PipeLLM"] = "PipeLLM" - category: Literal["PipeOperator"] = "PipeOperator" + pipe_category: Literal["PipeOperator"] = "PipeOperator" model: LLMModelChoice | None = None model_to_structure: LLMModelChoice | 
None = None diff --git a/tests/integration/pipelex/pipes/test_flow_factory.py b/tests/integration/pipelex/pipes/test_flow_factory.py index c4ce678f0..9b4530b69 100644 --- a/tests/integration/pipelex/pipes/test_flow_factory.py +++ b/tests/integration/pipelex/pipes/test_flow_factory.py @@ -70,8 +70,8 @@ async def test_load_plx_and_convert_to_flow( assert len(flow.flow_elements) > 0 # Log some details about what we found - controller_count = sum(1 for pipe in flow.flow_elements.values() if pipe.category == "PipeController") - operator_count = sum(1 for pipe in flow.flow_elements.values() if pipe.category == "PipeSignature") + controller_count = sum(1 for pipe in flow.flow_elements.values() if pipe.pipe_category == "PipeController") + operator_count = sum(1 for pipe in flow.flow_elements.values() if pipe.pipe_category == "PipeSignature") log.info(f"flow contains {len(flow.flow_elements)} pipes: {controller_count} controllers, {operator_count} operators (as signatures)") @@ -111,7 +111,7 @@ async def test_flow_converts_operators_to_signatures(self): # Find an operator pipe (LLM pipe) - converted to signature operator_pipe = flow.flow_elements.get("summarize_discord_channel_update_for_new_members") assert operator_pipe is not None - assert operator_pipe.category == "PipeSignature" + assert operator_pipe.pipe_category == "PipeSignature" assert operator_pipe.type == "PipeLLM" # Verify it has the signature properties diff --git a/tests/unit/pipelex/core/bundles/test_data_pipe_sorter.py b/tests/unit/pipelex/core/bundles/test_data_pipe_sorter.py index 100f32552..3adeea351 100644 --- a/tests/unit/pipelex/core/bundles/test_data_pipe_sorter.py +++ b/tests/unit/pipelex/core/bundles/test_data_pipe_sorter.py @@ -18,9 +18,9 @@ class PipeSorterTestCases: # Test case 1: No dependencies - all operators NO_DEPENDENCIES_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { - "pipe_c": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="C", inputs={}, output="Text"), - 
"pipe_a": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="A", inputs={}, output="Text"), - "pipe_b": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="B", inputs={}, output="Text"), + "pipe_c": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="C", inputs={}, output="Text"), + "pipe_a": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="A", inputs={}, output="Text"), + "pipe_b": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="B", inputs={}, output="Text"), } NO_DEPENDENCIES_EXPECTED: ClassVar[list[str]] = ["pipe_a", "pipe_b", "pipe_c"] # Alphabetical order (all are roots) @@ -28,16 +28,16 @@ class PipeSorterTestCases: SIMPLE_CHAIN_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { "pipe_c": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="C depends on B", inputs={}, output="Text", steps=[SubPipeBlueprint(pipe="pipe_b", result="result_b")], ), - "pipe_a": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="A no deps", inputs={}, output="Text"), + "pipe_a": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="A no deps", inputs={}, output="Text"), "pipe_b": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="B depends on A", inputs={}, output="Text", @@ -55,7 +55,7 @@ class PipeSorterTestCases: DIAMOND_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { "pipe_d": PipeParallelBlueprint( type="PipeParallel", - category="PipeController", + pipe_category="PipeController", description="D depends on B and C", inputs={}, output="Text", @@ -65,10 +65,10 @@ class PipeSorterTestCases: ], add_each_output=True, ), - "pipe_a": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="A", inputs={}, output="Text"), + "pipe_a": PipeLLMBlueprint(type="PipeLLM", 
pipe_category="PipeOperator", description="A", inputs={}, output="Text"), "pipe_c": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="C depends on A", inputs={}, output="Text", @@ -76,7 +76,7 @@ class PipeSorterTestCases: ), "pipe_b": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="B depends on A", inputs={}, output="Text", @@ -92,23 +92,23 @@ class PipeSorterTestCases: # Chain 1: A -> B "pipe_b": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="B depends on A", inputs={}, output="Text", steps=[SubPipeBlueprint(pipe="pipe_a", result="result_a")], ), - "pipe_a": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="A", inputs={}, output="Text"), + "pipe_a": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="A", inputs={}, output="Text"), # Chain 2: X -> Y "pipe_y": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="Y depends on X", inputs={}, output="Text", steps=[SubPipeBlueprint(pipe="pipe_x", result="result_x")], ), - "pipe_x": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="X", inputs={}, output="Text"), + "pipe_x": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="X", inputs={}, output="Text"), } # Depth-first from roots (pipe_b, pipe_y alphabetically): pipe_b -> pipe_a, then pipe_y -> pipe_x MULTIPLE_CHAINS_EXPECTED: ClassVar[list[str]] = ["pipe_b", "pipe_a", "pipe_y", "pipe_x"] @@ -117,13 +117,13 @@ class PipeSorterTestCases: PIPE_BATCH_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { "batch_pipe": PipeBatchBlueprint( type="PipeBatch", - category="PipeController", + pipe_category="PipeController", description="Batch depends on process", inputs={}, output="Text", 
branch_pipe_code="process_item", ), - "process_item": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="Process", inputs={}, output="Text"), + "process_item": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="Process", inputs={}, output="Text"), } PIPE_BATCH_EXPECTED: ClassVar[list[str]] = ["batch_pipe", "process_item"] @@ -131,7 +131,7 @@ class PipeSorterTestCases: PIPE_CONDITION_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { "router": PipeConditionBlueprint( type="PipeCondition", - category="PipeController", + pipe_category="PipeController", description="Routes to different pipes", inputs={}, output="Text", @@ -142,9 +142,9 @@ class PipeSorterTestCases: }, default_outcome="process_default", ), - "process_large": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="Large", inputs={}, output="Text"), - "process_small": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="Small", inputs={}, output="Text"), - "process_default": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="Default", inputs={}, output="Text"), + "process_large": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="Large", inputs={}, output="Text"), + "process_small": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="Small", inputs={}, output="Text"), + "process_default": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="Default", inputs={}, output="Text"), } # Router first, then its dependencies in alphabetical order PIPE_CONDITION_EXPECTED: ClassVar[list[str]] = ["router", "process_default", "process_large", "process_small"] @@ -153,7 +153,7 @@ class PipeSorterTestCases: CIRCULAR_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { "pipe_a": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="A depends on C (circular!)", inputs={}, output="Text", @@ 
-161,7 +161,7 @@ class PipeSorterTestCases: ), "pipe_b": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="B depends on A", inputs={}, output="Text", @@ -169,7 +169,7 @@ class PipeSorterTestCases: ), "pipe_c": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="C depends on B", inputs={}, output="Text", @@ -181,7 +181,7 @@ class PipeSorterTestCases: MISSING_DEPENDENCY_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { "pipe_b": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="B depends on A and Z (Z doesn't exist)", inputs={}, output="Text", @@ -190,7 +190,7 @@ class PipeSorterTestCases: SubPipeBlueprint(pipe="pipe_z", result="result_z"), # Z doesn't exist in this bundle ], ), - "pipe_a": PipeLLMBlueprint(type="PipeLLM", category="PipeOperator", description="A", inputs={}, output="Text"), + "pipe_a": PipeLLMBlueprint(type="PipeLLM", pipe_category="PipeOperator", description="A", inputs={}, output="Text"), } MISSING_DEPENDENCY_EXPECTED: ClassVar[list[str]] = ["pipe_b", "pipe_a"] # Z is ignored as it's not in the bundle @@ -198,35 +198,35 @@ class PipeSorterTestCases: IMAGE_INVERSION_PIPES: ClassVar[dict[str, PipeBlueprintUnion]] = { "analyze_image_content": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Analyzes the input photo to understand visual elements, composition, mood, etc.", inputs={"input_photo": "Image"}, output="ImageAnalysis", ), "define_opposite_concept": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Determines what constitutes the opposite for the analyzed image", inputs={"image_analysis": "ImageAnalysis"}, output="OppositeDefinition", ), "generate_image_prompt": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + 
pipe_category="PipeOperator", description="Crafts a detailed image generation prompt from the opposite definition", inputs={"opposite_definition": "OppositeDefinition"}, output="ImagePrompt", ), "generate_opposite_image": PipeImgGenBlueprint( type="PipeImgGen", - category="PipeOperator", + pipe_category="PipeOperator", description="Generates the opposite image using AI", inputs={"generation_prompt": "ImagePrompt"}, output="Image", ), "photo_opposite_pipeline": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="Main pipeline that generates the opposite of an input photo", inputs={"input_photo": "Image"}, output="Image", @@ -252,7 +252,7 @@ class PipeSorterTestCases: # Main sequence with nested controllers "main_pipeline": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="Main pipeline with nested controllers", inputs={"input": "Text"}, output="Text", @@ -265,7 +265,7 @@ class PipeSorterTestCases: # Nested sequence "nested_sequence": PipeSequenceBlueprint( type="PipeSequence", - category="PipeController", + pipe_category="PipeController", description="Nested preparation sequence", inputs={"input": "Text"}, output="Text", @@ -277,7 +277,7 @@ class PipeSorterTestCases: # Condition router "router": PipeConditionBlueprint( type="PipeCondition", - category="PipeController", + pipe_category="PipeController", description="Routes based on size", inputs={"prepared": "Text"}, output="Text", @@ -291,42 +291,42 @@ class PipeSorterTestCases: # LLM operators "prepare_data": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Prepare the data", inputs={"input": "Text"}, output="Text", ), "validate_data": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Validate the prepared data", inputs={"prepared_data": "Text"}, output="Text", ), 
"process_small": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Process small items", inputs={"item": "Text"}, output="Text", ), "process_large": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Process large items", inputs={"item": "Text"}, output="Text", ), "process_default": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Default processing", inputs={"item": "Text"}, output="Text", ), "final_process": PipeLLMBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="Final processing step", inputs={"processed": "Text"}, output="Text", diff --git a/tests/unit/pipelex/language/test_plx_factory.py b/tests/unit/pipelex/language/test_plx_factory.py index 559ffa3b4..3c79312ac 100644 --- a/tests/unit/pipelex/language/test_plx_factory.py +++ b/tests/unit/pipelex/language/test_plx_factory.py @@ -5,7 +5,7 @@ from pytest_mock import MockerFixture from pipelex.language.plx_config import PlxConfig, PlxConfigForConcepts, PlxConfigForPipes, PlxConfigInlineTables, PlxConfigStrings -from pipelex.language.plx_factory import PlxFactory +from pipelex.language.plx_factory import PIPE_CATEGORY_FIELD_KEY, PlxFactory class TestPlxFactoryUnit: @@ -206,13 +206,13 @@ def test_convert_mapping_to_table_skips_category(self, mocker: MockerFixture, mo """Test that category field is skipped.""" _mock_config = mocker.patch.object(PlxFactory, "_plx_config", return_value=mock_plx_config) - mapping = {"field1": "value1", "category": "should_be_skipped", "field2": "value2"} + mapping = {"field1": "value1", PIPE_CATEGORY_FIELD_KEY: "should_be_skipped", "field2": "value2"} result = PlxFactory.convert_mapping_to_table(mapping) assert isinstance(result, tomlkit.items.Table) # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType] assert "field1" in result assert "field2" in result 
- assert "category" not in result + assert PIPE_CATEGORY_FIELD_KEY not in result def test_add_spaces_to_inline_tables_simple(self): """Test adding spaces to simple inline tables.""" diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_data.py index 05d8a9b64..5a928c830 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_data.py @@ -20,7 +20,7 @@ class PipeBatchTestCases: inputs={"items": InputRequirementBlueprint(concept="ItemList")}, output="ProcessedItems", type="PipeBatch", - category="PipeController", + pipe_category="PipeController", branch_pipe_code="process_item", input_list_name=None, input_item_name=None, @@ -43,7 +43,7 @@ class PipeBatchTestCases: inputs={"data": InputRequirementBlueprint(concept="DataList")}, output="Results", type="PipeBatch", - category="PipeController", + pipe_category="PipeController", branch_pipe_code="transform_data", input_list_name="data_list", input_item_name="current_data", diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_data.py index bfecba1d9..e98a63abd 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_data.py @@ -26,7 +26,7 @@ class PipeConditionTestCases: inputs={"item": InputRequirementBlueprint(concept="Item")}, output="ProcessedItem", type="PipeCondition", - category="PipeController", + pipe_category="PipeController", expression=None, expression_template="{{ item.category }}", outcomes={ diff --git 
a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_data.py index 287352b1d..f846bada6 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_data.py @@ -27,7 +27,7 @@ class PipeParallelTestCases: inputs={"data": InputRequirementBlueprint(concept="Data")}, output="Results", type="PipeParallel", - category="PipeController", + pipe_category="PipeController", parallels=[ SubPipeBlueprint(pipe="analyze_data", result="analysis"), SubPipeBlueprint(pipe="transform_data", result="transformed"), @@ -58,7 +58,7 @@ class PipeParallelTestCases: inputs={"input": InputRequirementBlueprint(concept="Input")}, output="CombinedResult", type="PipeParallel", - category="PipeController", + pipe_category="PipeController", parallels=[ SubPipeBlueprint(pipe="pipe1", result="result1"), SubPipeBlueprint(pipe="pipe2", result="result2"), @@ -88,7 +88,7 @@ class PipeParallelTestCases: inputs={"input": InputRequirementBlueprint(concept="Input")}, output="CombinedResult", type="PipeParallel", - category="PipeController", + pipe_category="PipeController", parallels=[ SubPipeBlueprint(pipe="pipe1", result="result1"), SubPipeBlueprint(pipe="pipe2", result="result2"), diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_data.py index 522994251..cdda545ef 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_data.py @@ -26,7 +26,7 @@ class PipeSequenceTestCases: inputs={"input_data": InputRequirementBlueprint(concept="Text")}, 
output="ProcessedData", type="PipeSequence", - category="PipeController", + pipe_category="PipeController", steps=[ SubPipeBlueprint(pipe="step1", result="result1"), SubPipeBlueprint(pipe="step2", result="result2"), @@ -57,7 +57,7 @@ class PipeSequenceTestCases: inputs={"items": InputRequirementBlueprint(concept="ItemList")}, output="ProcessedItems", type="PipeSequence", - category="PipeController", + pipe_category="PipeController", steps=[ SubPipeBlueprint(pipe="prepare", result="prepared_items"), SubPipeBlueprint( diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py index d519826f3..9527aaa82 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py @@ -24,7 +24,7 @@ class PipeComposeTestCases: inputs={"data": InputRequirementBlueprint(concept="Data")}, output="RenderedText", type="PipeCompose", - category="PipeOperator", + pipe_category="PipeOperator", template=TemplateBlueprint( source="Hello {{ data.name }}!", category=TemplateCategory.MARKDOWN, diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py index b299d46aa..e26e5bf83 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py @@ -21,7 +21,7 @@ class PipeExtractTestCases: inputs={"image": InputRequirementBlueprint(concept="Image")}, output="ExtractedText", type="PipeExtract", - category="PipeOperator", + pipe_category="PipeOperator", model="base_ocr_mistral", ), ) @@ -44,7 +44,7 @@ class PipeExtractTestCases: inputs={"document": 
InputRequirementBlueprint(concept="PDF")}, output="PageContent", type="PipeExtract", - category="PipeOperator", + pipe_category="PipeOperator", model="base_ocr_mistral", page_images=True, page_image_captions=True, diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_data.py index f6f796b26..9bd1d6785 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_data.py @@ -20,7 +20,7 @@ class PipeFuncTestCases: inputs={"data": InputRequirementBlueprint(concept="Data")}, output="ProcessedData", type="PipeFunc", - category="PipeOperator", + pipe_category="PipeOperator", function_name="process_data", ), ) @@ -39,7 +39,7 @@ class PipeFuncTestCases: inputs=None, output="GeneratedData", type="PipeFunc", - category="PipeOperator", + pipe_category="PipeOperator", function_name="generate_data", ), ) diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_data.py index 473e393b5..eb7fdeaee 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_data.py @@ -19,7 +19,7 @@ class PipeImgGenTestCases: inputs=None, output="GeneratedImage", type="PipeImgGen", - category="PipeOperator", + pipe_category="PipeOperator", img_gen_prompt=None, img_gen_prompt_var_name=None, model=None, @@ -47,7 +47,7 @@ class PipeImgGenTestCases: inputs={"description": InputRequirementBlueprint(concept="Text")}, output="Image", type="PipeImgGen", - category="PipeOperator", + pipe_category="PipeOperator", img_gen_prompt=None, model=ImgGenSkill.GEN_IMAGE.model_recommendation, 
aspect_ratio=None, diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_pipe.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_pipe.py index 62a199fe8..93cd684ab 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_pipe.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_pipe.py @@ -11,14 +11,14 @@ class PipeBlueprintTestCases: PipeSpec( pipe_code="simple_pipe", type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="A simple pipe", inputs={"input": "Text"}, output="ProcessedText", ), PipeBlueprint( type="PipeLLM", - category="PipeOperator", + pipe_category="PipeOperator", description="A simple pipe", inputs={"input": InputRequirementBlueprint(concept="Text")}, output="ProcessedText", @@ -30,7 +30,7 @@ class PipeBlueprintTestCases: PipeSpec( pipe_code="pipe_with_requirements", type="PipeFunc", - category="PipeOperator", + pipe_category="PipeOperator", description="Pipe with input requirements", inputs={ "data": "Data", @@ -40,7 +40,7 @@ class PipeBlueprintTestCases: ), PipeBlueprint( type="PipeFunc", - category="PipeOperator", + pipe_category="PipeOperator", description="Pipe with input requirements", inputs={ "data": InputRequirementBlueprint(concept="Data"), @@ -55,14 +55,14 @@ class PipeBlueprintTestCases: PipeSpec( pipe_code="pipe_no_inputs", type="PipeFunc", - category="PipeOperator", + pipe_category="PipeOperator", description="Pipe without inputs", inputs={}, output="GeneratedData", ), PipeBlueprint( type="PipeFunc", - category="PipeOperator", + pipe_category="PipeOperator", description="Pipe without inputs", inputs=None, output="GeneratedData", From a9c56eba9898c0f0386aaac3f9cf4234b1c32d94 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 11:56:23 +0200 Subject: [PATCH 012/115] Finish rename pipe category --- pipelex/cogt/templating/template_blueprint.py | 4 +- pipelex/core/pipes/pipe_abstract.py | 2 +- 
.../builder/pipe/pipe_compose_spec.py | 2 +- pipelex/pipe_controllers/pipe_controller.py | 2 +- .../pipe_operators/compose/pipe_compose.py | 8 +- .../compose/pipe_compose_blueprint.py | 2 +- .../compose/pipe_compose_factory.py | 2 +- .../llm/llm_prompt_blueprint.py | 6 +- .../pipe_operators/llm/pipe_llm_factory.py | 4 +- pipelex/pipe_operators/pipe_operator.py | 2 +- .../pipelex/pipes/test_pipe_compose.py | 4 +- tests/test_pipelines/discord_newsletter.plx | 2 +- .../{test_jinja2.plx => test_compose.plx} | 2 +- .../pipe_operator/pipe_compose/__init__.py | 0 .../pipes/pipe_operator/pipe_compose/data.py | 140 ++++++++++++++++++ .../pipe_compose/test_pipe_compose_input.py | 55 +++++++ .../pipes/operators/compose/pipe_compose.py | 2 +- .../pipe_operator/pipe_compose/test_data.py | 2 +- 18 files changed, 218 insertions(+), 23 deletions(-) rename tests/test_pipelines/misc_tests/{test_jinja2.plx => test_compose.plx} (96%) create mode 100644 tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/__init__.py create mode 100644 tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/data.py create mode 100644 tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/test_pipe_compose_input.py diff --git a/pipelex/cogt/templating/template_blueprint.py b/pipelex/cogt/templating/template_blueprint.py index 6c9c682aa..4afc27280 100644 --- a/pipelex/cogt/templating/template_blueprint.py +++ b/pipelex/cogt/templating/template_blueprint.py @@ -8,7 +8,7 @@ class TemplateBlueprint(BaseModel): - source: str = Field(description="Raw template source") + template: str = Field(description="Raw template source") templating_style: TemplatingStyle | None = Field(default=None, description="Style of prompting to use (typically for different LLMs)") category: TemplateCategory = Field( description="Category of the template (could also be HTML, MARKDOWN, MERMAID, etc.), influences template rendering rules", @@ -17,5 +17,5 @@ class TemplateBlueprint(BaseModel): @model_validator(mode="after") def 
validate_template(self) -> "TemplateBlueprint": - check_jinja2_parsing(template_source=self.source, template_category=self.category) + check_jinja2_parsing(template_source=self.template, template_category=self.category) return self diff --git a/pipelex/core/pipes/pipe_abstract.py b/pipelex/core/pipes/pipe_abstract.py index f062d4499..f93a60642 100644 --- a/pipelex/core/pipes/pipe_abstract.py +++ b/pipelex/core/pipes/pipe_abstract.py @@ -16,7 +16,7 @@ class PipeAbstract(ABC, BaseModel): model_config = ConfigDict(strict=True, extra="forbid") - category: Any # Any so that subclasses can put a Literal + pipe_category: Any # Any so that subclasses can put a Literal type: Any # Any so that subclasses can put a Literal code: str domain: str diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py b/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py index 52adf3c6a..a3062b980 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py +++ b/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py @@ -95,7 +95,7 @@ def to_blueprint(self) -> PipeComposeBlueprint: category = target_format.category template_blueprint = TemplateBlueprint( - source=self.template, + template=self.template, templating_style=templating_style, category=category, extra_context=None, diff --git a/pipelex/pipe_controllers/pipe_controller.py b/pipelex/pipe_controllers/pipe_controller.py index e186eeef6..d4d94430a 100644 --- a/pipelex/pipe_controllers/pipe_controller.py +++ b/pipelex/pipe_controllers/pipe_controller.py @@ -12,7 +12,7 @@ class PipeController(PipeAbstract): - category: Literal["PipeController"] = "PipeController" + pipe_category: Literal["PipeController"] = "PipeController" @property def class_name(self) -> str: diff --git a/pipelex/pipe_operators/compose/pipe_compose.py b/pipelex/pipe_operators/compose/pipe_compose.py index 99def447e..523cdebd1 100644 --- a/pipelex/pipe_operators/compose/pipe_compose.py +++ 
b/pipelex/pipe_operators/compose/pipe_compose.py @@ -44,13 +44,13 @@ class PipeCompose(PipeOperator[PipeComposeOutput]): template: str templating_style: TemplatingStyle | None = None - template_category: TemplateCategory = TemplateCategory.BASIC + category: TemplateCategory = TemplateCategory.BASIC extra_context: dict[str, Any] | None = None @model_validator(mode="after") def validate_template(self) -> Self: try: - check_jinja2_parsing(template_source=self.template, template_category=self.template_category) + check_jinja2_parsing(template_source=self.template, template_category=self.category) except Jinja2TemplateSyntaxError as exc: msg = f"Could not parse template for PipeCompose '{self.code}: {exc}" raise PipeDefinitionError(msg) from exc @@ -92,7 +92,7 @@ def desc(self) -> str: @override def required_variables(self) -> set[str]: required_variables = detect_jinja2_required_variables( - template_category=self.template_category, + template_category=self.category, template_source=self.template, ) return { @@ -125,7 +125,7 @@ async def _run_operator_pipe( context=context, template=self.template, templating_style=self.templating_style, - template_category=self.template_category, + template_category=self.category, ) log.verbose(f"Jinja2 rendered text:\n{jinja2_text}") assert isinstance(jinja2_text, str) diff --git a/pipelex/pipe_operators/compose/pipe_compose_blueprint.py b/pipelex/pipe_operators/compose/pipe_compose_blueprint.py index ef2bf87f6..28927c757 100644 --- a/pipelex/pipe_operators/compose/pipe_compose_blueprint.py +++ b/pipelex/pipe_operators/compose/pipe_compose_blueprint.py @@ -14,7 +14,7 @@ class PipeComposeBlueprint(PipeBlueprint): @property def template_source(self) -> str: if isinstance(self.template, TemplateBlueprint): - return self.template.source + return self.template.template return self.template @property diff --git a/pipelex/pipe_operators/compose/pipe_compose_factory.py b/pipelex/pipe_operators/compose/pipe_compose_factory.py index 
c63e6f558..5a54dcac4 100644 --- a/pipelex/pipe_operators/compose/pipe_compose_factory.py +++ b/pipelex/pipe_operators/compose/pipe_compose_factory.py @@ -50,7 +50,7 @@ def make_from_blueprint( ), template=preprocessed_template, templating_style=blueprint.templating_style, - template_category=blueprint.template_category, + category=blueprint.template_category, extra_context=blueprint.extra_context, ) diff --git a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py index 2df0dc2da..32b2232b7 100644 --- a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py +++ b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py @@ -34,7 +34,7 @@ def required_variables(self) -> set[str]: required_variables.update(user_images_top_object_name) if self.prompt_blueprint: - template_source = preprocess_template(self.prompt_blueprint.source) + template_source = preprocess_template(self.prompt_blueprint.template) required_variables.update( detect_jinja2_required_variables( template_category=self.prompt_blueprint.category, @@ -43,7 +43,7 @@ def required_variables(self) -> set[str]: ) if self.system_prompt_blueprint: - system_prompt_template_source = preprocess_template(self.system_prompt_blueprint.source) + system_prompt_template_source = preprocess_template(self.system_prompt_blueprint.template) required_variables.update( detect_jinja2_required_variables( template_category=self.system_prompt_blueprint.category, @@ -174,7 +174,7 @@ async def _unravel_text( return await get_content_generator().make_templated_text( context=context, - template=jinja2_blueprint.source, + template=jinja2_blueprint.template, templating_style=self.templating_style, template_category=jinja2_blueprint.category, ) diff --git a/pipelex/pipe_operators/llm/pipe_llm_factory.py b/pipelex/pipe_operators/llm/pipe_llm_factory.py index e9c73e751..c8b277704 100644 --- a/pipelex/pipe_operators/llm/pipe_llm_factory.py +++ b/pipelex/pipe_operators/llm/pipe_llm_factory.py @@ -36,7 
+36,7 @@ def make_from_blueprint( if system_prompt: try: system_prompt_jinja2_blueprint = TemplateBlueprint( - source=system_prompt, + template=system_prompt, category=TemplateCategory.LLM_PROMPT, ) except Jinja2TemplateSyntaxError as exc: @@ -50,7 +50,7 @@ def make_from_blueprint( if blueprint.prompt: try: user_text_jinja2_blueprint = TemplateBlueprint( - source=blueprint.prompt, + template=blueprint.prompt, category=TemplateCategory.LLM_PROMPT, ) except Jinja2TemplateSyntaxError as exc: diff --git a/pipelex/pipe_operators/pipe_operator.py b/pipelex/pipe_operators/pipe_operator.py index e4f1629be..8db54169b 100644 --- a/pipelex/pipe_operators/pipe_operator.py +++ b/pipelex/pipe_operators/pipe_operator.py @@ -17,7 +17,7 @@ class PipeOperator(PipeAbstract, Generic[PipeOperatorOutputType]): - category: Literal["PipeOperator"] = "PipeOperator" + pipe_category: Literal["PipeOperator"] = "PipeOperator" @property def class_name(self) -> str: diff --git a/tests/integration/pipelex/pipes/test_pipe_compose.py b/tests/integration/pipelex/pipes/test_pipe_compose.py index 91b90621b..efe306684 100644 --- a/tests/integration/pipelex/pipes/test_pipe_compose.py +++ b/tests/integration/pipelex/pipes/test_pipe_compose.py @@ -32,7 +32,7 @@ async def test_pipe_compose_for_any( pipe_compose_blueprint = PipeComposeBlueprint( description="Jinja2 test for any context", template=TemplateBlueprint( - source=template_source, + template=template_source, templating_style=TemplatingStyle(tag_style=TagStyle.TICKS, text_format=TextFormat.MARKDOWN), category=TemplateCategory.MARKDOWN, extra_context={"place_holder": "[some text from test_pipe_compose_for_any]"}, @@ -63,7 +63,7 @@ async def test_pipe_compose_for_stuff( pipe_compose_blueprint = PipeComposeBlueprint( description="Jinja2 test for stuff context", template=TemplateBlueprint( - source=template_source, + template=template_source, templating_style=TemplatingStyle(tag_style=TagStyle.TICKS, text_format=TextFormat.MARKDOWN), 
category=TemplateCategory.MARKDOWN, extra_context={"place_holder": "[some text from test_pipe_compose_for_stuff]"}, diff --git a/tests/test_pipelines/discord_newsletter.plx b/tests/test_pipelines/discord_newsletter.plx index 8081a23cf..885ab08f6 100644 --- a/tests/test_pipelines/discord_newsletter.plx +++ b/tests/test_pipelines/discord_newsletter.plx @@ -84,7 +84,7 @@ output = "HtmlNewsletter" [pipe.format_html_newsletter.template] category = "html" -source = """ +template = """

☀️ Weekly Summary

diff --git a/tests/test_pipelines/misc_tests/test_jinja2.plx b/tests/test_pipelines/misc_tests/test_compose.plx similarity index 96% rename from tests/test_pipelines/misc_tests/test_jinja2.plx rename to tests/test_pipelines/misc_tests/test_compose.plx index ca9ef194e..d54f33add 100644 --- a/tests/test_pipelines/misc_tests/test_jinja2.plx +++ b/tests/test_pipelines/misc_tests/test_compose.plx @@ -8,7 +8,7 @@ inputs = { text = "Text" } output = "Text" [pipe.jinja2_test_1.template] -source = """ +template = """ This is a simple test prompt: @text """ diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/__init__.py b/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/data.py b/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/data.py new file mode 100644 index 000000000..b0066b52b --- /dev/null +++ b/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/data.py @@ -0,0 +1,140 @@ +from typing import ClassVar + +from pipelex.cogt.templating.template_blueprint import TemplateBlueprint +from pipelex.cogt.templating.template_category import TemplateCategory +from pipelex.pipe_operators.compose.pipe_compose_blueprint import PipeComposeBlueprint +from pipelex.tools.jinja2.jinja2_errors import Jinja2TemplateSyntaxError + + +class PipeComposeInputTestCases: + """Test cases for PipeCompose input validation.""" + + # Valid test cases: (test_id, blueprint) + VALID_SIMPLE_TEMPLATE: ClassVar[tuple[str, PipeComposeBlueprint]] = ( + "valid_simple_template", + PipeComposeBlueprint( + description="Test case: valid_simple_template", + inputs={"name": "native.Text"}, + output="native.Text", + template="Hello {{ name }}!", + ), + ) + + VALID_NO_INPUTS: ClassVar[tuple[str, PipeComposeBlueprint]] = ( + "valid_no_inputs", + PipeComposeBlueprint( + description="Test case: valid_no_inputs", + inputs={}, + output="native.Text", + 
template="Hello World!", + ), + ) + + VALID_TWO_INPUTS: ClassVar[tuple[str, PipeComposeBlueprint]] = ( + "valid_two_inputs", + PipeComposeBlueprint( + description="Test case: valid_two_inputs", + inputs={"first_name": "native.Text", "last_name": "native.Text"}, + output="native.Text", + template="Hello {{ first_name }} {{ last_name }}!", + ), + ) + + VALID_WITH_TEMPLATE_BLUEPRINT: ClassVar[tuple[str, PipeComposeBlueprint]] = ( + "valid_with_template_blueprint", + PipeComposeBlueprint( + description="Test case: valid_with_template_blueprint", + inputs={"content": "native.Text"}, + output="native.Text", + template=TemplateBlueprint( + template="# Title\n\n{{ content }}", + category=TemplateCategory.MARKDOWN, + ), + ), + ) + + VALID_WITH_JINJA2_CONTROL: ClassVar[tuple[str, PipeComposeBlueprint]] = ( + "valid_with_jinja2_control", + PipeComposeBlueprint( + description="Test case: valid_with_jinja2_control", + inputs={"items": "native.Text"}, + output="native.Text", + template="{% for item in items %}{{ item }}{% endfor %}", + ), + ) + + VALID_WITH_HTML_TEMPLATE: ClassVar[tuple[str, PipeComposeBlueprint]] = ( + "valid_with_html_template", + PipeComposeBlueprint( + description="Test case: valid_with_html_template", + inputs={"title": "native.Text", "body": "native.Text"}, + output="native.Text", + template=TemplateBlueprint( + template="

{{ title }}

{{ body }}

", + category=TemplateCategory.HTML, + ), + ), + ) + + VALID_COMPLEX_JINJA2: ClassVar[tuple[str, PipeComposeBlueprint]] = ( + "valid_complex_jinja2", + PipeComposeBlueprint( + description="Test case: valid_complex_jinja2", + inputs={"user": "native.Text", "items": "native.Text"}, + output="native.Text", + template="Hello {{ user }}!\n{% if items %}Items: {{ items }}{% endif %}", + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeComposeBlueprint]]] = [ + VALID_SIMPLE_TEMPLATE, + VALID_NO_INPUTS, + VALID_TWO_INPUTS, + VALID_WITH_TEMPLATE_BLUEPRINT, + VALID_WITH_JINJA2_CONTROL, + VALID_WITH_HTML_TEMPLATE, + VALID_COMPLEX_JINJA2, + ] + + # Error test cases: (test_id, blueprint, expected_error_type, expected_error_message_fragment) + ERROR_MISSING_INPUT_IN_TEMPLATE: ClassVar[tuple[str, PipeComposeBlueprint, type[Exception], str]] = ( + "missing_input_in_template", + PipeComposeBlueprint( + description="Test case: missing_input_in_template", + inputs={}, + output="native.Text", + template="Hello {{ name }}!", + ), + Exception, # Will be PipeDefinitionError + "Required variable 'name' is not in the inputs", + ) + + ERROR_TWO_INPUTS_THREE_VARIABLES: ClassVar[tuple[str, PipeComposeBlueprint, type[Exception], str]] = ( + "two_inputs_three_variables", + PipeComposeBlueprint( + description="Test case: two_inputs_three_variables", + inputs={"first_name": "native.Text", "last_name": "native.Text"}, + output="native.Text", + template="Hello {{ first_name }} {{ last_name }} from {{ city }}!", + ), + Exception, # Will be PipeDefinitionError + "Required variable 'city' is not in the inputs", + ) + + ERROR_INVALID_JINJA2_SYNTAX: ClassVar[tuple[str, PipeComposeBlueprint, type[Exception], str]] = ( + "invalid_jinja2_syntax", + PipeComposeBlueprint( + description="Test case: invalid_jinja2_syntax", + inputs={"name": "native.Text"}, + output="native.Text", + template="Hello {{ name }!", + ), + Jinja2TemplateSyntaxError, + "Could not parse Jinja2 template", + ) + + ERROR_CASES: 
ClassVar[list[tuple[str, PipeComposeBlueprint, type[Exception], str]]] = [ + ERROR_MISSING_INPUT_IN_TEMPLATE, + ERROR_TWO_INPUTS_THREE_VARIABLES, + ERROR_INVALID_JINJA2_SYNTAX, + ] diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/test_pipe_compose_input.py b/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/test_pipe_compose_input.py new file mode 100644 index 000000000..7f03f43dc --- /dev/null +++ b/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/test_pipe_compose_input.py @@ -0,0 +1,55 @@ +import pytest + +from pipelex import log +from pipelex.pipe_operators.compose.pipe_compose_blueprint import PipeComposeBlueprint +from pipelex.pipe_operators.compose.pipe_compose_factory import PipeComposeFactory +from tests.unit.pipelex.core.pipes.pipe_operator.pipe_compose.data import PipeComposeInputTestCases + + +class TestPipeComposeValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeComposeInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: PipeComposeBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + # Validation happens automatically during instantiation via model_validator + pipe_compose = PipeComposeFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # Assert that the pipe was created successfully + assert pipe_compose is not None + assert pipe_compose.code == f"test_pipe_{test_id}" + + @pytest.mark.parametrize( + ("test_id", "blueprint", "expected_error_type", "expected_error_message_fragment"), + PipeComposeInputTestCases.ERROR_CASES, + ) + def test_validate_inputs_error_cases( + self, + test_id: str, + blueprint: PipeComposeBlueprint, + expected_error_type: type[Exception], + expected_error_message_fragment: str, + ): + log.debug(f"Testing error case: {test_id}") + + with pytest.raises(expected_error_type) as exc_info: + PipeComposeFactory.make_from_blueprint( + 
domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + error_str = str(exc_info.value) + assert expected_error_message_fragment in error_str, ( + f"Expected fragment '{expected_error_message_fragment}' not found in error message: {error_str}" + ) diff --git a/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py b/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py index aa7bc6dc6..c61f7901a 100644 --- a/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py +++ b/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py @@ -52,7 +52,7 @@ description="Process a Jinja2 template", output=NativeConceptCode.TEXT, template=TemplateBlueprint( - source="Hello {{ name }}!", + template="Hello {{ name }}!", category=TemplateCategory.MARKDOWN, ), ), diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py index 9527aaa82..be167dbfe 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py @@ -26,7 +26,7 @@ class PipeComposeTestCases: type="PipeCompose", pipe_category="PipeOperator", template=TemplateBlueprint( - source="Hello {{ data.name }}!", + template="Hello {{ data.name }}!", category=TemplateCategory.MARKDOWN, templating_style=TemplatingStyle( tag_style=TagStyle.TICKS, From 9175dac73efbd1d4b51ca285a85cf8fecde8291f Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Fri, 10 Oct 2025 11:57:30 +0200 Subject: [PATCH 013/115] fix ut --- .../builder/pipe/pipe_operator/pipe_extract/test_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py 
b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py index b299d46aa..c0e81b1a7 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py @@ -12,14 +12,14 @@ class PipeExtractTestCases: pipe_code="extractor", description="Extract text from image", inputs={"image": "Image"}, - output="ExtractedText", + output="Page", extract_skill="extract_text_from_visuals", ), PipeExtractBlueprint( source=None, description="Extract text from image", inputs={"image": InputRequirementBlueprint(concept="Image")}, - output="ExtractedText", + output="Page", type="PipeExtract", category="PipeOperator", model="base_ocr_mistral", @@ -32,7 +32,7 @@ class PipeExtractTestCases: pipe_code="advanced_extract", description="Extract with page options", inputs={"document": "PDF"}, - output="PageContent", + output="Page", extract_skill="extract_text_from_pdf", page_images=True, page_image_captions=True, @@ -42,7 +42,7 @@ class PipeExtractTestCases: source=None, description="Extract with page options", inputs={"document": InputRequirementBlueprint(concept="PDF")}, - output="PageContent", + output="Page", type="PipeExtract", category="PipeOperator", model="base_ocr_mistral", From da2354aa04273dac906e3753561c030567595ab3 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 11:59:08 +0200 Subject: [PATCH 014/115] Fix UT --- .../core/test_data/pipes/operators/compose/pipe_compose.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py b/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py index c61f7901a..e7df0c42d 100644 --- a/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py +++ b/tests/unit/pipelex/core/test_data/pipes/operators/compose/pipe_compose.py @@ -40,7 +40,7 @@ 
output = "Text" [pipe.compose_output.template] -source = "Hello {{ name }}!" +template = "Hello {{ name }}!" category = "markdown" """, PipelexBundleBlueprint( From df14214fe006b64fa49e31a23625efe52a11cb7a Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 15:01:40 +0200 Subject: [PATCH 015/115] Move UT to correct place --- .../pipes => pipe_operators}/__init__.py | 0 .../pipe_compose}/__init__.py | 0 .../pipe_compose/data.py | 0 .../pipe_compose/test_pipe_compose_input.py | 2 +- .../pipe_extract}/__init__.py | 0 .../pipe_operators/pipe_extract/data.py | 103 ++++++++++++ .../pipe_extract/test_pipe_extract_input.py | 55 ++++++ .../pipe_func}/__init__.py | 0 .../pipelex/pipe_operators/pipe_func/data.py | 71 ++++++++ .../pipe_func/test_pipe_func_input.py | 30 ++++ .../pipe_operators/pipe_img_gen/__init__.py | 0 .../pipe_operators/pipe_img_gen/data.py | 159 ++++++++++++++++++ .../pipe_img_gen/test_pipe_img_gen_input.py | 54 ++++++ .../pipe_operators/pipe_llm/__init__.py | 0 .../pipe_llm/data.py | 0 .../pipe_llm/test_pipe_llm_input.py | 2 +- tests/unit/pipelex/pipe_run/__init__.py | 0 .../pipelex/{core/pipes => pipe_run}/data.py | 0 .../test_pipe_run_params.py | 2 +- 19 files changed, 475 insertions(+), 3 deletions(-) rename tests/unit/pipelex/{core/pipes => pipe_operators}/__init__.py (100%) rename tests/unit/pipelex/{core/pipes/pipe_operator => pipe_operators/pipe_compose}/__init__.py (100%) rename tests/unit/pipelex/{core/pipes/pipe_operator => pipe_operators}/pipe_compose/data.py (100%) rename tests/unit/pipelex/{core/pipes/pipe_operator => pipe_operators}/pipe_compose/test_pipe_compose_input.py (94%) rename tests/unit/pipelex/{core/pipes/pipe_operator/pipe_compose => pipe_operators/pipe_extract}/__init__.py (100%) create mode 100644 tests/unit/pipelex/pipe_operators/pipe_extract/data.py create mode 100644 tests/unit/pipelex/pipe_operators/pipe_extract/test_pipe_extract_input.py rename tests/unit/pipelex/{core/pipes/pipe_operator/pipe_llm => 
pipe_operators/pipe_func}/__init__.py (100%) create mode 100644 tests/unit/pipelex/pipe_operators/pipe_func/data.py create mode 100644 tests/unit/pipelex/pipe_operators/pipe_func/test_pipe_func_input.py create mode 100644 tests/unit/pipelex/pipe_operators/pipe_img_gen/__init__.py create mode 100644 tests/unit/pipelex/pipe_operators/pipe_img_gen/data.py create mode 100644 tests/unit/pipelex/pipe_operators/pipe_img_gen/test_pipe_img_gen_input.py create mode 100644 tests/unit/pipelex/pipe_operators/pipe_llm/__init__.py rename tests/unit/pipelex/{core/pipes/pipe_operator => pipe_operators}/pipe_llm/data.py (100%) rename tests/unit/pipelex/{core/pipes/pipe_operator => pipe_operators}/pipe_llm/test_pipe_llm_input.py (94%) create mode 100644 tests/unit/pipelex/pipe_run/__init__.py rename tests/unit/pipelex/{core/pipes => pipe_run}/data.py (100%) rename tests/unit/pipelex/{core/pipes => pipe_run}/test_pipe_run_params.py (99%) diff --git a/tests/unit/pipelex/core/pipes/__init__.py b/tests/unit/pipelex/pipe_operators/__init__.py similarity index 100% rename from tests/unit/pipelex/core/pipes/__init__.py rename to tests/unit/pipelex/pipe_operators/__init__.py diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_compose/__init__.py similarity index 100% rename from tests/unit/pipelex/core/pipes/pipe_operator/__init__.py rename to tests/unit/pipelex/pipe_operators/pipe_compose/__init__.py diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/data.py b/tests/unit/pipelex/pipe_operators/pipe_compose/data.py similarity index 100% rename from tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/data.py rename to tests/unit/pipelex/pipe_operators/pipe_compose/data.py diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/test_pipe_compose_input.py b/tests/unit/pipelex/pipe_operators/pipe_compose/test_pipe_compose_input.py similarity index 94% rename from 
tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/test_pipe_compose_input.py rename to tests/unit/pipelex/pipe_operators/pipe_compose/test_pipe_compose_input.py index 7f03f43dc..dbf8aa90c 100644 --- a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/test_pipe_compose_input.py +++ b/tests/unit/pipelex/pipe_operators/pipe_compose/test_pipe_compose_input.py @@ -3,7 +3,7 @@ from pipelex import log from pipelex.pipe_operators.compose.pipe_compose_blueprint import PipeComposeBlueprint from pipelex.pipe_operators.compose.pipe_compose_factory import PipeComposeFactory -from tests.unit.pipelex.core.pipes.pipe_operator.pipe_compose.data import PipeComposeInputTestCases +from tests.unit.pipelex.pipe_operators.pipe_compose.data import PipeComposeInputTestCases class TestPipeComposeValidateInputs: diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_extract/__init__.py similarity index 100% rename from tests/unit/pipelex/core/pipes/pipe_operator/pipe_compose/__init__.py rename to tests/unit/pipelex/pipe_operators/pipe_extract/__init__.py diff --git a/tests/unit/pipelex/pipe_operators/pipe_extract/data.py b/tests/unit/pipelex/pipe_operators/pipe_extract/data.py new file mode 100644 index 000000000..230517642 --- /dev/null +++ b/tests/unit/pipelex/pipe_operators/pipe_extract/data.py @@ -0,0 +1,103 @@ +from typing import ClassVar + +from pipelex.pipe_operators.extract.pipe_extract_blueprint import PipeExtractBlueprint + + +class PipeExtractInputTestCases: + """Test cases for PipeExtract input validation.""" + + # Valid test cases: (test_id, blueprint) + VALID_IMAGE_INPUT: ClassVar[tuple[str, PipeExtractBlueprint]] = ( + "valid_image_input", + PipeExtractBlueprint( + description="Test case: valid_image_input", + inputs={"document_image": "native.Image"}, + output="native.Page", + ), + ) + + VALID_PDF_INPUT: ClassVar[tuple[str, PipeExtractBlueprint]] = ( + "valid_pdf_input", + PipeExtractBlueprint( 
+ description="Test case: valid_pdf_input", + inputs={"document": "native.PDF"}, + output="native.Page", + ), + ) + + VALID_IMAGE_WITH_PAGE_IMAGES: ClassVar[tuple[str, PipeExtractBlueprint]] = ( + "valid_image_with_page_images", + PipeExtractBlueprint( + description="Test case: valid_image_with_page_images", + inputs={"invoice_image": "native.Image"}, + output="native.Page", + page_images=True, + ), + ) + + VALID_PDF_WITH_PAGE_VIEWS: ClassVar[tuple[str, PipeExtractBlueprint]] = ( + "valid_pdf_with_page_views", + PipeExtractBlueprint( + description="Test case: valid_pdf_with_page_views", + inputs={"contract": "native.PDF"}, + output="native.Page", + page_views=True, + page_views_dpi=150, + ), + ) + + VALID_IMAGE_WITH_CAPTIONS: ClassVar[tuple[str, PipeExtractBlueprint]] = ( + "valid_image_with_captions", + PipeExtractBlueprint( + description="Test case: valid_image_with_captions", + inputs={"report_image": "native.Image"}, + output="native.Page", + page_images=True, + page_image_captions=True, + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeExtractBlueprint]]] = [ + VALID_IMAGE_INPUT, + VALID_PDF_INPUT, + VALID_IMAGE_WITH_PAGE_IMAGES, + VALID_PDF_WITH_PAGE_VIEWS, + VALID_IMAGE_WITH_CAPTIONS, + ] + + # Error test cases: (test_id, blueprint, expected_error_message_fragment) + ERROR_NO_INPUT: ClassVar[tuple[str, PipeExtractBlueprint, str]] = ( + "no_input", + PipeExtractBlueprint( + description="Test case: no_input", + inputs={}, + output="native.Page", + ), + "missing_input_variable", + ) + + ERROR_TOO_MANY_INPUTS: ClassVar[tuple[str, PipeExtractBlueprint, str]] = ( + "too_many_inputs", + PipeExtractBlueprint( + description="Test case: too_many_inputs", + inputs={"image1": "native.Image", "image2": "native.Image"}, + output="native.Page", + ), + "too_many_candidate_inputs", + ) + + ERROR_WRONG_INPUT_TYPE: ClassVar[tuple[str, PipeExtractBlueprint, str]] = ( + "wrong_input_type", + PipeExtractBlueprint( + description="Test case: wrong_input_type", + 
inputs={"text_doc": "native.Text"}, + output="native.Page", + ), + "inadequate_input_concept", + ) + + ERROR_CASES: ClassVar[list[tuple[str, PipeExtractBlueprint, str]]] = [ + ERROR_NO_INPUT, + ERROR_TOO_MANY_INPUTS, + ERROR_WRONG_INPUT_TYPE, + ] diff --git a/tests/unit/pipelex/pipe_operators/pipe_extract/test_pipe_extract_input.py b/tests/unit/pipelex/pipe_operators/pipe_extract/test_pipe_extract_input.py new file mode 100644 index 000000000..359460d9b --- /dev/null +++ b/tests/unit/pipelex/pipe_operators/pipe_extract/test_pipe_extract_input.py @@ -0,0 +1,55 @@ +import pytest + +from pipelex import log +from pipelex.exceptions import StaticValidationError +from pipelex.pipe_operators.extract.pipe_extract_blueprint import PipeExtractBlueprint +from pipelex.pipe_operators.extract.pipe_extract_factory import PipeExtractFactory +from tests.unit.pipelex.pipe_operators.pipe_extract.data import PipeExtractInputTestCases + + +class TestPipeExtractValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeExtractInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: PipeExtractBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + # Validation happens automatically during instantiation via model_validator + pipe_extract = PipeExtractFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # Assert that the pipe was created successfully + assert pipe_extract is not None + assert pipe_extract.code == f"test_pipe_{test_id}" + + @pytest.mark.parametrize( + ("test_id", "blueprint", "expected_error_message_fragment"), + PipeExtractInputTestCases.ERROR_CASES, + ) + def test_validate_inputs_error_cases( + self, + test_id: str, + blueprint: PipeExtractBlueprint, + expected_error_message_fragment: str, + ): + log.debug(f"Testing error case: {test_id}") + + with pytest.raises(StaticValidationError) as exc_info: + 
PipeExtractFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + error_str = str(exc_info.value) + assert expected_error_message_fragment in error_str, ( + f"Expected fragment '{expected_error_message_fragment}' not found in error message: {error_str}" + ) diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_llm/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_func/__init__.py similarity index 100% rename from tests/unit/pipelex/core/pipes/pipe_operator/pipe_llm/__init__.py rename to tests/unit/pipelex/pipe_operators/pipe_func/__init__.py diff --git a/tests/unit/pipelex/pipe_operators/pipe_func/data.py b/tests/unit/pipelex/pipe_operators/pipe_func/data.py new file mode 100644 index 000000000..6572cff4b --- /dev/null +++ b/tests/unit/pipelex/pipe_operators/pipe_func/data.py @@ -0,0 +1,71 @@ +from typing import ClassVar + +from pipelex.pipe_operators.func.pipe_func_blueprint import PipeFuncBlueprint + + +class PipeFuncInputTestCases: + """Test cases for PipeFunc input validation.""" + + # Valid test cases: (test_id, blueprint) + VALID_NO_INPUTS: ClassVar[tuple[str, PipeFuncBlueprint]] = ( + "valid_no_inputs", + PipeFuncBlueprint( + description="Test case: valid_no_inputs", + inputs={}, + output="native.Text", + function_name="my_function", + ), + ) + + VALID_SINGLE_INPUT: ClassVar[tuple[str, PipeFuncBlueprint]] = ( + "valid_single_input", + PipeFuncBlueprint( + description="Test case: valid_single_input", + inputs={"input_data": "native.Text"}, + output="native.Text", + function_name="process_text", + ), + ) + + VALID_MULTIPLE_INPUTS: ClassVar[tuple[str, PipeFuncBlueprint]] = ( + "valid_multiple_inputs", + PipeFuncBlueprint( + description="Test case: valid_multiple_inputs", + inputs={"text_input": "native.Text", "number_input": "native.Number"}, + output="native.Text", + function_name="combine_data", + ), + ) + + VALID_IMAGE_INPUT: ClassVar[tuple[str, PipeFuncBlueprint]] = ( + 
"valid_image_input", + PipeFuncBlueprint( + description="Test case: valid_image_input", + inputs={"image": "native.Image"}, + output="native.Text", + function_name="process_image", + ), + ) + + VALID_MIXED_INPUTS: ClassVar[tuple[str, PipeFuncBlueprint]] = ( + "valid_mixed_inputs", + PipeFuncBlueprint( + description="Test case: valid_mixed_inputs", + inputs={"text": "native.Text", "image": "native.Image", "number": "native.Number"}, + output="native.Text", + function_name="process_all", + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeFuncBlueprint]]] = [ + VALID_NO_INPUTS, + VALID_SINGLE_INPUT, + VALID_MULTIPLE_INPUTS, + VALID_IMAGE_INPUT, + VALID_MIXED_INPUTS, + ] + + # Note: PipeFunc has minimal validation since it's very flexible + # The main validation is that function_name is required (enforced by Pydantic) + # We don't have error cases for inputs since any input configuration is valid + ERROR_CASES: ClassVar[list[tuple[str, PipeFuncBlueprint, str]]] = [] diff --git a/tests/unit/pipelex/pipe_operators/pipe_func/test_pipe_func_input.py b/tests/unit/pipelex/pipe_operators/pipe_func/test_pipe_func_input.py new file mode 100644 index 000000000..cffd07d99 --- /dev/null +++ b/tests/unit/pipelex/pipe_operators/pipe_func/test_pipe_func_input.py @@ -0,0 +1,30 @@ +import pytest + +from pipelex import log +from pipelex.pipe_operators.func.pipe_func_blueprint import PipeFuncBlueprint +from pipelex.pipe_operators.func.pipe_func_factory import PipeFuncFactory +from tests.unit.pipelex.pipe_operators.pipe_func.data import PipeFuncInputTestCases + + +class TestPipeFuncValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeFuncInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: PipeFuncBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + pipe_func = PipeFuncFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # 
Assert that the pipe was created successfully + assert pipe_func is not None + assert pipe_func.code == f"test_pipe_{test_id}" + assert pipe_func.function_name == blueprint.function_name diff --git a/tests/unit/pipelex/pipe_operators/pipe_img_gen/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_img_gen/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/pipe_operators/pipe_img_gen/data.py b/tests/unit/pipelex/pipe_operators/pipe_img_gen/data.py new file mode 100644 index 000000000..5ab79f8a5 --- /dev/null +++ b/tests/unit/pipelex/pipe_operators/pipe_img_gen/data.py @@ -0,0 +1,159 @@ +from typing import ClassVar + +from pipelex.cogt.img_gen.img_gen_job_components import AspectRatio, Background, OutputFormat +from pipelex.pipe_operators.img_gen.pipe_img_gen_blueprint import PipeImgGenBlueprint + + +class PipeImgGenInputTestCases: + """Test cases for PipeImgGen input validation.""" + + # Valid test cases: (test_id, blueprint) + VALID_TEXT_INPUT: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_text_input", + PipeImgGenBlueprint( + description="Test case: valid_text_input", + inputs={"prompt": "native.Text"}, + output="native.Image", + ), + ) + + VALID_WITH_INLINE_PROMPT: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_inline_prompt", + PipeImgGenBlueprint( + description="Test case: valid_with_inline_prompt", + inputs={}, + output="native.Image", + img_gen_prompt="A beautiful sunset over the ocean", + ), + ) + + VALID_WITH_ASPECT_RATIO: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_aspect_ratio", + PipeImgGenBlueprint( + description="Test case: valid_with_aspect_ratio", + inputs={"prompt": "native.Text"}, + output="native.Image", + aspect_ratio=AspectRatio.LANDSCAPE_16_9, + ), + ) + + VALID_WITH_NB_OUTPUT: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_nb_output", + PipeImgGenBlueprint( + description="Test case: valid_with_nb_output", + inputs={"prompt": "native.Text"}, + 
output="native.Image", + nb_output=3, + ), + ) + + VALID_WITH_SEED: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_seed", + PipeImgGenBlueprint( + description="Test case: valid_with_seed", + inputs={"prompt": "native.Text"}, + output="native.Image", + seed=42, + ), + ) + + VALID_WITH_SEED_AUTO: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_seed_auto", + PipeImgGenBlueprint( + description="Test case: valid_with_seed_auto", + inputs={"prompt": "native.Text"}, + output="native.Image", + seed="auto", + ), + ) + + VALID_WITH_BACKGROUND: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_background", + PipeImgGenBlueprint( + description="Test case: valid_with_background", + inputs={"prompt": "native.Text"}, + output="native.Image", + background=Background.TRANSPARENT, + ), + ) + + VALID_WITH_OUTPUT_FORMAT: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_output_format", + PipeImgGenBlueprint( + description="Test case: valid_with_output_format", + inputs={"prompt": "native.Text"}, + output="native.Image", + output_format=OutputFormat.PNG, + ), + ) + + VALID_WITH_IS_RAW: ClassVar[tuple[str, PipeImgGenBlueprint]] = ( + "valid_with_is_raw", + PipeImgGenBlueprint( + description="Test case: valid_with_is_raw", + inputs={"prompt": "native.Text"}, + output="native.Image", + is_raw=True, + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeImgGenBlueprint]]] = [ + VALID_TEXT_INPUT, + VALID_WITH_INLINE_PROMPT, + VALID_WITH_ASPECT_RATIO, + VALID_WITH_NB_OUTPUT, + VALID_WITH_SEED, + VALID_WITH_SEED_AUTO, + VALID_WITH_BACKGROUND, + VALID_WITH_OUTPUT_FORMAT, + VALID_WITH_IS_RAW, + ] + + # Error test cases: (test_id, blueprint, expected_error_message_fragment) + ERROR_NO_INPUT_NO_PROMPT: ClassVar[tuple[str, PipeImgGenBlueprint, str]] = ( + "no_input_no_prompt", + PipeImgGenBlueprint( + description="Test case: no_input_no_prompt", + inputs={}, + output="native.Image", + ), + "missing_input_variable", + ) + + ERROR_MULTIPLE_INPUTS: 
ClassVar[tuple[str, PipeImgGenBlueprint, str]] = ( + "multiple_inputs", + PipeImgGenBlueprint( + description="Test case: multiple_inputs", + inputs={"prompt1": "native.Text", "prompt2": "native.Text"}, + output="native.Image", + ), + "too_many_candidate_inputs", + ) + + ERROR_WRONG_INPUT_TYPE: ClassVar[tuple[str, PipeImgGenBlueprint, str]] = ( + "wrong_input_type", + PipeImgGenBlueprint( + description="Test case: wrong_input_type", + inputs={"image": "native.Image"}, + output="native.Image", + ), + "inadequate_input_concept", + ) + + ERROR_BOTH_PROMPT_AND_INPUT: ClassVar[tuple[str, PipeImgGenBlueprint, str]] = ( + "both_prompt_and_input", + PipeImgGenBlueprint( + description="Test case: both_prompt_and_input", + inputs={"prompt": "native.Text"}, + output="native.Image", + img_gen_prompt="A beautiful sunset", + ), + "There must be no inputs if img_gen_prompt is provided", + ) + + ERROR_CASES: ClassVar[list[tuple[str, PipeImgGenBlueprint, str]]] = [ + ERROR_NO_INPUT_NO_PROMPT, + ERROR_MULTIPLE_INPUTS, + ERROR_WRONG_INPUT_TYPE, + ERROR_BOTH_PROMPT_AND_INPUT, + ] diff --git a/tests/unit/pipelex/pipe_operators/pipe_img_gen/test_pipe_img_gen_input.py b/tests/unit/pipelex/pipe_operators/pipe_img_gen/test_pipe_img_gen_input.py new file mode 100644 index 000000000..ca07c75c5 --- /dev/null +++ b/tests/unit/pipelex/pipe_operators/pipe_img_gen/test_pipe_img_gen_input.py @@ -0,0 +1,54 @@ +import pytest + +from pipelex import log +from pipelex.exceptions import PipeDefinitionError, StaticValidationError +from pipelex.pipe_operators.img_gen.pipe_img_gen_blueprint import PipeImgGenBlueprint +from pipelex.pipe_operators.img_gen.pipe_img_gen_factory import PipeImgGenFactory +from tests.unit.pipelex.pipe_operators.pipe_img_gen.data import PipeImgGenInputTestCases + + +class TestPipeImgGenValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeImgGenInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: 
PipeImgGenBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + pipe_img_gen = PipeImgGenFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # Assert that the pipe was created successfully + assert pipe_img_gen is not None + assert pipe_img_gen.code == f"test_pipe_{test_id}" + + @pytest.mark.parametrize( + ("test_id", "blueprint", "expected_error_message_fragment"), + PipeImgGenInputTestCases.ERROR_CASES, + ) + def test_validate_inputs_error_cases( + self, + test_id: str, + blueprint: PipeImgGenBlueprint, + expected_error_message_fragment: str, + ): + log.debug(f"Testing error case: {test_id}") + + with pytest.raises((StaticValidationError, PipeDefinitionError)) as exc_info: + PipeImgGenFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + error_str = str(exc_info.value) + assert expected_error_message_fragment in error_str, ( + f"Expected fragment '{expected_error_message_fragment}' not found in error message: {error_str}" + ) diff --git a/tests/unit/pipelex/pipe_operators/pipe_llm/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_llm/data.py b/tests/unit/pipelex/pipe_operators/pipe_llm/data.py similarity index 100% rename from tests/unit/pipelex/core/pipes/pipe_operator/pipe_llm/data.py rename to tests/unit/pipelex/pipe_operators/pipe_llm/data.py diff --git a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_llm/test_pipe_llm_input.py b/tests/unit/pipelex/pipe_operators/pipe_llm/test_pipe_llm_input.py similarity index 94% rename from tests/unit/pipelex/core/pipes/pipe_operator/pipe_llm/test_pipe_llm_input.py rename to tests/unit/pipelex/pipe_operators/pipe_llm/test_pipe_llm_input.py index 48de06393..ecfa7ebe7 100644 --- 
a/tests/unit/pipelex/core/pipes/pipe_operator/pipe_llm/test_pipe_llm_input.py +++ b/tests/unit/pipelex/pipe_operators/pipe_llm/test_pipe_llm_input.py @@ -4,7 +4,7 @@ from pipelex.exceptions import StaticValidationError from pipelex.pipe_operators.llm.pipe_llm_blueprint import PipeLLMBlueprint from pipelex.pipe_operators.llm.pipe_llm_factory import PipeLLMFactory -from tests.unit.pipelex.core.pipes.pipe_operator.pipe_llm.data import PipeLLMInputTestCases +from tests.unit.pipelex.pipe_operators.pipe_llm.data import PipeLLMInputTestCases class TestPipeLLMValidateInputs: diff --git a/tests/unit/pipelex/pipe_run/__init__.py b/tests/unit/pipelex/pipe_run/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/core/pipes/data.py b/tests/unit/pipelex/pipe_run/data.py similarity index 100% rename from tests/unit/pipelex/core/pipes/data.py rename to tests/unit/pipelex/pipe_run/data.py diff --git a/tests/unit/pipelex/core/pipes/test_pipe_run_params.py b/tests/unit/pipelex/pipe_run/test_pipe_run_params.py similarity index 99% rename from tests/unit/pipelex/core/pipes/test_pipe_run_params.py rename to tests/unit/pipelex/pipe_run/test_pipe_run_params.py index 43e14a3ec..a927f61cf 100644 --- a/tests/unit/pipelex/core/pipes/test_pipe_run_params.py +++ b/tests/unit/pipelex/pipe_run/test_pipe_run_params.py @@ -6,7 +6,7 @@ make_output_multiplicity, output_multiplicity_to_apply, ) -from tests.unit.pipelex.core.pipes.data import MAKE_OUTPUT_MULTIPLICITY_TEST_CASES, OUTPUT_MULTIPLICITY_TO_APPLY_TEST_CASES +from tests.unit.pipelex.pipe_run.data import MAKE_OUTPUT_MULTIPLICITY_TEST_CASES, OUTPUT_MULTIPLICITY_TO_APPLY_TEST_CASES class TestMakeOutputMultiplicity: From d6945796dd2f935ae7e93e49b6cff27bf95c6f27 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Fri, 10 Oct 2025 15:27:52 +0200 Subject: [PATCH 016/115] Add UTs for pipe controllers --- .../unit/pipelex/pipe_controllers/__init__.py | 0 .../pipe_controllers/batch/__init__.py | 0 
.../pipelex/pipe_controllers/batch/data.py | 60 ++++++++ .../batch/test_pipe_batch_input.py | 30 ++++ .../pipe_controllers/condition/__init__.py | 0 .../pipe_controllers/condition/data.py | 144 ++++++++++++++++++ .../condition/test_pipe_condition_input.py | 59 +++++++ .../pipe_controllers/parallel/__init__.py | 0 .../pipelex/pipe_controllers/parallel/data.py | 111 ++++++++++++++ .../parallel/test_pipe_parallel_input.py | 59 +++++++ .../pipe_controllers/sequence/__init__.py | 0 .../pipelex/pipe_controllers/sequence/data.py | 105 +++++++++++++ .../sequence/test_pipe_sequence_input.py | 59 +++++++ 13 files changed, 627 insertions(+) create mode 100644 tests/unit/pipelex/pipe_controllers/__init__.py create mode 100644 tests/unit/pipelex/pipe_controllers/batch/__init__.py create mode 100644 tests/unit/pipelex/pipe_controllers/batch/data.py create mode 100644 tests/unit/pipelex/pipe_controllers/batch/test_pipe_batch_input.py create mode 100644 tests/unit/pipelex/pipe_controllers/condition/__init__.py create mode 100644 tests/unit/pipelex/pipe_controllers/condition/data.py create mode 100644 tests/unit/pipelex/pipe_controllers/condition/test_pipe_condition_input.py create mode 100644 tests/unit/pipelex/pipe_controllers/parallel/__init__.py create mode 100644 tests/unit/pipelex/pipe_controllers/parallel/data.py create mode 100644 tests/unit/pipelex/pipe_controllers/parallel/test_pipe_parallel_input.py create mode 100644 tests/unit/pipelex/pipe_controllers/sequence/__init__.py create mode 100644 tests/unit/pipelex/pipe_controllers/sequence/data.py create mode 100644 tests/unit/pipelex/pipe_controllers/sequence/test_pipe_sequence_input.py diff --git a/tests/unit/pipelex/pipe_controllers/__init__.py b/tests/unit/pipelex/pipe_controllers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/pipe_controllers/batch/__init__.py b/tests/unit/pipelex/pipe_controllers/batch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/tests/unit/pipelex/pipe_controllers/batch/data.py b/tests/unit/pipelex/pipe_controllers/batch/data.py new file mode 100644 index 000000000..f5ec2791e --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/batch/data.py @@ -0,0 +1,60 @@ +from typing import ClassVar + +from pipelex.pipe_controllers.batch.pipe_batch_blueprint import PipeBatchBlueprint + + +class PipeBatchInputTestCases: + """Test cases for PipeBatch input validation.""" + + # Valid test cases: (test_id, blueprint) + VALID_SIMPLE_BATCH: ClassVar[tuple[str, PipeBatchBlueprint]] = ( + "valid_simple_batch", + PipeBatchBlueprint( + description="Test case: valid_simple_batch", + inputs={"items": "native.Text"}, + output="native.Text", + branch_pipe_code="process_item", + input_list_name="items", + input_item_name="item", + ), + ) + + VALID_WITHOUT_EXPLICIT_NAMES: ClassVar[tuple[str, PipeBatchBlueprint]] = ( + "valid_without_explicit_names", + PipeBatchBlueprint( + description="Test case: valid_without_explicit_names", + inputs={"data_list": "native.Text"}, + output="native.Text", + branch_pipe_code="process_data", + ), + ) + + VALID_WITH_INPUT_LIST_NAME_ONLY: ClassVar[tuple[str, PipeBatchBlueprint]] = ( + "valid_with_input_list_name_only", + PipeBatchBlueprint( + description="Test case: valid_with_input_list_name_only", + inputs={"records": "native.Text"}, + output="native.Text", + branch_pipe_code="process_record", + input_list_name="records", + ), + ) + + VALID_MULTIPLE_INPUTS: ClassVar[tuple[str, PipeBatchBlueprint]] = ( + "valid_multiple_inputs", + PipeBatchBlueprint( + description="Test case: valid_multiple_inputs", + inputs={"items": "native.Text", "config": "native.Text"}, + output="native.Text", + branch_pipe_code="process_with_config", + input_list_name="items", + input_item_name="item", + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeBatchBlueprint]]] = [ + VALID_SIMPLE_BATCH, + VALID_WITHOUT_EXPLICIT_NAMES, + VALID_WITH_INPUT_LIST_NAME_ONLY, + VALID_MULTIPLE_INPUTS, + ] diff --git 
a/tests/unit/pipelex/pipe_controllers/batch/test_pipe_batch_input.py b/tests/unit/pipelex/pipe_controllers/batch/test_pipe_batch_input.py new file mode 100644 index 000000000..2eef16fa6 --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/batch/test_pipe_batch_input.py @@ -0,0 +1,30 @@ +import pytest + +from pipelex import log +from pipelex.pipe_controllers.batch.pipe_batch_blueprint import PipeBatchBlueprint +from pipelex.pipe_controllers.batch.pipe_batch_factory import PipeBatchFactory +from tests.unit.pipelex.pipe_controllers.batch.data import PipeBatchInputTestCases + + +class TestPipeBatchValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeBatchInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: PipeBatchBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + # Validation happens automatically during instantiation via model_validator + pipe_batch = PipeBatchFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # Assert that the pipe was created successfully + assert pipe_batch is not None + assert pipe_batch.code == f"test_pipe_{test_id}" diff --git a/tests/unit/pipelex/pipe_controllers/condition/__init__.py b/tests/unit/pipelex/pipe_controllers/condition/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/pipe_controllers/condition/data.py b/tests/unit/pipelex/pipe_controllers/condition/data.py new file mode 100644 index 000000000..11ff05efa --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/condition/data.py @@ -0,0 +1,144 @@ +from typing import Any, ClassVar + +from pipelex.pipe_controllers.condition.pipe_condition_blueprint import PipeConditionBlueprint +from pipelex.pipe_controllers.condition.special_outcome import SpecialOutcome + + +class PipeConditionInputTestCases: + """Test cases for PipeCondition input validation.""" + + # Valid test cases: 
(test_id, blueprint) + VALID_WITH_EXPRESSION: ClassVar[tuple[str, PipeConditionBlueprint]] = ( + "valid_with_expression", + PipeConditionBlueprint( + description="Test case: valid_with_expression", + inputs={"status": "native.Text"}, + output="native.Text", + expression="status", + outcomes={"approved": "approve_pipe", "rejected": "reject_pipe"}, + default_outcome="fallback_pipe", + ), + ) + + VALID_WITH_EXPRESSION_TEMPLATE: ClassVar[tuple[str, PipeConditionBlueprint]] = ( + "valid_with_expression_template", + PipeConditionBlueprint( + description="Test case: valid_with_expression_template", + inputs={"category": "native.Text"}, + output="native.Text", + expression_template="{{ category }}", + outcomes={"small": "process_small", "large": "process_large"}, + default_outcome="process_default", + ), + ) + + VALID_WITH_MULTIPLE_OUTCOMES: ClassVar[tuple[str, PipeConditionBlueprint]] = ( + "valid_with_multiple_outcomes", + PipeConditionBlueprint( + description="Test case: valid_with_multiple_outcomes", + inputs={"priority": "native.Text"}, + output="native.Text", + expression="priority", + outcomes={ + "high": "urgent_handler", + "medium": "normal_handler", + "low": "delayed_handler", + }, + default_outcome="default_handler", + ), + ) + + VALID_WITH_SPECIAL_OUTCOME: ClassVar[tuple[str, PipeConditionBlueprint]] = ( + "valid_with_special_outcome", + PipeConditionBlueprint( + description="Test case: valid_with_special_outcome", + inputs={"should_process": "native.Text"}, + output="native.Text", + expression="should_process", + outcomes={"yes": "process_pipe"}, + default_outcome=SpecialOutcome.CONTINUE, + ), + ) + + VALID_WITH_ALIAS: ClassVar[tuple[str, PipeConditionBlueprint]] = ( + "valid_with_alias", + PipeConditionBlueprint( + description="Test case: valid_with_alias", + inputs={"type": "native.Text"}, + output="native.Text", + expression="type", + outcomes={"A": "handle_a", "B": "handle_b"}, + default_outcome="handle_default", + 
add_alias_from_expression_to="selected_type", + ), + ) + + VALID_SINGLE_OUTCOME: ClassVar[tuple[str, PipeConditionBlueprint]] = ( + "valid_single_outcome", + PipeConditionBlueprint( + description="Test case: valid_single_outcome", + inputs={"flag": "native.Text"}, + output="native.Text", + expression="flag", + outcomes={"true": "process_pipe"}, + default_outcome=SpecialOutcome.CONTINUE, + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeConditionBlueprint]]] = [ + VALID_WITH_EXPRESSION, + VALID_WITH_EXPRESSION_TEMPLATE, + VALID_WITH_MULTIPLE_OUTCOMES, + VALID_WITH_SPECIAL_OUTCOME, + VALID_WITH_ALIAS, + VALID_SINGLE_OUTCOME, + ] + + # Error test cases: (test_id, blueprint_dict, expected_error_message_fragment) + # Using dicts instead of blueprints to avoid validation errors during import + ERROR_BOTH_EXPRESSION_AND_TEMPLATE: ClassVar[tuple[str, dict[str, Any], str]] = ( + "both_expression_and_template", + { + "description": "Test case: both_expression_and_template", + "inputs": {"data": "native.Text"}, + "output": "native.Text", + "expression": "data", + "expression_template": "{{ data }}", + "outcomes": {"A": "pipe_a"}, + "default_outcome": "pipe_default", + }, + "exactly one of expression_template or expression", + ) + + ERROR_NEITHER_EXPRESSION_NOR_TEMPLATE: ClassVar[tuple[str, dict[str, Any], str]] = ( + "neither_expression_nor_template", + { + "description": "Test case: neither_expression_nor_template", + "inputs": {"data": "native.Text"}, + "output": "native.Text", + "expression": None, + "expression_template": None, + "outcomes": {"A": "pipe_a"}, + "default_outcome": "pipe_default", + }, + "exactly one of expression_template or expression", + ) + + ERROR_EMPTY_OUTCOMES: ClassVar[tuple[str, dict[str, Any], str]] = ( + "empty_outcomes", + { + "description": "Test case: empty_outcomes", + "inputs": {"data": "native.Text"}, + "output": "native.Text", + "expression": "data", + "outcomes": {}, + "default_outcome": "pipe_default", + }, + "must have at least one 
mapping in outcomes", + ) + + ERROR_CASES: ClassVar[list[tuple[str, dict[str, Any], str]]] = [ + ERROR_BOTH_EXPRESSION_AND_TEMPLATE, + ERROR_NEITHER_EXPRESSION_NOR_TEMPLATE, + ERROR_EMPTY_OUTCOMES, + ] diff --git a/tests/unit/pipelex/pipe_controllers/condition/test_pipe_condition_input.py b/tests/unit/pipelex/pipe_controllers/condition/test_pipe_condition_input.py new file mode 100644 index 000000000..fb810bf2e --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/condition/test_pipe_condition_input.py @@ -0,0 +1,59 @@ +from typing import Any + +import pytest + +from pipelex import log +from pipelex.exceptions import PipeDefinitionError, StaticValidationError +from pipelex.pipe_controllers.condition.pipe_condition_blueprint import PipeConditionBlueprint +from pipelex.pipe_controllers.condition.pipe_condition_factory import PipeConditionFactory +from tests.unit.pipelex.pipe_controllers.condition.data import PipeConditionInputTestCases + + +class TestPipeConditionValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeConditionInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: PipeConditionBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + # Validation happens automatically during instantiation via model_validator + pipe_condition = PipeConditionFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # Assert that the pipe was created successfully + assert pipe_condition is not None + assert pipe_condition.code == f"test_pipe_{test_id}" + + @pytest.mark.parametrize( + ("test_id", "blueprint_dict", "expected_error_message_fragment"), + PipeConditionInputTestCases.ERROR_CASES, + ) + def test_validate_inputs_error_cases( + self, + test_id: str, + blueprint_dict: dict[str, Any], + expected_error_message_fragment: str, + ): + log.debug(f"Testing error case: {test_id}") + + with 
pytest.raises((StaticValidationError, ValueError, PipeDefinitionError)) as exc_info: # noqa: PT012 + # Construct blueprint from dict at test time to trigger validation + blueprint = PipeConditionBlueprint.model_validate(blueprint_dict) + PipeConditionFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + error_str = str(exc_info.value) + assert expected_error_message_fragment in error_str, ( + f"Expected fragment '{expected_error_message_fragment}' not found in error message: {error_str}" + ) diff --git a/tests/unit/pipelex/pipe_controllers/parallel/__init__.py b/tests/unit/pipelex/pipe_controllers/parallel/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/pipe_controllers/parallel/data.py b/tests/unit/pipelex/pipe_controllers/parallel/data.py new file mode 100644 index 000000000..bdbfda0f9 --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/parallel/data.py @@ -0,0 +1,111 @@ +from typing import Any, ClassVar + +from pipelex.pipe_controllers.parallel.pipe_parallel_blueprint import PipeParallelBlueprint +from pipelex.pipe_controllers.sub_pipe_blueprint import SubPipeBlueprint + + +class PipeParallelInputTestCases: + """Test cases for PipeParallel input validation.""" + + # Valid test cases: (test_id, blueprint) + VALID_WITH_ADD_EACH_OUTPUT: ClassVar[tuple[str, PipeParallelBlueprint]] = ( + "valid_with_add_each_output", + PipeParallelBlueprint( + description="Test case: valid_with_add_each_output", + inputs={"data": "native.Text"}, + output="native.Text", + parallels=[ + SubPipeBlueprint(pipe="process_a", result="result_a"), + SubPipeBlueprint(pipe="process_b", result="result_b"), + ], + add_each_output=True, + ), + ) + + VALID_WITH_COMBINED_OUTPUT: ClassVar[tuple[str, PipeParallelBlueprint]] = ( + "valid_with_combined_output", + PipeParallelBlueprint( + description="Test case: valid_with_combined_output", + inputs={"data": "native.Text"}, + 
output="native.Text", + parallels=[ + SubPipeBlueprint(pipe="analyze_1", result="analysis_1"), + SubPipeBlueprint(pipe="analyze_2", result="analysis_2"), + ], + combined_output="native.Text", + ), + ) + + VALID_WITH_BOTH_OUTPUT_OPTIONS: ClassVar[tuple[str, PipeParallelBlueprint]] = ( + "valid_with_both_output_options", + PipeParallelBlueprint( + description="Test case: valid_with_both_output_options", + inputs={"data": "native.Text"}, + output="native.Text", + parallels=[ + SubPipeBlueprint(pipe="compute_x", result="x"), + SubPipeBlueprint(pipe="compute_y", result="y"), + ], + add_each_output=True, + combined_output="native.Text", + ), + ) + + VALID_THREE_PARALLELS: ClassVar[tuple[str, PipeParallelBlueprint]] = ( + "valid_three_parallels", + PipeParallelBlueprint( + description="Test case: valid_three_parallels", + inputs={"input_data": "native.Text"}, + output="native.Text", + parallels=[ + SubPipeBlueprint(pipe="branch_1", result="result_1"), + SubPipeBlueprint(pipe="branch_2", result="result_2"), + SubPipeBlueprint(pipe="branch_3", result="result_3"), + ], + add_each_output=True, + ), + ) + + VALID_MULTIPLE_INPUTS: ClassVar[tuple[str, PipeParallelBlueprint]] = ( + "valid_multiple_inputs", + PipeParallelBlueprint( + description="Test case: valid_multiple_inputs", + inputs={"text_data": "native.Text", "image_data": "native.Image"}, + output="native.Text", + parallels=[ + SubPipeBlueprint(pipe="process_text", result="text_result"), + SubPipeBlueprint(pipe="process_image", result="image_result"), + ], + combined_output="native.Text", + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeParallelBlueprint]]] = [ + VALID_WITH_ADD_EACH_OUTPUT, + VALID_WITH_COMBINED_OUTPUT, + VALID_WITH_BOTH_OUTPUT_OPTIONS, + VALID_THREE_PARALLELS, + VALID_MULTIPLE_INPUTS, + ] + + # Error test cases: (test_id, blueprint_dict, expected_error_message_fragment) + # Using dicts instead of blueprints to avoid validation errors during import + ERROR_NO_OUTPUT_OPTIONS: ClassVar[tuple[str, 
dict[str, Any], str]] = ( + "no_output_options", + { + "description": "Test case: no_output_options", + "inputs": {"data": "native.Text"}, + "output": "native.Text", + "parallels": [ + {"pipe": "process_a", "result": "result_a"}, + {"pipe": "process_b", "result": "result_b"}, + ], + "add_each_output": False, + "combined_output": None, + }, + "requires either add_each_output to be True or combined_output to be set", + ) + + ERROR_CASES: ClassVar[list[tuple[str, dict[str, Any], str]]] = [ + ERROR_NO_OUTPUT_OPTIONS, + ] diff --git a/tests/unit/pipelex/pipe_controllers/parallel/test_pipe_parallel_input.py b/tests/unit/pipelex/pipe_controllers/parallel/test_pipe_parallel_input.py new file mode 100644 index 000000000..08497daf5 --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/parallel/test_pipe_parallel_input.py @@ -0,0 +1,59 @@ +from typing import Any + +import pytest + +from pipelex import log +from pipelex.exceptions import PipeDefinitionError, StaticValidationError +from pipelex.pipe_controllers.parallel.pipe_parallel_blueprint import PipeParallelBlueprint +from pipelex.pipe_controllers.parallel.pipe_parallel_factory import PipeParallelFactory +from tests.unit.pipelex.pipe_controllers.parallel.data import PipeParallelInputTestCases + + +class TestPipeParallelValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeParallelInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: PipeParallelBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + # Validation happens automatically during instantiation via model_validator + pipe_parallel = PipeParallelFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # Assert that the pipe was created successfully + assert pipe_parallel is not None + assert pipe_parallel.code == f"test_pipe_{test_id}" + + @pytest.mark.parametrize( + ("test_id", "blueprint_dict", 
"expected_error_message_fragment"), + PipeParallelInputTestCases.ERROR_CASES, + ) + def test_validate_inputs_error_cases( + self, + test_id: str, + blueprint_dict: dict[str, Any], + expected_error_message_fragment: str, + ): + log.debug(f"Testing error case: {test_id}") + + with pytest.raises((StaticValidationError, ValueError, PipeDefinitionError)) as exc_info: # noqa: PT012 + # Construct blueprint from dict at test time to trigger validation + blueprint = PipeParallelBlueprint.model_validate(blueprint_dict) + PipeParallelFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + error_str = str(exc_info.value) + assert expected_error_message_fragment in error_str, ( + f"Expected fragment '{expected_error_message_fragment}' not found in error message: {error_str}" + ) diff --git a/tests/unit/pipelex/pipe_controllers/sequence/__init__.py b/tests/unit/pipelex/pipe_controllers/sequence/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/pipelex/pipe_controllers/sequence/data.py b/tests/unit/pipelex/pipe_controllers/sequence/data.py new file mode 100644 index 000000000..53d853d9c --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/sequence/data.py @@ -0,0 +1,105 @@ +from typing import Any, ClassVar + +from pipelex.pipe_controllers.sequence.pipe_sequence_blueprint import PipeSequenceBlueprint +from pipelex.pipe_controllers.sub_pipe_blueprint import SubPipeBlueprint + + +class PipeSequenceInputTestCases: + """Test cases for PipeSequence input validation.""" + + # Valid test cases: (test_id, blueprint) + VALID_SIMPLE_SEQUENCE: ClassVar[tuple[str, PipeSequenceBlueprint]] = ( + "valid_simple_sequence", + PipeSequenceBlueprint( + description="Test case: valid_simple_sequence", + inputs={"text": "native.Text"}, + output="native.Text", + steps=[ + SubPipeBlueprint(pipe="step_1", result="result_1"), + SubPipeBlueprint(pipe="step_2", result="result_2"), + ], + ), + ) + + 
VALID_THREE_STEPS: ClassVar[tuple[str, PipeSequenceBlueprint]] = ( + "valid_three_steps", + PipeSequenceBlueprint( + description="Test case: valid_three_steps", + inputs={"input_data": "native.Text"}, + output="native.Text", + steps=[ + SubPipeBlueprint(pipe="process_step_1", result="processed_1"), + SubPipeBlueprint(pipe="process_step_2", result="processed_2"), + SubPipeBlueprint(pipe="process_step_3", result="final_output"), + ], + ), + ) + + VALID_SINGLE_STEP: ClassVar[tuple[str, PipeSequenceBlueprint]] = ( + "valid_single_step", + PipeSequenceBlueprint( + description="Test case: valid_single_step", + inputs={"data": "native.Text"}, + output="native.Text", + steps=[ + SubPipeBlueprint(pipe="single_process", result="output"), + ], + ), + ) + + VALID_MULTIPLE_INPUTS: ClassVar[tuple[str, PipeSequenceBlueprint]] = ( + "valid_multiple_inputs", + PipeSequenceBlueprint( + description="Test case: valid_multiple_inputs", + inputs={"text_input": "native.Text", "image_input": "native.Image"}, + output="native.Text", + steps=[ + SubPipeBlueprint(pipe="analyze_text", result="text_analysis"), + SubPipeBlueprint(pipe="analyze_image", result="image_analysis"), + SubPipeBlueprint(pipe="combine_results", result="final_result"), + ], + ), + ) + + VALID_WITH_BATCH: ClassVar[tuple[str, PipeSequenceBlueprint]] = ( + "valid_with_batch", + PipeSequenceBlueprint( + description="Test case: valid_with_batch", + inputs={"items": "native.Text"}, + output="native.Text", + steps=[ + SubPipeBlueprint( + pipe="process_item", + batch_over="items", + batch_as="item", + result="processed_items", + ), + SubPipeBlueprint(pipe="summarize_results", result="summary"), + ], + ), + ) + + VALID_CASES: ClassVar[list[tuple[str, PipeSequenceBlueprint]]] = [ + VALID_SIMPLE_SEQUENCE, + VALID_THREE_STEPS, + VALID_SINGLE_STEP, + VALID_MULTIPLE_INPUTS, + VALID_WITH_BATCH, + ] + + # Error test cases: (test_id, blueprint_dict, expected_error_message_fragment) + # Using dicts instead of blueprints to avoid 
validation errors during import + ERROR_EMPTY_STEPS: ClassVar[tuple[str, dict[str, Any], str]] = ( + "empty_steps", + { + "description": "Test case: empty_steps", + "inputs": {"text": "native.Text"}, + "output": "native.Text", + "steps": [], + }, + "must have at least 1 step", + ) + + ERROR_CASES: ClassVar[list[tuple[str, dict[str, Any], str]]] = [ + ERROR_EMPTY_STEPS, + ] diff --git a/tests/unit/pipelex/pipe_controllers/sequence/test_pipe_sequence_input.py b/tests/unit/pipelex/pipe_controllers/sequence/test_pipe_sequence_input.py new file mode 100644 index 000000000..9043917ae --- /dev/null +++ b/tests/unit/pipelex/pipe_controllers/sequence/test_pipe_sequence_input.py @@ -0,0 +1,59 @@ +from typing import Any + +import pytest + +from pipelex import log +from pipelex.exceptions import PipeDefinitionError, StaticValidationError +from pipelex.pipe_controllers.sequence.pipe_sequence_blueprint import PipeSequenceBlueprint +from pipelex.pipe_controllers.sequence.pipe_sequence_factory import PipeSequenceFactory +from tests.unit.pipelex.pipe_controllers.sequence.data import PipeSequenceInputTestCases + + +class TestPipeSequenceValidateInputs: + @pytest.mark.parametrize( + ("test_id", "blueprint"), + PipeSequenceInputTestCases.VALID_CASES, + ) + def test_validate_inputs_valid_cases( + self, + test_id: str, + blueprint: PipeSequenceBlueprint, + ): + log.debug(f"Testing valid case: {test_id}") + + # Validation happens automatically during instantiation via model_validator + pipe_sequence = PipeSequenceFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + # Assert that the pipe was created successfully + assert pipe_sequence is not None + assert pipe_sequence.code == f"test_pipe_{test_id}" + + @pytest.mark.parametrize( + ("test_id", "blueprint_dict", "expected_error_message_fragment"), + PipeSequenceInputTestCases.ERROR_CASES, + ) + def test_validate_inputs_error_cases( + self, + test_id: str, + blueprint_dict: 
dict[str, Any], + expected_error_message_fragment: str, + ): + log.debug(f"Testing error case: {test_id}") + + with pytest.raises((StaticValidationError, ValueError, PipeDefinitionError)) as exc_info: # noqa: PT012 + # Construct blueprint from dict at test time to trigger validation + blueprint = PipeSequenceBlueprint.model_validate(blueprint_dict) + PipeSequenceFactory.make_from_blueprint( + domain="test_domain", + pipe_code=f"test_pipe_{test_id}", + blueprint=blueprint, + ) + + error_str = str(exc_info.value) + assert expected_error_message_fragment in error_str, ( + f"Expected fragment '{expected_error_message_fragment}' not found in error message: {error_str}" + ) From 7d1f750893502efd7468a82e02dbf43439b42bad Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sat, 11 Oct 2025 18:22:48 +0200 Subject: [PATCH 017/115] Changelog --- CHANGELOG.md | 48 +++++++++++++++++++++++++++++++++++++++++++--- pipelex/pipelex.py | 5 +---- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2bdec807..dbd61b27b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,50 @@ ## [Unreleased] -### Highlights +### Highlights - Moving fast and breaking things + +- Added the new builder pipeline system for auto-generating Pipelex bundles from user briefs + - it's a pipeline to generate pipelines, and it works! + - the pipeline definitions are in `pipelex_libraries/pipelines/base_library/builder/` + - removed the previous draft which was named `meta_pipeline.plx` + +**Breaking changes... for good!** + +We tried to group all the renamings we wanted to do which impact our language, so that you get one migration to apply and then we will be way more stable in the future releases. + +This is all in the spirit of making Pipelex a declarative language, where you express what you want to do, and the system will figure out how to do it. 
So our focus was to make the Pipelex language easier to understand and use for non-technical users, and at the same time use more consistent and obvious words that developers are used to. + + +- General changes + - renamed `definition` fields to `description` across all cases + +- Renamed **PipeJinja2** to **PipeCompose** + - the fact that our templating engine is Jinja2 is a technical detail, not fundamental to the language, especially since we included a pre-processor enabling insertion of variables in prompts using `@variable` or `$variable`, in addition to the jinja2 syntax `{{ variable }}` + - renamed `jinja2` field to `template` for the same reason + - for more control, instead of providing a string for the `template` field, you can also use a nested `template` section with `template`, `category` and `templating_style` fields + +- Renamed **PipeOCR** to **PipeExtract** + - this is to account for various text extraction techniques from images and docs, including but not only OCR; e.g.
we now have integrated the `pypdfium2` package which can extract text and images from PDF, when it's actually real text (not an image), and soon we'll add support for other document extraction models such as IBM's `docling` and Microsoft's `MarkItDown` + - removed obligation to name your document input `ocr_input`, it can now be named whatever you want as long as it's a single input and it's either an `Image` or a `PDF` or some concept refining PDF or Image + - renamed `ocr_page_contents_from_pdf` to `extract_page_contents_from_pdf` + - renamed `ocr_page_contents_and_views_from_pdf` to `extract_page_contents_and_views_from_pdf` + - introduced model settings and presets for extract models like we had for LLMs + - renamed `ocr_model` to `model` for choice of model, preset, or explicit setting and introduced `base_ocr_mistral` as an alias to `mistral-ocr` + +- **PipeLLM** field renames + - image inputs must now be tagged in the prompt like all other inputs; you can just drop their names at the beginning or end of the prompt, or you can reference them in meaningful sentences to guide the Visual LLM, e.g. "Analyze the colors in $some_photo and the shapes in $some_painting." + - renamed `prompt_template` field to `prompt` + - renamed `llm` field to `model` + - renamed `llm_to_structure` field to `model_to_structure` + +- **PipeImgGen** field renames + - renamed `img_gen` field to `model` for choice of model, preset, or explicit setting + - removed some technical settings such as `nb_steps` from the pipe attributes, instead you can set these as model settings or model presets + - introduced model settings and presets for image generation models like we had for LLMs -- In the `PipeLLM`, the image inputs can now be used and tagged in the prompt like all other concepts. -- Use claude-4.5-sonnet instead of claude-4-sonnet in the base deck. 
+- **PipeCondition** field renames + - renamed `pipe_map` to `outcomes` + - renamed `default_pipe_code` to `default_outcome` and it's now a required field, because we need to know what to do if the expression doesn't match any key in the outcomes map; if you don't know what to do in that case, then it's a failure and you can use the `fail` value ### Added - Added `Flow` class that represents the flow of pipe signatures @@ -26,6 +66,7 @@ - Added `MissingDependencyError` exception for missing optional dependencies ### Changed + - Using `claude-4.5-sonnet` instead of `claude-4-sonnet` across the model deck. - Cleanup env example and better explain how to set up keys in README and docs - Changed Gemini model configuration from `gemini-2.0-flash-exp` (free tier) to `gemini-2.0-flash` with pricing ($0.10 input, $0.40 output per million tokens) - Removed Gemini 1.5 series models (gemini-1.5-pro, gemini-1.5-flash, gemini-1.5-flash-8b) from configuration @@ -65,6 +106,7 @@ - Removed `get_optional_library_manager()` method from PipelexHub - Removed `get_optional_domain_provider()` and `get_optional_concept_provider()` methods from hub - Removed unused test fixtures (apple, cherry, blueberry, concept_provider, pretty) from conftest.py + - Removed some Vision/Image description pipes from the base library, because we doubt they were useful as they were ## [v0.11.0] - 2025-10-01 diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index d568828b1..bb0135bc9 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -219,10 +219,7 @@ def setup( raise PipelexSetupError(msg) from backend_validation_exc except ModelDeckValidationError as deck_validation_exc: msg = self._get_validation_error_msg("model deck", deck_validation_exc) - msg += ( - "\n\nIf you added your own config files to the model deck then they won't be fixed automatically, " - "you'll have to change them manually, sorry about that." 
- ) + msg += "\n\nIf you added your own config files to the model deck then you'll have to change them manually." raise PipelexSetupError(msg) from deck_validation_exc except InferenceBackendCredentialsError as credentials_exc: backend_name = credentials_exc.backend_name From ebec94c628fdefdb8295c3b0b2fd6c922f3b0cf9 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sat, 11 Oct 2025 18:40:32 +0200 Subject: [PATCH 018/115] Changelog --- CHANGELOG.md | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbd61b27b..65bd866c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,7 +47,25 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - renamed `pipe_map` to `outcomes` - renamed `default_pipe_code` to `default_outcome` and it's now a required field, because we need to know what to do if the expression doesn't match any key in the outcomes map; if you don't know what to do in that case, then it's a failure and you can use the `fail` value +- **Configuration file changes** (`.pipelex/` directory) + - Renamed parameter `llm_handle` to `model` across all LLM presets in deck files + - Renamed parameter `img_gen_handle` to `model` across all image generation presets in deck files + - Renamed parameter `ocr_handle` to `model` in extraction presets + - Renamed `ocr` section to `extract` throughout configuration files + - Renamed `ocr_config` to `extract_config` in `pipelex.toml` + - Renamed `base_ocr_pypdfium2` to `base_extract_pypdfium2` + - Renamed `is_auto_setup_preset_ocr` to `is_auto_setup_preset_extract` + - Renamed `nb_ocr_pages` to `nb_extract_pages` + - Updated pytest marker from 'ocr' to 'extract' + ### Added + - Added `cheap-gpt` model alias for `gpt-4o-mini` + - Added `cheap_llm_for_vision` preset using `gemini-2.5-flash-lite` + - Added `llm_for_testing_vision` and `llm_for_testing_vision_structured` presets for vision testing + - Added 
`is_dump_text_prompts_enabled` and `is_dump_response_text_enabled` configuration flags to have the console display everything that goes in and out of the LLMs + - Added `generic_templates` section in `llm_config` with structure extraction prompts + - Added useful error messages with migration configuration maps pin-pointing the fields to rename for config and plx files + - Added pytest filterwarnings to ignore deprecated class-based config warnings - Added `Flow` class that represents the flow of pipe signatures - Added `pipe-builder` command `flow` to generate flow view from pipeline brief - Added `FlowFactory` class to create Flow from PipelexBundleSpec or PLX files @@ -66,10 +84,11 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Added `MissingDependencyError` exception for missing optional dependencies ### Changed - - Using `claude-4.5-sonnet` instead of `claude-4-sonnet` across the model deck. + - Updated Gemini 2.0 model from `gemini-2.0-flash-exp` to `gemini-2.0-flash` with new pricing (input: $0.10, output: $0.40 per million tokens) + - Updated Gemini 2.5 Series comment from '(when available)' to stable release + - Updated `base-claude` from `claude-4-sonnet` to `claude-4.5-sonnet` across all presets + - Updated kajson dependency from version `0.3.0` to `0.3.1` - Cleanup env example and better explain how to set up keys in README and docs - - Changed Gemini model configuration from `gemini-2.0-flash-exp` (free tier) to `gemini-2.0-flash` with pricing ($0.10 input, $0.40 output per million tokens) - - Removed Gemini 1.5 series models (gemini-1.5-pro, gemini-1.5-flash, gemini-1.5-flash-8b) from configuration - Changed Gemini routing from `google` backend to `pipelex_inference` backend - Renamed `ConceptProviderAbstract` to `ConceptLibraryAbstract` - Renamed `DomainProviderAbstract` to `DomainLibraryAbstract` @@ -84,7 +103,6 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Changed 
`PipeLLM` validation to check all inputs are in required variables - Updated `LLMPromptSpec` to handle image collections (lists/tuples) in addition to single images - Changed Mermaid diagram URL generation from `/img/` to `/svg/` endpoint - - Updated kajson dependency from 0.3.0 to 0.3.1 - Changed `PipeLLMPromptTemplate.make_llm_prompt()` to private method `_make_llm_prompt()` - Updated pipe-builder prompts to include concept specs for better context - Updated `PipelexBundleSpec.to_blueprint()` to sort pipes by dependencies before creating bundle @@ -100,6 +118,8 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Updated README badge URL to point to main branch instead of feature/pipe-builder branch ### Removed + - Removed Gemini 1.5 series models: `gemini-1.5-pro`, `gemini-1.5-flash`, and `gemini-1.5-flash-8b` + - Removed `base_templates.toml` file (generic prompts moved to `pipelex.toml`) - Removed `gpt-5-mini` from possible models in pipe-builder - Removed useless functions in `LLMJobFactory`: `make_llm_job_from_prompt_factory()`, `make_llm_job_from_prompt_template()`, `make_llm_job_from_prompt_contents()` - Removed `add_or_update_pipe()` method from PipeLibrary From 7fdca2079bb3c55256beb77e62c74e6b145ad2d3 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 09:49:03 +0200 Subject: [PATCH 019/115] Update AGENTS.md and add MIGRATION.md --- AGENTS.md | 121 ++++++++--- MIGRATION.md | 592 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 682 insertions(+), 31 deletions(-) create mode 100644 MIGRATION.md diff --git a/AGENTS.md b/AGENTS.md index 6cc4503be..6c6930a3e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -100,7 +100,7 @@ Always fix any issues reported by these tools before proceeding. 
```bash make tp ``` - Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or ocr)) and not (needs_output or pipelex_api)` + Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` 2. **Specific Tests**: ```bash @@ -110,7 +110,7 @@ Always fix any issues reported by these tools before proceeding. ``` Note: Matches names starting with the provided string. -**Important**: Never run `make ti`, `make test-inference`, `make to`, `make test-ocr`, `make tg`, or `make test-img-gen` - these use costly inference. +**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. ## Pipelines @@ -348,8 +348,9 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" @@ -362,8 +363,9 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" @@ -373,9 +375,25 @@ large = "process_large" - `expression`: Direct boolean or string expression (mutually exclusive with expression_template) - `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `pipe_map`: Dictionary mapping expression results to pipe codes : -1 - The key on the left (`small`, `medium`) is the result of `expression` or `expression_template`. 
-2 - The value on the right (`process_small`, `process_medium`, ..) is the name of the pipce to trigger +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` ## PipeLLM operator @@ -391,7 +409,7 @@ Simple Text Generation: type = "PipeLLM" description = "Write a short story" output = "Text" -prompt_template = """ +prompt = """ Write a short story about a programmer. """ ``` @@ -403,7 +421,7 @@ type = "PipeLLM" description = "Extract information" inputs = { text = "Text" } output = "PersonInfo" -prompt_template = """ +prompt = """ Extract person information from this text: @text """ @@ -416,7 +434,7 @@ type = "PipeLLM" description = "Expert analysis" output = "Analysis" system_prompt = "You are a data analysis expert" -prompt_template = "Analyze this data" +prompt = "Analyze this data" ``` ### Multiple Outputs @@ -441,14 +459,28 @@ multiple_output = true # Let the LLM decide how many to generate ### Vision -Process images with VLMs: +Process images with VLMs (image inputs must be tagged in the prompt): ```plx [pipe.analyze_image] type = "PipeLLM" description = "Analyze image" -inputs = { image = "Image" } # `image` is the name of the stuff that contains the Image. 
If its in an attribute within a stuff, you can add something like `{ "page.image": "Image" } +inputs = { image = "Image" } output = "ImageAnalysis" -prompt_template = "Describe what you see in this image" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." ``` ### Writing prompts for PipeLLM @@ -459,7 +491,7 @@ If the inserted text is supposedly a long text, made of several lines or paragra Example template: ```plx -prompt_template = """ +prompt = """ Match the expense with its corresponding invoice: @expense @@ -467,7 +499,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt template. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. 
@@ -478,7 +510,7 @@ If the inserted text is short text and it makes sense to have it inserted direct Example template: ```plx -prompt_template = """ +prompt = """ Your goal is to summarize everything related to $topic in the provided text: @text @@ -508,7 +540,17 @@ inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is output = "Page" ``` -Only one input is allowed and it must either be an `Image` or a `PDF`. +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. The output concept `Page` is a native concept, with the structure `PageContent`: It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` @@ -538,7 +580,7 @@ type = "PipeCompose" description = "Compose a report using template" inputs = { data = "ReportData" } output = "Text" -jinja2 = """ +template = """ # Report Summary Based on the analysis: @@ -555,7 +597,21 @@ type = "PipeCompose" description = "Use a predefined template" inputs = { content = "Text" } output = "Text" -jinja2_name = "standard_report_template" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } ``` CRM Email Template: @@ -567,7 +623,7 @@ inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } output = "Text" template_category = "html" templating_style = { tag_style = "square_brackets", 
text_format = "html" } -jinja2 = """ +template = """ Subject: Following up on our $deal.product_name discussion Hi $customer.first_name, @@ -599,12 +655,17 @@ $sales_rep.phone | $sales_rep.email ### Key Parameters -- `jinja2`: Inline Jinja2 template (mutually exclusive with jinja2_name) -- `jinja2_name`: Name of a predefined template (mutually exclusive with jinja2) +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) - `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) - `templating_style`: Styling options for template rendering - `extra_context`: Additional context variables for template +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + ### Template Variables Use the same variable insertion rules as PipeLLM: @@ -633,9 +694,9 @@ type = "PipeImgGen" description = "Generate a high-quality photo" inputs = { prompt = "ImgGenPrompt" } output = "Photo" -model = { model = "fast-img-gen", quality = "hd" } +model = { model = "fast-img-gen" } aspect_ratio = "16:9" -nb_steps = 8 +quality = "hd" ``` Multiple Image Generation: @@ -668,11 +729,8 @@ safety_tolerance = 3 ### Key Parameters **Image Generation Settings:** -- `img_gen`: ImgGenChoice (preset name or inline settings) -- `img_gen_handle`: Direct model handle (legacy) +- `model`: Model choice (preset name or inline settings with model name) - `quality`: Image quality ("standard", "hd") -- `nb_steps`: Number of generation steps -- `guidance_scale`: How closely to follow the prompt **Output Configuration:** - `nb_output`: Number of images to generate @@ -808,7 +866,7 @@ The system first looks for direct model names, then checks aliases if no direct ### Using an LLM Handle in a PipeLLM -Here is an example of 
using an llm_handle to specify which LLM to use in a PipeLLM: +Here is an example of using a model to specify which LLM to use in a PipeLLM: ```plx [pipe.hello_world] @@ -816,7 +874,7 @@ type = "PipeLLM" description = "Write text about Hello World." output = "Text" model = { model = "gpt-5", temperature = 0.9 } -prompt_template = """ +prompt = """ Write a haiku about Hello World. """ ``` @@ -842,7 +900,7 @@ description = "Extract invoice information from an invoice text transcript" inputs = { invoice_text = "InvoiceText" } output = "Invoice" model = "llm_to_extract_invoice" -prompt_template = """ +prompt = """ Extract invoice information from this invoice: The category of this invoice is: $invoice_details.category. @@ -1113,6 +1171,7 @@ NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. Apply the appropriate markers: - "llm: uses an LLM to generate text or objects" - "img_gen: uses an image generation AI" +- "extract: uses text/image extraction from documents" - "inference: uses either an LLM or an image generation AI" - "gha_disabled: will not be able to run properly on GitHub Actions" diff --git a/MIGRATION.md b/MIGRATION.md new file mode 100644 index 000000000..19017f3f1 --- /dev/null +++ b/MIGRATION.md @@ -0,0 +1,592 @@ +# Migration Guide - Breaking Changes + +This guide will help you migrate your Pipelex pipelines and configurations to the latest version. + +## Overview + +This release introduces several breaking changes to make the Pipelex language more declarative, intuitive, and consistent. The changes affect: +- Pipeline definitions (.plx files) +- Configuration files (.pipelex/ directory) +- Test markers + +## Migration Checklist + +- [ ] Update PipeCompose (formerly PipeJinja2) +- [ ] Update PipeExtract (formerly PipeOCR) +- [ ] Update PipeLLM prompts and fields +- [ ] Update PipeImgGen fields +- [ ] Update PipeCondition fields +- [ ] Update configuration files +- [ ] Update test markers +- [ ] Run validation + +## 1. 
General Changes + +### Rename `definition` to `description` + +**Find:** `definition = "` +**Replace with:** `description = "` + +This applies to all pipe types. + +**Before:** +```plx +[pipe.example] +type = "PipeLLM" +definition = "Process data" +``` + +**After:** +```plx +[pipe.example] +type = "PipeLLM" +description = "Process data" +``` + +## 2. PipeCompose (formerly PipeJinja2) + +### Rename pipe type + +**Find:** `type = "PipeJinja2"` +**Replace with:** `type = "PipeCompose"` + +### Rename template fields + +**Find:** `jinja2 = ` +**Replace with:** `template = ` + +**Find:** `jinja2_name = ` +**Replace with:** `template_name = ` + +**Before:** +```plx +[pipe.compose_report] +type = "PipeJinja2" +description = "Compose a report" +inputs = { data = "ReportData" } +output = "Text" +jinja2 = """ +Report: $data +""" +``` + +**After:** +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report" +inputs = { data = "ReportData" } +output = "Text" +template = """ +Report: $data +""" +``` + +### Nested template section (optional) + +If you need more control, you can now use a nested template section: + +**Before:** +```plx +[pipe.example] +type = "PipeJinja2" +jinja2 = "Template content" +template_category = "html" +``` + +**After:** +```plx +[pipe.example] +type = "PipeCompose" + +[pipe.example.template] +template = "Template content" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +## 3. PipeExtract (formerly PipeOCR) + +### Rename pipe type + +**Find:** `type = "PipeOCR"` +**Replace with:** `type = "PipeExtract"` + +### Rename model field + +**Find:** `ocr_model = ` +**Replace with:** `model = ` + +### Input naming + +The input no longer needs to be named `ocr_input`. You can name it anything as long as it's a single input that is either an `Image` or a `PDF`. 
+ +**Before:** +```plx +[pipe.extract_info] +type = "PipeOCR" +description = "Extract text from document" +inputs = { ocr_input = "PDF" } +output = "Page" +ocr_model = "mistral-ocr" +``` + +**After:** +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "Extract text from document" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" +``` + +### Python function renames + +If you're using these functions in Python code: + +**Find:** `ocr_page_contents_from_pdf` +**Replace with:** `extract_page_contents_from_pdf` + +**Find:** `ocr_page_contents_and_views_from_pdf` +**Replace with:** `extract_page_contents_and_views_from_pdf` + +## 4. PipeLLM Changes + +### Rename prompt field + +**Find:** `prompt_template = ` +**Replace with:** `prompt = ` + +### Rename model fields + +**Find:** `llm = ` +**Replace with:** `model = ` + +**Find:** `llm_to_structure = ` +**Replace with:** `model_to_structure = ` + +### Tag image inputs in prompts + +Image inputs must now be explicitly tagged in the prompt using `$image_name` or `@image_name`. + +**Before:** +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt_template = "Describe what you see in this image" +``` + +**After:** +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline: +```plx +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +**Complete example:** + +**Before:** +```plx +[pipe.extract_info] +type = "PipeLLM" +definition = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +llm = { llm_handle = "gpt-4o", temperature = 0.1 } +prompt_template = """ +Extract person information from this text: +@text +""" +``` + +**After:** +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +model = { model = "gpt-4o", temperature = 0.1 } +prompt = """ +Extract person information from this text: +@text +""" +``` + +## 5. PipeImgGen Changes + +### Rename model field + +**Find:** `img_gen = ` +**Replace with:** `model = ` + +### Remove technical settings from pipe level + +Settings like `nb_steps` and `guidance_scale` should now be configured in model settings or presets, not at the pipe level. + +**Before:** +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +img_gen = { img_gen_handle = "fast-img-gen", quality = "hd" } +aspect_ratio = "16:9" +nb_steps = 8 +``` + +**After:** +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Or use a preset: +```plx +model = "img_gen_preset_name" +``` + +## 6. PipeCondition Changes + +### Rename outcome fields + +**Find:** `[pipe.your_pipe.pipe_map]` +**Replace with:** `[pipe.your_pipe.outcomes]` + +**Find:** `default_pipe_code = ` +**Replace with:** `default_outcome = ` + +### Add required default_outcome + +The `default_outcome` field is now **required**. If you don't want any default behavior, use `"fail"`. 
+ +**Before:** +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "Decide which pipe to run" +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" + +[pipe.conditional_operation.pipe_map] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +**After:** +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "Decide which pipe to run" +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +To fail when no match: +```plx +default_outcome = "fail" +``` + +## 7. Configuration Files (.pipelex/ directory) + +### LLM presets in deck files + +**Find:** `llm_handle = ` +**Replace with:** `model = ` + +**Before (.pipelex/inference/deck/base_deck.toml):** +```toml +[presets.llm] +llm_to_reason = { llm_handle = "claude-3-5-sonnet", temperature = 1 } +``` + +**After:** +```toml +[presets.llm] +llm_to_reason = { model = "claude-3-5-sonnet", temperature = 1 } +``` + +### Image generation presets + +**Find:** `img_gen_handle = ` +**Replace with:** `model = ` + +**Before:** +```toml +[presets.img_gen] +fast_gen = { img_gen_handle = "fast-img-gen", quality = "standard" } +``` + +**After:** +```toml +[presets.img_gen] +fast_gen = { model = "fast-img-gen", quality = "standard" } +``` + +### Extract presets (formerly OCR) + +**Find:** `ocr_handle = ` +**Replace with:** `model = ` + +**Find:** `[presets.ocr]` +**Replace with:** `[presets.extract]` + +**Find:** `base_ocr_pypdfium2` +**Replace with:** `base_extract_pypdfium2` + +**Find:** `base_ocr_mistral` +**Replace with:** `base_extract_mistral` + +**Before:** +```toml +[presets.ocr] +base_ocr_mistral = { ocr_handle = "mistral-ocr" } +``` + +**After:** +```toml +[presets.extract] +base_extract_mistral = { 
model = "mistral-ocr" } +``` + +### pipelex.toml + +**Find:** `ocr_config` +**Replace with:** `extract_config` + +**Find:** `is_auto_setup_preset_ocr` +**Replace with:** `is_auto_setup_preset_extract` + +**Find:** `nb_ocr_pages` +**Replace with:** `nb_extract_pages` + +**Before (.pipelex/pipelex.toml):** +```toml +[ocr_config] +is_auto_setup_preset_ocr = true +nb_ocr_pages = 10 +``` + +**After:** +```toml +[extract_config] +is_auto_setup_preset_extract = true +nb_extract_pages = 10 +``` + +## 8. Test Markers + +### Update pytest markers + +**Find:** `@pytest.mark.ocr` +**Replace with:** `@pytest.mark.extract` + +**Before:** +```python +@pytest.mark.ocr +@pytest.mark.inference +class TestOCRPipeline: + async def test_extract(self): + # test code +``` + +**After:** +```python +@pytest.mark.extract +@pytest.mark.inference +class TestExtractPipeline: + async def test_extract(self): + # test code +``` + +### Update test markers in pytest.ini or pyproject.toml + +**Find:** `ocr: uses OCR` +**Replace with:** `extract: uses text/image extraction from documents` + +### Update make commands + +**Find:** `make test-ocr` or `make to` +**Replace with:** `make test-extract` or `make te` + +## 9. Validation + +After making all changes, run validation: + +```bash +# Fix any unused imports +make fix-unused-imports + +# Validate all pipelines +make validate + +# Run type checking and linting +make check + +# Run tests (non-inference) +make tp +``` + +## 10. Common Issues + +### Issue: Pipeline validation fails with "unknown field" + +**Cause:** You may have used an old field name (e.g., `prompt_template`, `jinja2`, `llm`, `ocr_model`). + +**Solution:** Search your .plx files for the old field names and replace them according to this guide. + +### Issue: Tests fail with marker errors + +**Cause:** Test markers haven't been updated from `ocr` to `extract`. + +**Solution:** Update all `@pytest.mark.ocr` to `@pytest.mark.extract`. 
+ +### Issue: Configuration not loading + +**Cause:** Configuration files still use old section names (e.g., `[presets.ocr]`). + +**Solution:** Rename sections and fields in your .pipelex/ configuration files. + +## 11. Automated Migration Script + +You can use this bash script to automatically apply most changes: + +```bash +#!/bin/bash + +# Find all .plx files and apply replacements +find . -name "*.plx" -type f -exec sed -i '' \ + -e 's/definition = "/description = "/g' \ + -e 's/type = "PipeJinja2"/type = "PipeCompose"/g' \ + -e 's/type = "PipeOCR"/type = "PipeExtract"/g' \ + -e 's/prompt_template = /prompt = /g' \ + -e 's/jinja2 = /template = /g' \ + -e 's/jinja2_name = /template_name = /g' \ + -e 's/ocr_model = /model = /g' \ + -e 's/\[pipe\.\([^.]*\)\.pipe_map\]/[pipe.\1.outcomes]/g' \ + -e 's/default_pipe_code = /default_outcome = /g' \ + {} + + +# Find all .toml files in .pipelex and apply replacements +find .pipelex -name "*.toml" -type f -exec sed -i '' \ + -e 's/llm_handle = /model = /g' \ + -e 's/img_gen_handle = /model = /g' \ + -e 's/ocr_handle = /model = /g' \ + -e 's/\[presets\.ocr\]/[presets.extract]/g' \ + -e 's/base_ocr_pypdfium2/base_extract_pypdfium2/g' \ + -e 's/base_ocr_mistral/base_extract_mistral/g' \ + -e 's/ocr_config/extract_config/g' \ + -e 's/is_auto_setup_preset_ocr/is_auto_setup_preset_extract/g' \ + -e 's/nb_ocr_pages/nb_extract_pages/g' \ + {} + + +# Find all test files and update markers +find tests -name "*.py" -type f -exec sed -i '' \ + -e 's/@pytest\.mark\.ocr/@pytest.mark.extract/g' \ + {} + + +echo "Automated migration complete. Please review changes and:" +echo "1. Manually add default_outcome to all PipeCondition pipes" +echo "2. Tag image inputs in PipeLLM prompts" +echo "3. Remove nb_steps from PipeImgGen if present" +echo "4. 
Run 'make validate' to check for errors" +``` + +**Note:** +- macOS: Use `sed -i ''` (as shown above) +- Linux: Replace `sed -i ''` with `sed -i` +- Windows: Use Git Bash, WSL, or the PowerShell script below + +### Windows PowerShell Migration Script + +```powershell +# Find all .plx files and apply replacements +Get-ChildItem -Path . -Filter *.plx -Recurse | ForEach-Object { + $content = Get-Content $_.FullName -Raw + $content = $content -replace 'definition = "', 'description = "' + $content = $content -replace 'type = "PipeJinja2"', 'type = "PipeCompose"' + $content = $content -replace 'type = "PipeOCR"', 'type = "PipeExtract"' + $content = $content -replace 'prompt_template = ', 'prompt = ' + $content = $content -replace 'jinja2 = ', 'template = ' + $content = $content -replace 'jinja2_name = ', 'template_name = ' + $content = $content -replace 'ocr_model = ', 'model = ' + $content = $content -replace '\[pipe\.([^.]+)\.pipe_map\]', '[pipe.$1.outcomes]' + $content = $content -replace 'default_pipe_code = ', 'default_outcome = ' + Set-Content -Path $_.FullName -Value $content -NoNewline +} + +# Find all .toml files in .pipelex and apply replacements +Get-ChildItem -Path .pipelex -Filter *.toml -Recurse | ForEach-Object { + $content = Get-Content $_.FullName -Raw + $content = $content -replace 'llm_handle = ', 'model = ' + $content = $content -replace 'img_gen_handle = ', 'model = ' + $content = $content -replace 'ocr_handle = ', 'model = ' + $content = $content -replace '\[presets\.ocr\]', '[presets.extract]' + $content = $content -replace 'base_ocr_pypdfium2', 'base_extract_pypdfium2' + $content = $content -replace 'base_ocr_mistral', 'base_extract_mistral' + $content = $content -replace 'ocr_config', 'extract_config' + $content = $content -replace 'is_auto_setup_preset_ocr', 'is_auto_setup_preset_extract' + $content = $content -replace 'nb_ocr_pages', 'nb_extract_pages' + Set-Content -Path $_.FullName -Value $content -NoNewline +} + +# Find all test files and 
update markers +Get-ChildItem -Path tests -Filter *.py -Recurse | ForEach-Object { + $content = Get-Content $_.FullName -Raw + $content = $content -replace '@pytest\.mark\.ocr', '@pytest.mark.extract' + Set-Content -Path $_.FullName -Value $content -NoNewline +} + +Write-Host "Automated migration complete. Please review changes and:" +Write-Host "1. Manually add default_outcome to all PipeCondition pipes" +Write-Host "2. Tag image inputs in PipeLLM prompts" +Write-Host "3. Remove nb_steps from PipeImgGen if present" +Write-Host "4. Run 'make validate' to check for errors" +``` + +## 12. Additional Resources + +- See AGENTS.md for complete documentation of the current syntax +- Run `make validate` frequently to catch issues early +- Check the test files in `tests/test_pipelines/` for examples of the new syntax + +## Support + +If you encounter issues during migration: +1. Check that all old field names have been replaced +2. Run `make validate` to see specific error messages +3. Review the examples in AGENTS.md +4. 
Check that required fields like `default_outcome` are present + From a62d41e20f03b2077f8c395360476ca0ba157898 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 10:38:17 +0200 Subject: [PATCH 020/115] All templating going through jinja2, proper separation from template preprocessor --- .../content_generation/templating_generate.py | 17 ++----- pipelex/cogt/llm/llm_prompt_template.py | 49 +++++++++---------- pipelex/cogt/templating/template_rendering.py | 21 ++++++++ pipelex/pipelex.toml | 12 +++-- pipelex/tools/jinja2/jinja2_rendering.py | 16 +----- tests/cases/__init__.py | 2 +- .../{templates.py => jinja2_templates.py} | 0 .../{tools => cogt}/templating/__init__.py | 0 .../templating/test_template_preprocessor.py | 0 .../tools/{templating => }/test_jinja2.py | 0 10 files changed, 59 insertions(+), 58 deletions(-) create mode 100644 pipelex/cogt/templating/template_rendering.py rename tests/cases/{templates.py => jinja2_templates.py} (100%) rename tests/unit/pipelex/{tools => cogt}/templating/__init__.py (100%) rename tests/unit/pipelex/{tools => cogt}/templating/test_template_preprocessor.py (100%) rename tests/unit/pipelex/tools/{templating => }/test_jinja2.py (100%) diff --git a/pipelex/cogt/content_generation/templating_generate.py b/pipelex/cogt/content_generation/templating_generate.py index 8feaaa46b..ddec36a4e 100644 --- a/pipelex/cogt/content_generation/templating_generate.py +++ b/pipelex/cogt/content_generation/templating_generate.py @@ -1,19 +1,12 @@ from pipelex.cogt.content_generation.assignment_models import TemplatingAssignment -from pipelex.cogt.templating.template_preprocessor import preprocess_template -from pipelex.tools.jinja2.jinja2_parsing import check_jinja2_parsing -from pipelex.tools.jinja2.jinja2_rendering import render_jinja2 +from pipelex.cogt.templating.template_rendering import render_template async def templating_gen_text(templating_assignment: TemplatingAssignment) -> str: - # Intermediate call to preprocess the 
template with our syntax patterns (@, $, @?, etc.) - if templating_assignment.template: - templating_assignment.template = preprocess_template(template=templating_assignment.template) - check_jinja2_parsing(templating_assignment.template) - - templated_text: str = await render_jinja2( - template_category=templating_assignment.category, - temlating_context=templating_assignment.context, - template_source=templating_assignment.template, + templated_text: str = await render_template( + template=templating_assignment.template, + category=templating_assignment.category, + context=templating_assignment.context, templating_style=templating_assignment.templating_style, ) diff --git a/pipelex/cogt/llm/llm_prompt_template.py b/pipelex/cogt/llm/llm_prompt_template.py index c46c6bdc2..450b3fdf1 100644 --- a/pipelex/cogt/llm/llm_prompt_template.py +++ b/pipelex/cogt/llm/llm_prompt_template.py @@ -3,12 +3,15 @@ from typing_extensions import override from pipelex import log -from pipelex.cogt.exceptions import LLMPromptFactoryError, LLMPromptTemplateInputsError +from pipelex.cogt.exceptions import LLMPromptTemplateInputsError from pipelex.cogt.image.prompt_image import PromptImage from pipelex.cogt.llm.llm_prompt import LLMPrompt from pipelex.cogt.llm.llm_prompt_factory_abstract import LLMPromptFactoryAbstract from pipelex.cogt.llm.llm_prompt_template_inputs import LLMPromptTemplateInputs +from pipelex.cogt.templating.template_category import TemplateCategory +from pipelex.cogt.templating.templating_style import TagStyle, TemplatingStyle, TextFormat from pipelex.config import get_config +from pipelex.hub import get_content_generator from pipelex.tools.misc.string_utils import is_none_or_has_text @@ -24,10 +27,6 @@ class LLMPromptTemplate(LLMPromptFactoryAbstract): proto_prompt: LLMPrompt = make_empty_prompt() base_template_inputs: LLMPromptTemplateInputs = LLMPromptTemplateInputs() - # fields kept for reference and debugging only (they have no effect) - 
source_system_template_name: str | None = None - source_user_template_name: str | None = None - @override async def make_llm_prompt_from_args( self, @@ -48,7 +47,7 @@ async def make_llm_prompt_from_args( user_images = [arguments_dict.pop("user_image")] is_user_images_append: bool | None = arguments_dict.pop("is_user_images_append", None) - return self._make_llm_prompt( + return await self._make_llm_prompt( system_text=system_text, user_text=user_text, user_images=user_images, @@ -56,7 +55,7 @@ async def make_llm_prompt_from_args( template_inputs=LLMPromptTemplateInputs(root=arguments_dict), ) - def _make_llm_prompt( + async def _make_llm_prompt( self, system_text: str | None = None, user_text: str | None = None, @@ -91,25 +90,25 @@ def _make_llm_prompt( # input variables can be applied to prompt texts used as templates if llm_prompt.system_text: - try: - # TODO: use jinja2 templating here - llm_prompt.system_text = llm_prompt.system_text.format(**all_template_inputs.root) - except KeyError as exc: - error_msg = f"Could not apply inputs to system_text. KeyError = {exc}. system_text = '{llm_prompt.system_text}'" - if template_name := self.source_system_template_name: - error_msg = f"Error using template named '{template_name}': {error_msg}.\n\n Available inputs: {all_template_inputs.list_keys()}" - log.error(error_msg) - raise LLMPromptFactoryError(message=error_msg) from exc + llm_prompt.system_text = await get_content_generator().make_templated_text( + context=all_template_inputs.root, + template=llm_prompt.system_text, + templating_style=TemplatingStyle( + tag_style=TagStyle.XML, + text_format=TextFormat.MARKDOWN, + ), + template_category=TemplateCategory.LLM_PROMPT, + ) if llm_prompt.user_text: - try: - # TODO: use jinja2 templating here - llm_prompt.user_text = llm_prompt.user_text.format(**all_template_inputs.root) - except KeyError as exc: - error_msg = f"Could not apply inputs to user_text. KeyError = {exc}. 
user_text = '{llm_prompt.user_text}'" - if template_name := self.source_user_template_name: - error_msg = f"Error using template named '{template_name}': {error_msg}.\n\n Available inputs: {all_template_inputs.list_keys()}" - log.error(error_msg) - raise LLMPromptFactoryError(message=error_msg) from exc + llm_prompt.user_text = await get_content_generator().make_templated_text( + context=all_template_inputs.root, + template=llm_prompt.user_text, + templating_style=TemplatingStyle( + tag_style=TagStyle.XML, + text_format=TextFormat.MARKDOWN, + ), + template_category=TemplateCategory.LLM_PROMPT, + ) return llm_prompt diff --git a/pipelex/cogt/templating/template_rendering.py b/pipelex/cogt/templating/template_rendering.py new file mode 100644 index 000000000..0fc02f47e --- /dev/null +++ b/pipelex/cogt/templating/template_rendering.py @@ -0,0 +1,21 @@ +from typing import Any + +from pipelex.cogt.templating.template_category import TemplateCategory +from pipelex.cogt.templating.template_preprocessor import preprocess_template +from pipelex.cogt.templating.templating_style import TemplatingStyle +from pipelex.tools.jinja2.jinja2_rendering import render_jinja2 + + +async def render_template( + template: str, + category: TemplateCategory, + context: dict[str, Any], + templating_style: TemplatingStyle | None = None, +) -> str: + template_source = preprocess_template(template) + return await render_jinja2( + template_source=template_source, + template_category=category, + temlating_context=context, + templating_style=templating_style, + ) diff --git a/pipelex/pipelex.toml b/pipelex/pipelex.toml index bfc75a161..9c295053a 100644 --- a/pipelex/pipelex.toml +++ b/pipelex/pipelex.toml @@ -107,10 +107,9 @@ You are a data modeling expert specialized in extracting structure from text. """ structure_from_preliminary_text_user = """ -Your job is to extract and structure information from a text. 
-Here is the text: +Extract and structure information from this text: -{preliminary_text} +{{ preliminary_text | tag }} Now generate the JSON in the required format. Do not create information that is not in the text. @@ -118,17 +117,20 @@ Do not create information that is not in the text. output_structure_prompt = """ + --- The instance we want to generate will be for the following class: {{ class_structure_str }} Don't bother with JSON formatting, we'll do that as a second step. For now, just output markdown with the details of the instance. -DO NOT create information. -If some information is not present for an attribute, output the default value or None according to the attribute definition. +DO NOT create new information. +If some information is not present for an attribute, output the default value or None according to the field definition. """ output_structure_prompt_no_preliminary_text = """ + + --- The instance we want to generate will be for the following class: {{ class_structure_str }} diff --git a/pipelex/tools/jinja2/jinja2_rendering.py b/pipelex/tools/jinja2/jinja2_rendering.py index ad6fbb9a2..5d984150f 100644 --- a/pipelex/tools/jinja2/jinja2_rendering.py +++ b/pipelex/tools/jinja2/jinja2_rendering.py @@ -1,15 +1,12 @@ from typing import Any -from jinja2 import Template, meta from jinja2.exceptions import ( TemplateAssertionError, TemplateSyntaxError, UndefinedError, ) -from pipelex import log from pipelex.cogt.templating.template_category import TemplateCategory -from pipelex.cogt.templating.template_preprocessor import preprocess_template from pipelex.cogt.templating.templating_style import TemplatingStyle from pipelex.tools.jinja2.jinja2_environment import make_jinja2_env_without_loader from pipelex.tools.jinja2.jinja2_errors import ( @@ -18,7 +15,6 @@ Jinja2TemplateRenderError, ) from pipelex.tools.jinja2.jinja2_models import Jinja2ContextKey -from pipelex.tools.jinja2.jinja2_parsing import check_jinja2_parsing def 
_add_to_templating_context(temlating_context: dict[str, Any], jinja2_context_key: Jinja2ContextKey, value: Any) -> None: @@ -38,23 +34,13 @@ async def render_jinja2( template_category=template_category, ) - template: Template try: template = jinja2_env.from_string(template_source) except TemplateAssertionError as exc: msg = f"Jinja2 render error: '{exc}', template_source:\n{template_source}" raise Jinja2TemplateRenderError(msg) from exc - template_source = preprocess_template(template_source) - check_jinja2_parsing( - template_source=template_source, - template_category=template_category, - ) - parsed_ast = jinja2_env.parse(template_source) - if undeclared_variables := meta.find_undeclared_variables(parsed_ast): - undeclared_variables.discard("preliminary_text") - if undeclared_variables: - log.verbose(undeclared_variables, "Jinja2 undeclared_variables") + # Create a copy to avoid mutating the caller's original dictionary temlating_context = temlating_context.copy() if templating_style: _add_to_templating_context( diff --git a/tests/cases/__init__.py b/tests/cases/__init__.py index 135dbd61f..4c16db2fb 100644 --- a/tests/cases/__init__.py +++ b/tests/cases/__init__.py @@ -6,8 +6,8 @@ from .documents import PDFTestCases from .images import ImageTestCases +from .jinja2_templates import JINJA2TestCases from .registry import ClassRegistryTestCases, FileHelperTestCases, Fruit -from .templates import JINJA2TestCases from .urls import TestURLs __all__ = [ diff --git a/tests/cases/templates.py b/tests/cases/jinja2_templates.py similarity index 100% rename from tests/cases/templates.py rename to tests/cases/jinja2_templates.py diff --git a/tests/unit/pipelex/tools/templating/__init__.py b/tests/unit/pipelex/cogt/templating/__init__.py similarity index 100% rename from tests/unit/pipelex/tools/templating/__init__.py rename to tests/unit/pipelex/cogt/templating/__init__.py diff --git a/tests/unit/pipelex/tools/templating/test_template_preprocessor.py 
b/tests/unit/pipelex/cogt/templating/test_template_preprocessor.py similarity index 100% rename from tests/unit/pipelex/tools/templating/test_template_preprocessor.py rename to tests/unit/pipelex/cogt/templating/test_template_preprocessor.py diff --git a/tests/unit/pipelex/tools/templating/test_jinja2.py b/tests/unit/pipelex/tools/test_jinja2.py similarity index 100% rename from tests/unit/pipelex/tools/templating/test_jinja2.py rename to tests/unit/pipelex/tools/test_jinja2.py From 7000cda188d6bac68180531e1a79fa999daac4fc Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 11:03:30 +0200 Subject: [PATCH 021/115] stop using toml directly in config manager --- pipelex/tools/config/manager.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/pipelex/tools/config/manager.py b/pipelex/tools/config/manager.py index 578356e84..c3cfb4403 100644 --- a/pipelex/tools/config/manager.py +++ b/pipelex/tools/config/manager.py @@ -3,8 +3,6 @@ from configparser import ConfigParser from typing import Any -import toml - from pipelex.tools.config.config_root import ( CONFIG_BASE_OVERRIDES_AFTER_ENV, CONFIG_BASE_OVERRIDES_BEFORE_ENV, @@ -98,7 +96,7 @@ def _find_package_path(package_name: str) -> str | None: return None - pyproject = toml.load(pyproject_path) + pyproject = load_toml_from_path(path=pyproject_path) if "tool" in pyproject and "pipelex" in pyproject["tool"] and "config_inheritance" in pyproject["tool"]["pipelex"]: for config_name in pyproject["tool"]["pipelex"]["config_inheritance"]: package_path = _find_package_path(config_name) @@ -179,30 +177,16 @@ def get_project_name(self) -> str | None: """ # First check pipelex's pyproject.toml pipelex_pyproject_path = os.path.join(os.path.dirname(self.local_root_dir), "pyproject.toml") - try: - pyproject = toml.load(pipelex_pyproject_path) - if (project_name := pyproject.get("project", {}).get("name")) and isinstance(project_name, str): + if pipelex_pyproject := 
load_toml_from_path_if_exists(path=pipelex_pyproject_path): + if (project_name := pipelex_pyproject.get("project", {}).get("name")) and isinstance(project_name, str): return str(project_name) - except FileNotFoundError: - pass - except toml.TomlDecodeError as exc: - print(f"Failed to parse pipelex pyproject.toml at {pipelex_pyproject_path}: {exc}") - except (KeyError, TypeError, AttributeError) as exc: - print(f"Invalid structure in pipelex pyproject.toml at {pipelex_pyproject_path}: {exc}") # Check local pyproject.toml - pyproject_path = os.path.join(self.local_root_dir, "pyproject.toml") - try: - pyproject = toml.load(pyproject_path) - name_obj: object = pyproject.get("project", {}).get("name") or pyproject.get("tool", {}).get("poetry", {}).get("name") + local_pyproject_path = os.path.join(self.local_root_dir, "pyproject.toml") + if local_pyproject := load_toml_from_path_if_exists(local_pyproject_path): + name_obj: object = local_pyproject.get("project", {}).get("name") or local_pyproject.get("tool", {}).get("poetry", {}).get("name") if isinstance(name_obj, str): return name_obj - except FileNotFoundError as exc: - print(f"Local pyproject.toml not found at {pyproject_path}: {exc}") - except toml.TomlDecodeError as exc: - print(f"Failed to parse local pyproject.toml at {pyproject_path}: {exc}") - except (KeyError, TypeError, AttributeError) as exc: - print(f"Invalid structure in local pyproject.toml at {pyproject_path}: {exc}") # Check setup.cfg setup_cfg_path = os.path.join(self.local_root_dir, "setup.cfg") From 73406de00a386b7a03ed335139cf34f45f97c813 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 11:13:23 +0200 Subject: [PATCH 022/115] stop using toml directly in interpreter.py --- pipelex/core/interpreter.py | 12 ++++++------ pipelex/tools/misc/toml_utils.py | 23 ++++++++++++++++++++--- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/pipelex/core/interpreter.py b/pipelex/core/interpreter.py index 0912f8ec0..de9729387 100644 
--- a/pipelex/core/interpreter.py +++ b/pipelex/core/interpreter.py @@ -1,7 +1,6 @@ from pathlib import Path from typing import Any -import toml from pydantic import BaseModel, model_validator from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint @@ -9,10 +8,11 @@ PipelexConfigurationError, ) from pipelex.tools.misc.file_utils import load_text_from_path +from pipelex.tools.misc.toml_utils import TomlError, load_toml_from_content from pipelex.types import Self -class PLXDecodeError(toml.TomlDecodeError): +class PLXDecodeError(TomlError): """Raised when PLX decoding fails.""" @@ -92,11 +92,11 @@ def is_pipelex_file(file_path: Path) -> bool: def _parse_plx_content(self, content: str) -> dict[str, Any]: """Parse PLX content and return the dictionary.""" try: - return toml.loads(content) - except toml.TomlDecodeError as exc: - file_path_str = str(self.file_path) if self.file_path else "content" + return load_toml_from_content(content=content) + except TomlError as exc: + file_path_str = str(self.file_path) if self.file_path else "string content" msg = f"PLX parsing error in '{file_path_str}': {exc}" - raise PLXDecodeError(msg, exc.doc, exc.pos) from exc + raise PLXDecodeError(message=msg, doc=exc.doc, pos=exc.pos) from exc def make_pipelex_bundle_blueprint(self) -> PipelexBundleBlueprint: """Make a PipelexBundleBlueprint from the file_path or file_content""" diff --git a/pipelex/tools/misc/toml_utils.py b/pipelex/tools/misc/toml_utils.py index b01ea4515..8e26286c8 100644 --- a/pipelex/tools/misc/toml_utils.py +++ b/pipelex/tools/misc/toml_utils.py @@ -4,9 +4,26 @@ import toml +from pipelex.tools.exceptions import ToolException from pipelex.tools.misc.file_utils import path_exists +class TomlError(ToolException): + def __init__(self, message: str, doc: str | None = None, pos: int | None = None): + super().__init__(message) + self.doc = doc + self.pos = pos + + +def load_toml_from_content(content: str) -> dict[str, Any]: + """Load TOML from 
content.""" + try: + return toml.loads(content) + except toml.TomlDecodeError as exc: + msg = f"TOML parsing error in content: {exc}" + raise TomlError(message=msg, doc=exc.doc, pos=exc.pos) from exc + + def load_toml_from_path(path: str) -> dict[str, Any]: """Load TOML from path. @@ -25,10 +42,10 @@ def load_toml_from_path(path: str) -> dict[str, Any]: content = file.read() # Parse TOML first - return toml.loads(content) - except toml.TomlDecodeError as exc: + return load_toml_from_content(content) + except TomlError as exc: msg = f"TOML parsing error in file '{path}': {exc}" - raise toml.TomlDecodeError(msg, exc.doc, exc.pos) from exc + raise TomlError(message=msg, doc=exc.doc, pos=exc.pos) from exc def load_toml_from_path_if_exists(path: str) -> dict[str, Any] | None: From 7224dc30002c101b781219387a2a31bc92c9dd82 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 11:47:38 +0200 Subject: [PATCH 023/115] Use tomli instead of toml package --- pipelex/core/interpreter.py | 39 +- pipelex/tools/misc/toml_utils.py | 30 +- pyproject.toml | 2 +- .../core/test_data/errors/invalid_plx.py | 2 +- uv.lock | 379 +++++++++--------- 5 files changed, 221 insertions(+), 231 deletions(-) diff --git a/pipelex/core/interpreter.py b/pipelex/core/interpreter.py index de9729387..146bfad42 100644 --- a/pipelex/core/interpreter.py +++ b/pipelex/core/interpreter.py @@ -1,5 +1,4 @@ from pathlib import Path -from typing import Any from pydantic import BaseModel, model_validator @@ -7,8 +6,7 @@ from pipelex.core.exceptions import ( PipelexConfigurationError, ) -from pipelex.tools.misc.file_utils import load_text_from_path -from pipelex.tools.misc.toml_utils import TomlError, load_toml_from_content +from pipelex.tools.misc.toml_utils import TomlError, load_toml_from_content, load_toml_from_path from pipelex.types import Self @@ -44,16 +42,6 @@ def check_file_path_or_file_content(self) -> Self: raise PipelexConfigurationError(msg) return self - def _get_content(self) -> str: - 
"""Load PLX content from file_path or use file_content directly.""" - if self.file_path: - return load_text_from_path(path=str(self.file_path)) - elif self.file_content: - return self.file_content - else: - msg = "file_content must be provided if file_path is not provided" - raise PipelexConfigurationError(msg) - @staticmethod def is_pipelex_file(file_path: Path) -> bool: """Check if a file is a valid Pipelex PLX file. @@ -89,19 +77,18 @@ def is_pipelex_file(file_path: Path) -> bool: # If we can't read the file, it's not a valid Pipelex file return False - def _parse_plx_content(self, content: str) -> dict[str, Any]: - """Parse PLX content and return the dictionary.""" - try: - return load_toml_from_content(content=content) - except TomlError as exc: - file_path_str = str(self.file_path) if self.file_path else "string content" - msg = f"PLX parsing error in '{file_path_str}': {exc}" - raise PLXDecodeError(message=msg, doc=exc.doc, pos=exc.pos) from exc - def make_pipelex_bundle_blueprint(self) -> PipelexBundleBlueprint: """Make a PipelexBundleBlueprint from the file_path or file_content""" - file_content = self._get_content() - blueprint_dict = self._parse_plx_content(file_content) - if self.file_path: - blueprint_dict.update(source=str(self.file_path)) + # Load PLX content from file_path or use file_content directly. 
+ try: + if self.file_path: + blueprint_dict = load_toml_from_path(path=str(self.file_path)) + blueprint_dict.update(source=str(self.file_path)) + elif self.file_content: + blueprint_dict = load_toml_from_content(content=self.file_content) + else: + msg = "Could not make PipelexBundleBlueprint: either file_path or file_content must be provided" + raise PipelexConfigurationError(msg) + except TomlError as exc: + raise PLXDecodeError(message=exc.message, doc=exc.doc, pos=exc.pos, lineno=exc.lineno, colno=exc.colno) from exc return PipelexBundleBlueprint.model_validate(blueprint_dict) diff --git a/pipelex/tools/misc/toml_utils.py b/pipelex/tools/misc/toml_utils.py index 8e26286c8..e525bb02d 100644 --- a/pipelex/tools/misc/toml_utils.py +++ b/pipelex/tools/misc/toml_utils.py @@ -2,26 +2,31 @@ from typing import Any -import toml +import tomli from pipelex.tools.exceptions import ToolException from pipelex.tools.misc.file_utils import path_exists class TomlError(ToolException): - def __init__(self, message: str, doc: str | None = None, pos: int | None = None): + def __init__(self, message: str, doc: str, pos: int, lineno: int, colno: int): super().__init__(message) self.doc = doc self.pos = pos + self.lineno = lineno + self.colno = colno + + @classmethod + def from_tomli_error(cls, exc: tomli.TOMLDecodeError) -> TomlError: + return cls(message=exc.msg, doc=exc.doc, pos=exc.pos, lineno=exc.lineno, colno=exc.colno) def load_toml_from_content(content: str) -> dict[str, Any]: """Load TOML from content.""" try: - return toml.loads(content) - except toml.TomlDecodeError as exc: - msg = f"TOML parsing error in content: {exc}" - raise TomlError(message=msg, doc=exc.doc, pos=exc.pos) from exc + return tomli.loads(content) + except tomli.TOMLDecodeError as exc: + raise TomlError.from_tomli_error(exc) from exc def load_toml_from_path(path: str) -> dict[str, Any]: @@ -38,14 +43,11 @@ def load_toml_from_path(path: str) -> dict[str, Any]: """ try: - with open(path, encoding="utf-8") 
as file: - content = file.read() - - # Parse TOML first - return load_toml_from_content(content) - except TomlError as exc: - msg = f"TOML parsing error in file '{path}': {exc}" - raise TomlError(message=msg, doc=exc.doc, pos=exc.pos) from exc + with open(path, "rb") as f: + return tomli.load(f) + except tomli.TOMLDecodeError as exc: + msg = f"TOML parsing error in file '{path}': {exc.msg}" + raise TomlError(message=msg, doc=exc.doc, pos=exc.pos, lineno=exc.lineno, colno=exc.colno) from exc def load_toml_from_path_if_exists(path: str) -> dict[str, Any] | None: diff --git a/pyproject.toml b/pyproject.toml index b54926cb7..8e1296a3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "PyYAML>=6.0.2", "rich>=13.8.1", "shortuuid>=1.0.13", - "toml>=0.10.2", + "tomli>=2.3.0", "tomlkit>=0.13.2", "typer>=0.16.0", "typing-extensions>=4.13.2", diff --git a/tests/unit/pipelex/core/test_data/errors/invalid_plx.py b/tests/unit/pipelex/core/test_data/errors/invalid_plx.py index fa64ecd34..5d8ae047c 100644 --- a/tests/unit/pipelex/core/test_data/errors/invalid_plx.py +++ b/tests/unit/pipelex/core/test_data/errors/invalid_plx.py @@ -188,7 +188,7 @@ [concept] TestConcept = ["Unclosed array" """, - ValidationError, + PLXDecodeError, ) INVALID_ARRAY_SYNTAX2 = ( "invalid_array_syntax", diff --git a/uv.lock b/uv.lock index 6663e9aee..6c92aea93 100644 --- a/uv.lock +++ b/uv.lock @@ -47,11 +47,11 @@ boto3 = [ [[package]] name = "aiofiles" -version = "24.1.0" +version = "25.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/03/a88171e277e8caa88a4c77808c20ebb04ba74cc4681bf1e9416c862de237/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c", size = 30247, upload-time = "2024-06-24T11:02:03.584Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/45/30bb92d442636f570cb5651bc661f52b610e2eec3f891a5dc3a4c3667db0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5", size = 15896, upload-time = "2024-06-24T11:02:01.529Z" }, + { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" }, ] [[package]] @@ -335,16 +335,16 @@ wheels = [ [[package]] name = "boto3-stubs" -version = "1.40.47" +version = "1.40.50" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore-stubs" }, { name = "types-s3transfer" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c8/57/94842f8f5cdf79d1df73423e4e8eac63c42d7a997bc3e59d2d737aa91922/boto3_stubs-1.40.47.tar.gz", hash = "sha256:643ee24bf8b7651130e99e152380566bd34ab35d102f594f3336ba61cc9e7aec", size = 100830, upload-time = "2025-10-07T19:42:28.533Z" } +sdist = { url = "https://files.pythonhosted.org/packages/35/c8/06584145c4ccc80e3297a97874bfaa43e6b2fb9f8a69bcc38e29a1457bf5/boto3_stubs-1.40.50.tar.gz", hash = "sha256:29828adfcb8629b5e285468eb89610f1fc71f964ad0913de3049a0a9d5de0be1", size = 100836, upload-time = "2025-10-10T20:32:34.867Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c4/1e/95ae2cdde2a9d409070533e6acf84a5dfffbe1823781f626618747a49504/boto3_stubs-1.40.47-py3-none-any.whl", hash = 
"sha256:77b0caa066ff407eaf7a8fced06b6a9d929f970c0b166e15dc09af17e683b3b7", size = 69688, upload-time = "2025-10-07T19:42:23.099Z" }, + { url = "https://files.pythonhosted.org/packages/7b/69/f18c7135dc8a2b74e21b4a2375fa455e4d9e7e47f7838bc175d52005054a/boto3_stubs-1.40.50-py3-none-any.whl", hash = "sha256:01b9c67df62f26371a4a7473c616eece988a5305e7f7cb3fbc014d178685ac4e", size = 69689, upload-time = "2025-10-10T20:32:25.77Z" }, ] [[package]] @@ -363,14 +363,14 @@ wheels = [ [[package]] name = "botocore-stubs" -version = "1.40.33" +version = "1.40.50" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "types-awscrt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ae/94/16f8e1f41feaa38f1350aa5a4c60c5724b6c8524ca0e6c28523bf5070e74/botocore_stubs-1.40.33.tar.gz", hash = "sha256:89c51ae0b28d9d79fde8c497cf908ddf872ce027d2737d4d4ba473fde9cdaa82", size = 42742, upload-time = "2025-09-17T20:25:56.388Z" } +sdist = { url = "https://files.pythonhosted.org/packages/20/4b/86ad2d24ea36eed159c8e1f85a2645bfeedae34ccb8c77ea8c99abbd66d1/botocore_stubs-1.40.50.tar.gz", hash = "sha256:d772b2d3aea6b4e464963fe45b2d504eee7bc3842f047cebbae5492b3993e0fd", size = 42250, upload-time = "2025-10-11T23:08:59.925Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/7b/6d8fe12a955b16094460e89ea7c4e063f131f4b3bd461b96bcd625d0c79e/botocore_stubs-1.40.33-py3-none-any.whl", hash = "sha256:ad21fee32cbdc7ad4730f29baf88424c7086bf88a745f8e43660ca3e9a7e5f89", size = 66843, upload-time = "2025-09-17T20:25:54.052Z" }, + { url = "https://files.pythonhosted.org/packages/b9/c1/4a736155b2d5dd7fdd09af8fba9ed59693c565d6e2bc1b5adc769da36cb5/botocore_stubs-1.40.50-py3-none-any.whl", hash = "sha256:7cb8d636e061e600929cd03339c3bbc162c21435b4bfeb6413cf7b0b612e7de0", size = 66541, upload-time = "2025-10-11T23:08:57.678Z" }, ] [[package]] @@ -1023,7 +1023,7 @@ wheels = [ [[package]] name = "google-genai" -version = "1.41.0" +version = "1.43.0" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1035,9 +1035,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/8b/ee20bcf707769b3b0e1106c3b5c811507736af7e8a60f29a70af1750ba19/google_genai-1.41.0.tar.gz", hash = "sha256:134f861bb0ace4e34af0501ecb75ceee15f7662fd8120698cd185e8cb39f2800", size = 245812, upload-time = "2025-10-02T22:30:29.699Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c1/75/992ca4462682949750709678b8efbc865222c9a16cf34504b69c5459606c/google_genai-1.43.0.tar.gz", hash = "sha256:84eb219d320759c5882bc2cdb4e2ac84544d00f5d12c7892c79fb03d71bfc9a4", size = 236132, upload-time = "2025-10-10T23:16:40.131Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/14/e5e8fbca8863fee718208566c4e927b8e9f45fd46ec5cf89e24759da545b/google_genai-1.41.0-py3-none-any.whl", hash = "sha256:111a3ee64c1a0927d3879faddb368234594432479a40c311e5fe4db338ca8778", size = 245931, upload-time = "2025-10-02T22:30:27.885Z" }, + { url = "https://files.pythonhosted.org/packages/61/85/e90dda488d5044e6e4cd1b49e7e7f0cc7f4a2a1c8004e88a5122d42ea024/google_genai-1.43.0-py3-none-any.whl", hash = "sha256:be1d4b1acab268125d536fd81b73c38694a70cb08266759089154718924434fd", size = 236733, upload-time = "2025-10-10T23:16:38.809Z" }, ] [[package]] @@ -1079,11 +1079,11 @@ wheels = [ [[package]] name = "httpx-sse" -version = "0.4.2" +version = "0.4.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/63/7a/280d644f906f077e4f4a6d327e9b6e5a936624395ad1bf6ee9165a9d9959/httpx_sse-0.4.2.tar.gz", hash = "sha256:5bb6a2771a51e6c7a5f5c645e40b8a5f57d8de708f46cb5f3868043c3c18124e", size = 16000, upload-time = "2025-10-07T08:10:05.219Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = 
"sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/e5/ec31165492ecc52426370b9005e0637d6da02f9579283298affcb1ab614d/httpx_sse-0.4.2-py3-none-any.whl", hash = "sha256:a9fa4afacb293fa50ef9bacb6cae8287ba5fd1f4b1c2d10a35bb981c41da31ab", size = 9018, upload-time = "2025-10-07T08:10:04.257Z" }, + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, ] [[package]] @@ -2164,7 +2164,7 @@ dependencies = [ { name = "pyyaml" }, { name = "rich" }, { name = "shortuuid" }, - { name = "toml" }, + { name = "tomli" }, { name = "tomlkit" }, { name = "typer" }, { name = "typing-extensions" }, @@ -2271,7 +2271,7 @@ requires-dist = [ { name = "rich", specifier = ">=13.8.1" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.6.8" }, { name = "shortuuid", specifier = ">=1.0.13" }, - { name = "toml", specifier = ">=0.10.2" }, + { name = "tomli", specifier = ">=2.3.0" }, { name = "tomlkit", specifier = ">=0.13.2" }, { name = "typer", specifier = ">=0.16.0" }, { name = "types-aioboto3", extras = ["bedrock", "bedrock-runtime"], marker = "extra == 'dev'", specifier = ">=13.4.0" }, @@ -2290,11 +2290,11 @@ provides-extras = ["anthropic", "bedrock", "fal", "google", "google-genai", "mis [[package]] name = "platformdirs" -version = "4.4.0" +version = "4.5.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/23/e8/21db9c9987b0e728855bd57bff6984f67952bea55d6f75e055c46b5383e8/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf", size = 21634, upload-time = "2025-08-26T14:32:04.268Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632, upload-time = "2025-10-08T17:44:48.791Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654, upload-time = "2025-08-26T14:32:02.735Z" }, + { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" }, ] [[package]] @@ -2321,116 +2321,116 @@ wheels = [ [[package]] name = "propcache" -version = "0.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ea/c8/d70cd26d845c6d85479d8f5a11a0fd7151e9bc4794cc5e6eb5a790f12df8/propcache-0.4.0.tar.gz", hash = "sha256:c1ad731253eb738f9cadd9fa1844e019576c70bca6a534252e97cf33a57da529", size = 45187, upload-time = "2025-10-04T21:57:39.546Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/7b/4bd85fea3dc58b6f246abf0e6c9e44adca26f6817e6c136780315d723b82/propcache-0.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:779aaae64089e2f4992e993faea801925395d26bb5de4a47df7ef7f942c14f80", size = 79437, upload-time = "2025-10-04T21:54:49.766Z" }, - { url = "https://files.pythonhosted.org/packages/e0/91/379ecc1ab37fe33648c7cb2d2252f58969adac1edcd6ec74682d7fb2d920/propcache-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:566552ed9b003030745e5bc7b402b83cf3cecae1bade95262d78543741786db5", size = 45369, upload-time = "2025-10-04T21:54:51.688Z" }, - { url = 
"https://files.pythonhosted.org/packages/de/8e/2e002e59e359bbc6ababbb7da168226f93e0533429ea1e93989a7eedcb2a/propcache-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:944de70384c62d16d4a00c686b422aa75efbc67c4addaebefbb56475d1c16034", size = 47191, upload-time = "2025-10-04T21:54:52.915Z" }, - { url = "https://files.pythonhosted.org/packages/b5/9a/f56eef9932dc3cbc63df4716f09fbaefec7a475608b643842784a01351b6/propcache-0.4.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e878553543ece1f8006d0ba4d096b40290580db173bfb18e16158045b9371335", size = 201000, upload-time = "2025-10-04T21:54:54.556Z" }, - { url = "https://files.pythonhosted.org/packages/6e/84/e7ad1e09c13f0574dbad261441f6a7f1fb8cc1e2fcb23ec4d4b3e4c7dc67/propcache-0.4.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8659f995b19185179474b18de8755689e1f71e1334d05c14e1895caa4e409cf7", size = 209175, upload-time = "2025-10-04T21:54:55.996Z" }, - { url = "https://files.pythonhosted.org/packages/4a/74/0b785ac0fbb44a5a7c267efc409b7a62d7a03b17c6442ecb52fd29152314/propcache-0.4.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7aa8cc5c94e682dce91cb4d12d7b81c01641f4ef5b3b3dc53325d43f0e3b9f2e", size = 214874, upload-time = "2025-10-04T21:54:57.831Z" }, - { url = "https://files.pythonhosted.org/packages/b4/fd/e8d795def2b1d8dc1dc4731d36da1f6111d7c73212909e79462172d0434c/propcache-0.4.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da584d917a1a17f690fc726617fd2c3f3006ea959dae5bb07a5630f7b16f9f5f", size = 196686, upload-time = "2025-10-04T21:54:59.218Z" }, - { url = "https://files.pythonhosted.org/packages/79/c2/dc992c712c3a1bfaa11d13ff177dbdf9b8b272e7bd443601e37f35728338/propcache-0.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:892a072e5b19c3f324a4f8543c9f7e8fc2b0aa08579e46f69bdf0cfc1b440454", size = 
192000, upload-time = "2025-10-04T21:55:00.645Z" }, - { url = "https://files.pythonhosted.org/packages/b3/de/bb108dbdfae594148b033ff283d9fa6e4b0906a99f2c03b98b526883149d/propcache-0.4.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c20d796210720455086ef3f85adc413d1e41d374742f9b439354f122bbc3b528", size = 190310, upload-time = "2025-10-04T21:55:02.107Z" }, - { url = "https://files.pythonhosted.org/packages/ba/7b/1bdb5d44ba4c87d270bcf11354950f8f7fbc9ace1fbe7745e683fcb57b5a/propcache-0.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:df7107a91126a495880576610ae989f19106e1900dd5218d08498391fa43b31d", size = 199646, upload-time = "2025-10-04T21:55:03.55Z" }, - { url = "https://files.pythonhosted.org/packages/e5/04/44beda877f779f49f5b8c0ff4817a62b5f90a2dfac1ec5311df15a9dfceb/propcache-0.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0b04ac2120c161416c866d0b6a4259e47e92231ff166b518cc0efb95777367c3", size = 200507, upload-time = "2025-10-04T21:55:04.914Z" }, - { url = "https://files.pythonhosted.org/packages/d4/62/a13ad0a63e06f3695fcaeaeeeb62e2cc685181a1248b23a2bc877c8b7111/propcache-0.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1e7fa29c71ffa8d6a37324258737d09475f84715a6e8c350f67f0bc8e5e44993", size = 192787, upload-time = "2025-10-04T21:55:06.385Z" }, - { url = "https://files.pythonhosted.org/packages/9b/07/386246b3b4a6b11208bcbf57580210fb8c923ab26759389fe594e5615cd7/propcache-0.4.0-cp310-cp310-win32.whl", hash = "sha256:01c0ebc172ca28e9d62876832befbf7f36080eee6ed9c9e00243de2a8089ad57", size = 38004, upload-time = "2025-10-04T21:55:07.692Z" }, - { url = "https://files.pythonhosted.org/packages/a4/f2/e1fcb9694f590bc443ae5044f982546bb01cbaa3cdf05286e9473a9874bf/propcache-0.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:84f847e64f4d1a232e50460eebc1196642ee9b4c983612f41cd2d44fd2fe7c71", size = 41516, upload-time = "2025-10-04T21:55:08.854Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/f4/d211744d41d72fbb89d3ee53963c1dc26892c49f53ae3c49fbc15cfb2548/propcache-0.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:2166466a666a5bebc332cd209cad77d996fad925ca7e8a2a6310ba9e851ae641", size = 38122, upload-time = "2025-10-04T21:55:10.044Z" }, - { url = "https://files.pythonhosted.org/packages/f9/c4/72b8d41bdbae8aea9c25b869d7cdc3ab5f281f979d8aea30f4646ad12743/propcache-0.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6a6a36b94c09711d6397d79006ca47901539fbc602c853d794c39abd6a326549", size = 80035, upload-time = "2025-10-04T21:55:11.266Z" }, - { url = "https://files.pythonhosted.org/packages/e9/f8/f87115733e221408a363f3a9753419cf2d4be7a8a7ec9dc0788325cd23f1/propcache-0.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:da47070e1340a1639aca6b1c18fe1f1f3d8d64d3a1f9ddc67b94475f44cd40f3", size = 45622, upload-time = "2025-10-04T21:55:12.41Z" }, - { url = "https://files.pythonhosted.org/packages/5d/cc/391f883248faa2efdf6886bdb12ac8edf20eac0863770d8d925450d8cc76/propcache-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de536cf796abc5b58d11c0ad56580215d231d9554ea4bb6b8b1b3bed80aa3234", size = 47517, upload-time = "2025-10-04T21:55:13.819Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d2/5593b59999f42d1044c5ab5f238be1f9d537ab91b0c910727986d520a6e9/propcache-0.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5c82af8e329c3cdc3e717dd3c7b2ff1a218b6de611f6ce76ee34967570a9de9", size = 214540, upload-time = "2025-10-04T21:55:15.206Z" }, - { url = "https://files.pythonhosted.org/packages/bb/5d/028cdc0eaa1a66ee2ec339a08b5e6ec15e7e71dac86103bebe53ba10dc0f/propcache-0.4.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:abe04e7aa5ab2e4056fcf3255ebee2071e4a427681f76d4729519e292c46ecc1", size = 221603, upload-time = "2025-10-04T21:55:16.704Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/f8/e30aee5f59ea21647faef9c82bd67fa510295c34908a7a38571def555881/propcache-0.4.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:075ca32384294434344760fdcb95f7833e1d7cf7c4e55f0e726358140179da35", size = 227749, upload-time = "2025-10-04T21:55:18.082Z" }, - { url = "https://files.pythonhosted.org/packages/d7/85/0757dfc73931bea63b18d26b2c5e7bf13113ca60fe0e5f19905f104bcf6a/propcache-0.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:626ec13592928b677f48ff5861040b604b635e93d8e2162fb638397ea83d07e8", size = 209792, upload-time = "2025-10-04T21:55:19.475Z" }, - { url = "https://files.pythonhosted.org/packages/d2/45/35a6a6241f46948c0ac2418d5bf50cfbcd9735739f42028a1c11e9066a72/propcache-0.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:02e071548b6a376e173b0102c3f55dc16e7d055b5307d487e844c320e38cacf2", size = 207979, upload-time = "2025-10-04T21:55:21.164Z" }, - { url = "https://files.pythonhosted.org/packages/e3/d1/5930396e75c9ed477958eac1496e6fb08794d823e9b14a459f1c0e20f338/propcache-0.4.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:2af6de831a26f42a3f94592964becd8d7f238551786d7525807f02e53defbd13", size = 201923, upload-time = "2025-10-04T21:55:22.5Z" }, - { url = "https://files.pythonhosted.org/packages/98/72/675455f22bcefeda16907461f9a9a4a93709ff2095e8cf799bdb6c78e030/propcache-0.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bd6c6dba1a3b8949e08c4280071c86e38cb602f02e0ed6659234108c7a7cd710", size = 212117, upload-time = "2025-10-04T21:55:23.858Z" }, - { url = "https://files.pythonhosted.org/packages/13/27/c533302ff80a49a848c3dbd01bb18f87b06826602b3b37043ff00d6b5005/propcache-0.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:783e91595cf9b66c2deda17f2e8748ae8591aa9f7c65dcab038872bfe83c5bb1", size = 216594, upload-time = "2025-10-04T21:55:25.169Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/91/8250fbb601fd16c427e5f469132f27e175c6692dbfa784ef1266dc652e55/propcache-0.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c3f4b125285d354a627eb37f3ea7c13b8842c7c0d47783581d0df0e272dbf5f0", size = 204863, upload-time = "2025-10-04T21:55:26.511Z" }, - { url = "https://files.pythonhosted.org/packages/34/c4/fd945a9a25845aafb6094b9fa6a88286e4e1c55686e60172c60fe669e0d1/propcache-0.4.0-cp311-cp311-win32.whl", hash = "sha256:71c45f02ffbb8a21040ae816ceff7f6cd749ffac29fc0f9daa42dc1a9652d577", size = 37948, upload-time = "2025-10-04T21:55:27.719Z" }, - { url = "https://files.pythonhosted.org/packages/42/02/f30e7304661ffe8d51ff4050e06765ac2df6d95cf23c999dfe5a0cd0eb4c/propcache-0.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:7d51f70f77950f8efafed4383865d3533eeee52d8a0dd1c35b65f24de41de4e0", size = 41511, upload-time = "2025-10-04T21:55:29.15Z" }, - { url = "https://files.pythonhosted.org/packages/a5/f2/edd329d86085438a1ba32cf4cf45fc982d18343bed1f16b218b516c3340d/propcache-0.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:858eaabd2191dd0da5272993ad08a748b5d3ae1aefabea8aee619b45c2af4a64", size = 37957, upload-time = "2025-10-04T21:55:30.31Z" }, - { url = "https://files.pythonhosted.org/packages/b3/cf/3f88344261d69f8021256f20e82e820c5df3aba96e5ba9b5fdd3685d3a9f/propcache-0.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:381c84a445efb8c9168f1393a5a7c566de22edc42bfe207a142fff919b37f5d9", size = 79846, upload-time = "2025-10-04T21:55:31.447Z" }, - { url = "https://files.pythonhosted.org/packages/be/fa/0286fc92764eead9dcfee639b67828daa32e61dd0f1618831547141eb28b/propcache-0.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5a531d29d7b873b12730972237c48b1a4e5980b98cf21b3f09fa4710abd3a8c3", size = 45850, upload-time = "2025-10-04T21:55:32.637Z" }, - { url = 
"https://files.pythonhosted.org/packages/c7/83/57840656f972f8a67992eee40781e4066657776dcb889f49df0e8eecb112/propcache-0.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cd6e22255ed73efeaaeb1765505a66a48a9ec9ebc919fce5ad490fe5e33b1555", size = 47171, upload-time = "2025-10-04T21:55:33.819Z" }, - { url = "https://files.pythonhosted.org/packages/9f/8e/e0a0bd376c3440476b924eca517589ee535bb4520420d178268bf88558ba/propcache-0.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9a8d277dc218ddf04ec243a53ac309b1afcebe297c0526a8f82320139b56289", size = 225306, upload-time = "2025-10-04T21:55:35.312Z" }, - { url = "https://files.pythonhosted.org/packages/84/fe/76884442da1bab6d4353ba1c43fdc4a770c3b3973f3ac7620a7205402fdd/propcache-0.4.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:399c73201d88c856a994916200d7cba41d7687096f8eb5139eb68f02785dc3f7", size = 230013, upload-time = "2025-10-04T21:55:37.005Z" }, - { url = "https://files.pythonhosted.org/packages/f4/b7/322af273bd1136bb7e13628821fb855c9f61d64651c73fea71dded68dda5/propcache-0.4.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a1d5e474d43c238035b74ecf997f655afa67f979bae591ac838bb3fbe3076392", size = 238331, upload-time = "2025-10-04T21:55:38.713Z" }, - { url = "https://files.pythonhosted.org/packages/84/5e/036d2b105927ae7f179346c9911d16c345f4dba5a19a063f23a8d28acfbd/propcache-0.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22f589652ee38de96aa58dd219335604e09666092bc250c1d9c26a55bcef9932", size = 221461, upload-time = "2025-10-04T21:55:40.034Z" }, - { url = "https://files.pythonhosted.org/packages/63/0d/babd038efb12a87a46ab070438c52daeac6bed0a930693a418feef8cb8a6/propcache-0.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5227da556b2939da6125cda1d5eecf9e412e58bc97b41e2f192605c3ccbb7c2", size = 
216707, upload-time = "2025-10-04T21:55:41.455Z" }, - { url = "https://files.pythonhosted.org/packages/ab/68/dd075a037381581f16e7e504a6da9c1d7e415e945dd8ed67905d608f0687/propcache-0.4.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:92bc43a1ab852310721ce856f40a3a352254aa6f5e26f0fad870b31be45bba2e", size = 212591, upload-time = "2025-10-04T21:55:42.938Z" }, - { url = "https://files.pythonhosted.org/packages/ff/43/22698f28fc8e04c32b109cb9cb81305a4873b77c907b17484566b6133aef/propcache-0.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:83ae2f5343f6f06f4c91ae530d95f56b415f768f9c401a5ee2a10459cf74370b", size = 220188, upload-time = "2025-10-04T21:55:44.53Z" }, - { url = "https://files.pythonhosted.org/packages/96/7a/27886e4a4c69598a38fbeeed64f9b8ddfa6f08fe3452035845a1fe90336f/propcache-0.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:077a32977399dc05299b16e793210341a0b511eb0a86d1796873e83ce47334cc", size = 226736, upload-time = "2025-10-04T21:55:46.348Z" }, - { url = "https://files.pythonhosted.org/packages/5b/c7/313c632b5888db3c9f4cb262420dcd5e57cf858d939d6ad9c3b1b90c12af/propcache-0.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:94a278c45e6463031b5a8278e40a07edf2bcc3b5379510e22b6c1a6e6498c194", size = 216363, upload-time = "2025-10-04T21:55:47.768Z" }, - { url = "https://files.pythonhosted.org/packages/7a/5d/5aaf82bd1542aedb47d10483b84f49ee8f00d970a58e27534cd241e9c5ac/propcache-0.4.0-cp312-cp312-win32.whl", hash = "sha256:4c491462e1dc80f9deb93f428aad8d83bb286de212837f58eb48e75606e7726c", size = 37945, upload-time = "2025-10-04T21:55:49.104Z" }, - { url = "https://files.pythonhosted.org/packages/4c/67/47ffff6eb176f383f56319f31c0e1bcf7500cb94ffb7582efc600c6b3c73/propcache-0.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cdb0cecafb528ab15ed89cdfed183074d15912d046d3e304955513b50a34b907", size = 41530, upload-time = "2025-10-04T21:55:50.261Z" }, - { url = 
"https://files.pythonhosted.org/packages/f3/7e/61b70306b9d7527286ce887a8ff28c304ab2514e5893eea36b5bdf7a21af/propcache-0.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:b2f29697d1110e8cdf7a39cc630498df0082d7898b79b731c1c863f77c6e8cfc", size = 37662, upload-time = "2025-10-04T21:55:51.35Z" }, - { url = "https://files.pythonhosted.org/packages/cd/dd/f405b0fe84d29d356895bc048404d3321a2df849281cf3f932158c9346ac/propcache-0.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e2d01fd53e89cb3d71d20b8c225a8c70d84660f2d223afc7ed7851a4086afe6d", size = 77565, upload-time = "2025-10-04T21:55:52.907Z" }, - { url = "https://files.pythonhosted.org/packages/c0/48/dfb2c45e1b0d92228c9c66fa929af7316c15cbe69a7e438786aaa60c1b3c/propcache-0.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7dfa60953169d2531dd8ae306e9c27c5d4e5efe7a2ba77049e8afdaece062937", size = 44602, upload-time = "2025-10-04T21:55:54.406Z" }, - { url = "https://files.pythonhosted.org/packages/d0/d9/b15e88b4463df45a7793fb04e2b5497334f8fcc24e281c221150a0af9aff/propcache-0.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:227892597953611fce2601d49f1d1f39786a6aebc2f253c2de775407f725a3f6", size = 46168, upload-time = "2025-10-04T21:55:55.537Z" }, - { url = "https://files.pythonhosted.org/packages/40/ac/983e69cce8800251aab85858069cf9359b22222a9cda47591e03e2f24eec/propcache-0.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e0a5bc019014531308fb67d86066d235daa7551baf2e00e1ea7b00531f6ea85", size = 207997, upload-time = "2025-10-04T21:55:57.022Z" }, - { url = "https://files.pythonhosted.org/packages/ae/9c/5586a7a54e7e0b9a87fdd8ba935961f398c0e6eaecd57baaa8eca468a236/propcache-0.4.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6ebc6e2e65c31356310ddb6519420eaa6bb8c30fbd809d0919129c89dcd70f4c", size = 210948, upload-time = "2025-10-04T21:55:58.397Z" }, - { url = 
"https://files.pythonhosted.org/packages/5f/ba/644e367f8a86461d45bd023ace521180938e76515040550af9b44085e99a/propcache-0.4.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1927b78dd75fc31a7fdc76cc7039e39f3170cb1d0d9a271e60f0566ecb25211a", size = 217988, upload-time = "2025-10-04T21:56:00.251Z" }, - { url = "https://files.pythonhosted.org/packages/24/0e/1e21af74b4732d002b0452605bdf31d6bf990fd8b720cb44e27a97d80db5/propcache-0.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b113feeda47f908562d9a6d0e05798ad2f83d4473c0777dafa2bc7756473218", size = 204442, upload-time = "2025-10-04T21:56:01.93Z" }, - { url = "https://files.pythonhosted.org/packages/fd/30/ae2eec96995a8a760acb9a0b6c92b9815f1fc885c7d8481237ccb554eab0/propcache-0.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4596c12aa7e3bb2abf158ea8f79eb0fb4851606695d04ab846b2bb386f5690a1", size = 199371, upload-time = "2025-10-04T21:56:03.25Z" }, - { url = "https://files.pythonhosted.org/packages/45/1d/a18fac8cb04f8379ccb79cf15aac31f4167a270d1cd1111f33c0d38ce4fb/propcache-0.4.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:6d1f67dad8cc36e8abc2207a77f3f952ac80be7404177830a7af4635a34cbc16", size = 196638, upload-time = "2025-10-04T21:56:04.619Z" }, - { url = "https://files.pythonhosted.org/packages/48/45/3549a2b6f74dce6f21b2664d078bd26ceb876aae9c58f3c017cf590f0ee3/propcache-0.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e6229ad15366cd8b6d6b4185c55dd48debf9ca546f91416ba2e5921ad6e210a6", size = 203651, upload-time = "2025-10-04T21:56:06.153Z" }, - { url = "https://files.pythonhosted.org/packages/7d/f0/90ea14d518c919fc154332742a9302db3004af4f1d3df688676959733283/propcache-0.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2a4bf309d057327f1f227a22ac6baf34a66f9af75e08c613e47c4d775b06d6c7", size = 205726, upload-time = "2025-10-04T21:56:07.955Z" }, - { url = 
"https://files.pythonhosted.org/packages/f6/de/8efc1dbafeb42108e7af744822cdca944b990869e9da70e79efb21569d6b/propcache-0.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c2e274f3d1cbb2ddcc7a55ce3739af0f8510edc68a7f37981b2258fa1eedc833", size = 199576, upload-time = "2025-10-04T21:56:09.43Z" }, - { url = "https://files.pythonhosted.org/packages/d7/38/4d79fe3477b050398fb8d8f59301ed116d8c6ea3c4dbf09498c679103f90/propcache-0.4.0-cp313-cp313-win32.whl", hash = "sha256:f114a3e1f8034e2957d34043b7a317a8a05d97dfe8fddb36d9a2252c0117dbbc", size = 37474, upload-time = "2025-10-04T21:56:10.74Z" }, - { url = "https://files.pythonhosted.org/packages/36/9b/a283daf665a1945cff1b03d1104e7c9ee92bb7b6bbcc6518b24fcdac8bd0/propcache-0.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:9ba68c57cde9c667f6b65b98bc342dfa7240b1272ffb2c24b32172ee61b6d281", size = 40685, upload-time = "2025-10-04T21:56:11.896Z" }, - { url = "https://files.pythonhosted.org/packages/e9/f7/def8fc0b4d7a89f1628f337cb122bb9a946c5ed97760f2442b27b7fa5a69/propcache-0.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:eb77a85253174bf73e52c968b689d64be62d71e8ac33cabef4ca77b03fb4ef92", size = 37046, upload-time = "2025-10-04T21:56:13.021Z" }, - { url = "https://files.pythonhosted.org/packages/ca/6b/f6e8b36b58d17dfb6c505b9ae1163fcf7a4cf98825032fdc77bba4ab5c4a/propcache-0.4.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c0e1c218fff95a66ad9f2f83ad41a67cf4d0a3f527efe820f57bde5fda616de4", size = 81274, upload-time = "2025-10-04T21:56:14.206Z" }, - { url = "https://files.pythonhosted.org/packages/8e/c5/1fd0baa222b8faf53ba04dd4f34de33ea820b80e34f87c7960666bae5f4f/propcache-0.4.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:5710b1c01472542bb024366803812ca13e8774d21381bcfc1f7ae738eeb38acc", size = 46232, upload-time = "2025-10-04T21:56:15.337Z" }, - { url = 
"https://files.pythonhosted.org/packages/cb/6b/7aa5324983cab7666ed58fc32c68a0430468a18e02e3f04e7a879c002414/propcache-0.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d7f008799682e8826ce98f25e8bc43532d2cd26c187a1462499fa8d123ae054f", size = 48239, upload-time = "2025-10-04T21:56:16.768Z" }, - { url = "https://files.pythonhosted.org/packages/24/0f/58c192301c0436762ed5fed5a3edadb0ae399cb73528fb9c1b5cb8e53523/propcache-0.4.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0596d2ae99d74ca436553eb9ce11fe4163dc742fcf8724ebe07d7cb0db679bb1", size = 275804, upload-time = "2025-10-04T21:56:18.066Z" }, - { url = "https://files.pythonhosted.org/packages/f7/b9/092ee32064ebfabedae4251952787e63e551075af1a1205e8061b3ed5838/propcache-0.4.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab9c1bd95ebd1689f0e24f2946c495808777e9e8df7bb3c1dfe3e9eb7f47fe0d", size = 273996, upload-time = "2025-10-04T21:56:19.801Z" }, - { url = "https://files.pythonhosted.org/packages/43/82/becf618ed28e732f3bba3df172cd290a1afbd99f291074f747fd5bd031bb/propcache-0.4.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a8ef2ea819549ae2e8698d2ec229ae948d7272feea1cb2878289f767b6c585a4", size = 280266, upload-time = "2025-10-04T21:56:21.136Z" }, - { url = "https://files.pythonhosted.org/packages/51/be/b370930249a9332a81b5c4c550dac614b7e11b6c160080777e903d57e197/propcache-0.4.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:71a400b2f0b079438cc24f9a27f02eff24d8ef78f2943f949abc518b844ade3d", size = 263186, upload-time = "2025-10-04T21:56:22.787Z" }, - { url = "https://files.pythonhosted.org/packages/33/b6/546fd3e31770aed3aed1c01b120944c689edb510aeb7a25472edc472ce23/propcache-0.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4c2735d3305e6cecab6e53546909edf407ad3da5b9eeaf483f4cf80142bb21be", 
size = 260721, upload-time = "2025-10-04T21:56:24.22Z" }, - { url = "https://files.pythonhosted.org/packages/80/70/3751930d16e5984490c73ca65b80777e4b26e7a0015f2d41f31d75959a71/propcache-0.4.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:72b51340047ac43b3cf388eebd362d052632260c9f73a50882edbb66e589fd44", size = 247516, upload-time = "2025-10-04T21:56:25.577Z" }, - { url = "https://files.pythonhosted.org/packages/59/90/4bc96ce6476f67e2e6b72469f328c92b53259a0e4d1d5386d71a36e9258c/propcache-0.4.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:184c779363740d6664982ad05699f378f7694220e2041996f12b7c2a4acdcad0", size = 262675, upload-time = "2025-10-04T21:56:27.065Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d1/f16d096869c5f1c93d67fc37488c0c814add0560574f6877653a10239cde/propcache-0.4.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:a60634a9de41f363923c6adfb83105d39e49f7a3058511563ed3de6748661af6", size = 263379, upload-time = "2025-10-04T21:56:28.517Z" }, - { url = "https://files.pythonhosted.org/packages/ab/2a/da5cd1bc1c6412939c457ea65bbe7e034045c395d98ff8ff880d06ec4553/propcache-0.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c9b8119244d122241a9c4566bce49bb20408a6827044155856735cf14189a7da", size = 257694, upload-time = "2025-10-04T21:56:30.051Z" }, - { url = "https://files.pythonhosted.org/packages/a5/11/938e67c07189b662a6c72551d48285a02496de885408392447c25657dd47/propcache-0.4.0-cp313-cp313t-win32.whl", hash = "sha256:515b610a364c8cdd2b72c734cc97dece85c416892ea8d5c305624ac8734e81db", size = 41321, upload-time = "2025-10-04T21:56:31.406Z" }, - { url = "https://files.pythonhosted.org/packages/f4/6e/72b11a4dcae68c728b15126cc5bc830bf275c84836da2633412b768d07e0/propcache-0.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7ea86eb32e74f9902df57e8608e8ac66f1e1e1d24d1ed2ddeb849888413b924d", size = 44846, upload-time = "2025-10-04T21:56:32.5Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/09/0ef3c025e0621e703ef71b69e0085181a3124bcc1beef29e0ffef59ed7f4/propcache-0.4.0-cp313-cp313t-win_arm64.whl", hash = "sha256:c1443fa4bb306461a3a8a52b7de0932a2515b100ecb0ebc630cc3f87d451e0a9", size = 39689, upload-time = "2025-10-04T21:56:33.686Z" }, - { url = "https://files.pythonhosted.org/packages/60/89/7699d8e9f8c222bbef1fae26afd72d448353f164a52125d5f87dd9fec2c7/propcache-0.4.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:de8e310d24b5a61de08812dd70d5234da1458d41b059038ee7895a9e4c8cae79", size = 77977, upload-time = "2025-10-04T21:56:34.836Z" }, - { url = "https://files.pythonhosted.org/packages/77/c5/2758a498199ce46d6d500ba4391a8594df35400cc85738aa9f0c9b8366db/propcache-0.4.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:55a54de5266bc44aa274915cdf388584fa052db8748a869e5500ab5993bac3f4", size = 44715, upload-time = "2025-10-04T21:56:36.075Z" }, - { url = "https://files.pythonhosted.org/packages/0d/da/5a44e10282a28c2dd576e5e1a2c7bb8145587070ddab7375fb643f7129d7/propcache-0.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:88d50d662c917ec2c9d3858920aa7b9d5bfb74ab9c51424b775ccbe683cb1b4e", size = 46463, upload-time = "2025-10-04T21:56:37.227Z" }, - { url = "https://files.pythonhosted.org/packages/d5/5a/b2c314f655f46c10c204dc0d69e19fadfb1cc4d40ab33f403698a35c3281/propcache-0.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae3adf88a66f5863cf79394bc359da523bb27a2ed6ba9898525a6a02b723bfc5", size = 206980, upload-time = "2025-10-04T21:56:38.828Z" }, - { url = "https://files.pythonhosted.org/packages/7c/4e/f6643ec2cd5527b92c93488f9b67a170494736bb1c5460136399d709ce5a/propcache-0.4.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7f088e21d15b3abdb9047e4b7b7a0acd79bf166893ac2b34a72ab1062feb219e", size = 211385, upload-time = "2025-10-04T21:56:40.2Z" }, - { url = 
"https://files.pythonhosted.org/packages/71/41/362766a346c3f8d3bbeb7899e1ff40f18844e0fe37e9f6f536553cf6b6be/propcache-0.4.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a4efbaf10793fd574c76a5732c75452f19d93df6e0f758c67dd60552ebd8614b", size = 215315, upload-time = "2025-10-04T21:56:41.574Z" }, - { url = "https://files.pythonhosted.org/packages/ff/98/17385d51816d56fa6acc035d8625fbf833b6a795d7ef7fb37ea3f62db6c9/propcache-0.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:681a168d06284602d56e97f09978057aa88bcc4177352b875b3d781df4efd4cb", size = 201416, upload-time = "2025-10-04T21:56:42.947Z" }, - { url = "https://files.pythonhosted.org/packages/7a/83/801178ca1c29e217564ee507ff2a49d3f24a4dd85c9b9d681fd1d62b15f2/propcache-0.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a7f06f077fc4ef37e8a37ca6bbb491b29e29db9fb28e29cf3896aad10dbd4137", size = 197726, upload-time = "2025-10-04T21:56:44.313Z" }, - { url = "https://files.pythonhosted.org/packages/d2/38/c8743917bca92b7e5474366b6b04c7b3982deac32a0fe4b705f2e92c09bb/propcache-0.4.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:082a643479f49a6778dcd68a80262fc324b14fd8e9b1a5380331fe41adde1738", size = 192819, upload-time = "2025-10-04T21:56:45.702Z" }, - { url = "https://files.pythonhosted.org/packages/0b/74/3de3ef483e8615aaaf62026fcdcb20cbfc4535ea14871b12f72d52c1d6dc/propcache-0.4.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:26692850120241a99bb4a4eec675cd7b4fdc431144f0d15ef69f7f8599f6165f", size = 202492, upload-time = "2025-10-04T21:56:47.388Z" }, - { url = "https://files.pythonhosted.org/packages/46/86/a130dd85199d651a6986ba6bf1ce297b7bbcafc01c8e139e6ba2b8218a20/propcache-0.4.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:33ad7d37b9a386f97582f5d042cc7b8d4b3591bb384cf50866b749a17e4dba90", size = 204106, upload-time = "2025-10-04T21:56:49.139Z" }, - { url = 
"https://files.pythonhosted.org/packages/b2/f7/44eab58659d71d21995146c94139e63882bac280065b3a9ed10376897bcc/propcache-0.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1e7fd82d4a5b7583588f103b0771e43948532f1292105f13ee6f3b300933c4ca", size = 198043, upload-time = "2025-10-04T21:56:50.561Z" }, - { url = "https://files.pythonhosted.org/packages/96/14/df37be1bf1423d2dda201a4cdb1c5cb44048d34e31a97df227cc25b0a55c/propcache-0.4.0-cp314-cp314-win32.whl", hash = "sha256:213eb0d3bc695a70cffffe11a1c2e1c2698d89ffd8dba35a49bc44a035d45c93", size = 38036, upload-time = "2025-10-04T21:56:51.868Z" }, - { url = "https://files.pythonhosted.org/packages/99/96/9cea65d6c50224737e80c57a3f3db4ca81bc7b1b52bc73346df8c50db400/propcache-0.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:087e2d3d7613e1b59b2ffca0daabd500c1a032d189c65625ee05ea114afcad0b", size = 41156, upload-time = "2025-10-04T21:56:53.242Z" }, - { url = "https://files.pythonhosted.org/packages/52/4d/91523dcbe23cc127b097623a6ba177da51fca6b7c979082aa49745b527b7/propcache-0.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:94b0f7407d18001dbdcbb239512e753b1b36725a6e08a4983be1c948f5435f79", size = 37976, upload-time = "2025-10-04T21:56:54.351Z" }, - { url = "https://files.pythonhosted.org/packages/ec/f7/7118a944cb6cdb548c9333cf311bda120f9793ecca54b2ca4a3f7e58723e/propcache-0.4.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b730048ae8b875e2c0af1a09ca31b303fc7b5ed27652beec03fa22b29545aec9", size = 81270, upload-time = "2025-10-04T21:56:55.516Z" }, - { url = "https://files.pythonhosted.org/packages/ab/f9/04a8bc9977ea201783f3ccb04106f44697f635f70439a208852d4d08554d/propcache-0.4.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f495007ada16a4e16312b502636fafff42a9003adf1d4fb7541e0a0870bc056f", size = 46224, upload-time = "2025-10-04T21:56:56.695Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/3d/808b074034156f130a0047304d811a5a5df3bb0976c9adfb9383718fd888/propcache-0.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:659a0ea6d9017558ed7af00fb4028186f64d0ba9adfc70a4d2c85fcd3d026321", size = 48246, upload-time = "2025-10-04T21:56:57.926Z" }, - { url = "https://files.pythonhosted.org/packages/66/eb/e311f3a59ddc93078cb079b12699af9fd844142c4b4d382b386ee071d921/propcache-0.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d74aa60b1ec076d4d5dcde27c9a535fc0ebb12613f599681c438ca3daa68acac", size = 275562, upload-time = "2025-10-04T21:56:59.221Z" }, - { url = "https://files.pythonhosted.org/packages/f4/05/a146094d6a00bb2f2036dd2a2f4c2b2733ff9574b59ce53bd8513edfca5d/propcache-0.4.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:34000e31795bdcda9826e0e70e783847a42e3dcd0d6416c5d3cb717905ebaec0", size = 273627, upload-time = "2025-10-04T21:57:00.582Z" }, - { url = "https://files.pythonhosted.org/packages/91/95/a6d138f6e3d5f6c9b34dbd336b964a1293f2f1a79cafbe70ae3403d7cc46/propcache-0.4.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bcb5bfac5b9635e6fc520c8af6efc7a0a56f12a1fe9e9d3eb4328537e316dd6a", size = 279778, upload-time = "2025-10-04T21:57:01.944Z" }, - { url = "https://files.pythonhosted.org/packages/ac/09/19594a20da0519bfa00deef8cf35dda6c9a5b51bba947f366e85ea59b3de/propcache-0.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ea11fceb31fa95b0fa2007037f19e922e2caceb7dc6c6cac4cb56e2d291f1a2", size = 262833, upload-time = "2025-10-04T21:57:03.326Z" }, - { url = "https://files.pythonhosted.org/packages/b5/92/60d2ddc7662f7b2720d3b628ad8ce888015f4ab5c335b7b1b50183194e68/propcache-0.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:cd8684f628fe285ea5c86f88e1c30716239dc9d6ac55e7851a4b7f555b628da3", 
size = 260456, upload-time = "2025-10-04T21:57:05.159Z" }, - { url = "https://files.pythonhosted.org/packages/6f/e2/4c2e25c77cf43add2e05a86c4fcf51107edc4d92318e5c593bbdc2515d57/propcache-0.4.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:790286d3d542c0ef9f6d0280d1049378e5e776dcba780d169298f664c39394db", size = 247284, upload-time = "2025-10-04T21:57:06.566Z" }, - { url = "https://files.pythonhosted.org/packages/dc/3e/c273ab8edc80683ec8b15b486e95c03096ef875d99e4b0ab0a36c1e42c94/propcache-0.4.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:009093c9b5dbae114a5958e6a649f8a5d94dd6866b0f82b60395eb92c58002d4", size = 262368, upload-time = "2025-10-04T21:57:08.231Z" }, - { url = "https://files.pythonhosted.org/packages/ac/a9/3fa231f65a9f78614c5aafa9cee788d7f55c22187cc2f33e86c7c16d0262/propcache-0.4.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:728d98179e92d77096937fdfecd2c555a3d613abe56c9909165c24196a3b5012", size = 263010, upload-time = "2025-10-04T21:57:09.641Z" }, - { url = "https://files.pythonhosted.org/packages/38/a0/f4f5d368e60c9dc04d3158eaf1ca0ad899b40ac3d29c015bf62735225a6f/propcache-0.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a9725d96a81e17e48a0fe82d0c3de2f5e623d7163fec70a6c7df90753edd1bec", size = 257298, upload-time = "2025-10-04T21:57:11.125Z" }, - { url = "https://files.pythonhosted.org/packages/c7/30/f78d6758dc36a98f1cddc39b3185cefde616cc58248715b7c65495491cb1/propcache-0.4.0-cp314-cp314t-win32.whl", hash = "sha256:0964c55c95625193defeb4fd85f8f28a9a754ed012cab71127d10e3dc66b1373", size = 42484, upload-time = "2025-10-04T21:57:12.652Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ad/de0640e9b56d2caa796c4266d7d1e6cc4544cc327c25b7ced5c59893b625/propcache-0.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:24403152e41abf09488d3ae9c0c3bf7ff93e2fb12b435390718f21810353db28", size = 46229, upload-time = "2025-10-04T21:57:14.034Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/bf/5aed62dddbf2bbe62a3564677436261909c9dd63a0fa1fb6cf0629daa13c/propcache-0.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0363a696a9f24b37a04ed5e34c2e07ccbe92798c998d37729551120a1bb744c4", size = 40329, upload-time = "2025-10-04T21:57:15.198Z" }, - { url = "https://files.pythonhosted.org/packages/c7/16/794c114f6041bbe2de23eb418ef58a0f45de27224d5540f5dbb266a73d72/propcache-0.4.0-py3-none-any.whl", hash = "sha256:015b2ca2f98ea9e08ac06eecc409d5d988f78c5fd5821b2ad42bc9afcd6b1557", size = 13183, upload-time = "2025-10-04T21:57:38.054Z" }, +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/0e/934b541323035566a9af292dba85a195f7b78179114f2c6ebb24551118a9/propcache-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c2d1fa3201efaf55d730400d945b5b3ab6e672e100ba0f9a409d950ab25d7db", size = 79534, upload-time = "2025-10-08T19:46:02.083Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6b/db0d03d96726d995dc7171286c6ba9d8d14251f37433890f88368951a44e/propcache-0.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1eb2994229cc8ce7fe9b3db88f5465f5fd8651672840b2e426b88cdb1a30aac8", size = 45526, upload-time = "2025-10-08T19:46:03.884Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c3/82728404aea669e1600f304f2609cde9e665c18df5a11cdd57ed73c1dceb/propcache-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:66c1f011f45a3b33d7bcb22daed4b29c0c9e2224758b6be00686731e1b46f925", size = 47263, upload-time = "2025-10-08T19:46:05.405Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/1b/39313ddad2bf9187a1432654c38249bab4562ef535ef07f5eb6eb04d0b1b/propcache-0.4.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9a52009f2adffe195d0b605c25ec929d26b36ef986ba85244891dee3b294df21", size = 201012, upload-time = "2025-10-08T19:46:07.165Z" }, + { url = "https://files.pythonhosted.org/packages/5b/01/f1d0b57d136f294a142acf97f4ed58c8e5b974c21e543000968357115011/propcache-0.4.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5d4e2366a9c7b837555cf02fb9be2e3167d333aff716332ef1b7c3a142ec40c5", size = 209491, upload-time = "2025-10-08T19:46:08.909Z" }, + { url = "https://files.pythonhosted.org/packages/a1/c8/038d909c61c5bb039070b3fb02ad5cccdb1dde0d714792e251cdb17c9c05/propcache-0.4.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9d2b6caef873b4f09e26ea7e33d65f42b944837563a47a94719cc3544319a0db", size = 215319, upload-time = "2025-10-08T19:46:10.7Z" }, + { url = "https://files.pythonhosted.org/packages/08/57/8c87e93142b2c1fa2408e45695205a7ba05fb5db458c0bf5c06ba0e09ea6/propcache-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b16ec437a8c8a965ecf95739448dd938b5c7f56e67ea009f4300d8df05f32b7", size = 196856, upload-time = "2025-10-08T19:46:12.003Z" }, + { url = "https://files.pythonhosted.org/packages/42/df/5615fec76aa561987a534759b3686008a288e73107faa49a8ae5795a9f7a/propcache-0.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:296f4c8ed03ca7476813fe666c9ea97869a8d7aec972618671b33a38a5182ef4", size = 193241, upload-time = "2025-10-08T19:46:13.495Z" }, + { url = "https://files.pythonhosted.org/packages/d5/21/62949eb3a7a54afe8327011c90aca7e03547787a88fb8bd9726806482fea/propcache-0.4.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:1f0978529a418ebd1f49dad413a2b68af33f85d5c5ca5c6ca2a3bed375a7ac60", size = 
190552, upload-time = "2025-10-08T19:46:14.938Z" }, + { url = "https://files.pythonhosted.org/packages/30/ee/ab4d727dd70806e5b4de96a798ae7ac6e4d42516f030ee60522474b6b332/propcache-0.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fd138803047fb4c062b1c1dd95462f5209456bfab55c734458f15d11da288f8f", size = 200113, upload-time = "2025-10-08T19:46:16.695Z" }, + { url = "https://files.pythonhosted.org/packages/8a/0b/38b46208e6711b016aa8966a3ac793eee0d05c7159d8342aa27fc0bc365e/propcache-0.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8c9b3cbe4584636d72ff556d9036e0c9317fa27b3ac1f0f558e7e84d1c9c5900", size = 200778, upload-time = "2025-10-08T19:46:18.023Z" }, + { url = "https://files.pythonhosted.org/packages/cf/81/5abec54355ed344476bee711e9f04815d4b00a311ab0535599204eecc257/propcache-0.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f93243fdc5657247533273ac4f86ae106cc6445a0efacb9a1bfe982fcfefd90c", size = 193047, upload-time = "2025-10-08T19:46:19.449Z" }, + { url = "https://files.pythonhosted.org/packages/ec/b6/1f237c04e32063cb034acd5f6ef34ef3a394f75502e72703545631ab1ef6/propcache-0.4.1-cp310-cp310-win32.whl", hash = "sha256:a0ee98db9c5f80785b266eb805016e36058ac72c51a064040f2bc43b61101cdb", size = 38093, upload-time = "2025-10-08T19:46:20.643Z" }, + { url = "https://files.pythonhosted.org/packages/a6/67/354aac4e0603a15f76439caf0427781bcd6797f370377f75a642133bc954/propcache-0.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:1cdb7988c4e5ac7f6d175a28a9aa0c94cb6f2ebe52756a3c0cda98d2809a9e37", size = 41638, upload-time = "2025-10-08T19:46:21.935Z" }, + { url = "https://files.pythonhosted.org/packages/e0/e1/74e55b9fd1a4c209ff1a9a824bf6c8b3d1fc5a1ac3eabe23462637466785/propcache-0.4.1-cp310-cp310-win_arm64.whl", hash = "sha256:d82ad62b19645419fe79dd63b3f9253e15b30e955c0170e5cebc350c1844e581", size = 38229, upload-time = "2025-10-08T19:46:23.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/d4/4e2c9aaf7ac2242b9358f98dccd8f90f2605402f5afeff6c578682c2c491/propcache-0.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:60a8fda9644b7dfd5dece8c61d8a85e271cb958075bfc4e01083c148b61a7caf", size = 80208, upload-time = "2025-10-08T19:46:24.597Z" }, + { url = "https://files.pythonhosted.org/packages/c2/21/d7b68e911f9c8e18e4ae43bdbc1e1e9bbd971f8866eb81608947b6f585ff/propcache-0.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c30b53e7e6bda1d547cabb47c825f3843a0a1a42b0496087bb58d8fedf9f41b5", size = 45777, upload-time = "2025-10-08T19:46:25.733Z" }, + { url = "https://files.pythonhosted.org/packages/d3/1d/11605e99ac8ea9435651ee71ab4cb4bf03f0949586246476a25aadfec54a/propcache-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6918ecbd897443087a3b7cd978d56546a812517dcaaca51b49526720571fa93e", size = 47647, upload-time = "2025-10-08T19:46:27.304Z" }, + { url = "https://files.pythonhosted.org/packages/58/1a/3c62c127a8466c9c843bccb503d40a273e5cc69838805f322e2826509e0d/propcache-0.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d902a36df4e5989763425a8ab9e98cd8ad5c52c823b34ee7ef307fd50582566", size = 214929, upload-time = "2025-10-08T19:46:28.62Z" }, + { url = "https://files.pythonhosted.org/packages/56/b9/8fa98f850960b367c4b8fe0592e7fc341daa7a9462e925228f10a60cf74f/propcache-0.4.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a9695397f85973bb40427dedddf70d8dc4a44b22f1650dd4af9eedf443d45165", size = 221778, upload-time = "2025-10-08T19:46:30.358Z" }, + { url = "https://files.pythonhosted.org/packages/46/a6/0ab4f660eb59649d14b3d3d65c439421cf2f87fe5dd68591cbe3c1e78a89/propcache-0.4.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2bb07ffd7eaad486576430c89f9b215f9e4be68c4866a96e97db9e97fead85dc", size = 228144, upload-time = "2025-10-08T19:46:32.607Z" 
}, + { url = "https://files.pythonhosted.org/packages/52/6a/57f43e054fb3d3a56ac9fc532bc684fc6169a26c75c353e65425b3e56eef/propcache-0.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd6f30fdcf9ae2a70abd34da54f18da086160e4d7d9251f81f3da0ff84fc5a48", size = 210030, upload-time = "2025-10-08T19:46:33.969Z" }, + { url = "https://files.pythonhosted.org/packages/40/e2/27e6feebb5f6b8408fa29f5efbb765cd54c153ac77314d27e457a3e993b7/propcache-0.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fc38cba02d1acba4e2869eef1a57a43dfbd3d49a59bf90dda7444ec2be6a5570", size = 208252, upload-time = "2025-10-08T19:46:35.309Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f8/91c27b22ccda1dbc7967f921c42825564fa5336a01ecd72eb78a9f4f53c2/propcache-0.4.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:67fad6162281e80e882fb3ec355398cf72864a54069d060321f6cd0ade95fe85", size = 202064, upload-time = "2025-10-08T19:46:36.993Z" }, + { url = "https://files.pythonhosted.org/packages/f2/26/7f00bd6bd1adba5aafe5f4a66390f243acab58eab24ff1a08bebb2ef9d40/propcache-0.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f10207adf04d08bec185bae14d9606a1444715bc99180f9331c9c02093e1959e", size = 212429, upload-time = "2025-10-08T19:46:38.398Z" }, + { url = "https://files.pythonhosted.org/packages/84/89/fd108ba7815c1117ddca79c228f3f8a15fc82a73bca8b142eb5de13b2785/propcache-0.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e9b0d8d0845bbc4cfcdcbcdbf5086886bc8157aa963c31c777ceff7846c77757", size = 216727, upload-time = "2025-10-08T19:46:39.732Z" }, + { url = "https://files.pythonhosted.org/packages/79/37/3ec3f7e3173e73f1d600495d8b545b53802cbf35506e5732dd8578db3724/propcache-0.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:981333cb2f4c1896a12f4ab92a9cc8f09ea664e9b7dbdc4eff74627af3a11c0f", size = 205097, upload-time = "2025-10-08T19:46:41.025Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/b0/b2631c19793f869d35f47d5a3a56fb19e9160d3c119f15ac7344fc3ccae7/propcache-0.4.1-cp311-cp311-win32.whl", hash = "sha256:f1d2f90aeec838a52f1c1a32fe9a619fefd5e411721a9117fbf82aea638fe8a1", size = 38084, upload-time = "2025-10-08T19:46:42.693Z" }, + { url = "https://files.pythonhosted.org/packages/f4/78/6cce448e2098e9f3bfc91bb877f06aa24b6ccace872e39c53b2f707c4648/propcache-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:364426a62660f3f699949ac8c621aad6977be7126c5807ce48c0aeb8e7333ea6", size = 41637, upload-time = "2025-10-08T19:46:43.778Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e9/754f180cccd7f51a39913782c74717c581b9cc8177ad0e949f4d51812383/propcache-0.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:e53f3a38d3510c11953f3e6a33f205c6d1b001129f972805ca9b42fc308bc239", size = 38064, upload-time = "2025-10-08T19:46:44.872Z" }, + { url = "https://files.pythonhosted.org/packages/a2/0f/f17b1b2b221d5ca28b4b876e8bb046ac40466513960646bda8e1853cdfa2/propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2", size = 80061, upload-time = "2025-10-08T19:46:46.075Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403", size = 46037, upload-time = "2025-10-08T19:46:47.23Z" }, + { url = "https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207", size = 47324, upload-time = "2025-10-08T19:46:48.384Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/d3/6c7ee328b39a81ee877c962469f1e795f9db87f925251efeb0545e0020d0/propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72", size = 225505, upload-time = "2025-10-08T19:46:50.055Z" }, + { url = "https://files.pythonhosted.org/packages/01/5d/1c53f4563490b1d06a684742cc6076ef944bc6457df6051b7d1a877c057b/propcache-0.4.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367", size = 230242, upload-time = "2025-10-08T19:46:51.815Z" }, + { url = "https://files.pythonhosted.org/packages/20/e1/ce4620633b0e2422207c3cb774a0ee61cac13abc6217763a7b9e2e3f4a12/propcache-0.4.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4", size = 238474, upload-time = "2025-10-08T19:46:53.208Z" }, + { url = "https://files.pythonhosted.org/packages/46/4b/3aae6835b8e5f44ea6a68348ad90f78134047b503765087be2f9912140ea/propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf", size = 221575, upload-time = "2025-10-08T19:46:54.511Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a5/8a5e8678bcc9d3a1a15b9a29165640d64762d424a16af543f00629c87338/propcache-0.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3", size = 216736, upload-time = "2025-10-08T19:46:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/f1/63/b7b215eddeac83ca1c6b934f89d09a625aa9ee4ba158338854c87210cc36/propcache-0.4.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778", size 
= 213019, upload-time = "2025-10-08T19:46:57.595Z" }, + { url = "https://files.pythonhosted.org/packages/57/74/f580099a58c8af587cac7ba19ee7cb418506342fbbe2d4a4401661cca886/propcache-0.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6", size = 220376, upload-time = "2025-10-08T19:46:59.067Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ee/542f1313aff7eaf19c2bb758c5d0560d2683dac001a1c96d0774af799843/propcache-0.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9", size = 226988, upload-time = "2025-10-08T19:47:00.544Z" }, + { url = "https://files.pythonhosted.org/packages/8f/18/9c6b015dd9c6930f6ce2229e1f02fb35298b847f2087ea2b436a5bfa7287/propcache-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75", size = 215615, upload-time = "2025-10-08T19:47:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/80/9e/e7b85720b98c45a45e1fca6a177024934dc9bc5f4d5dd04207f216fc33ed/propcache-0.4.1-cp312-cp312-win32.whl", hash = "sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8", size = 38066, upload-time = "2025-10-08T19:47:03.503Z" }, + { url = "https://files.pythonhosted.org/packages/54/09/d19cff2a5aaac632ec8fc03737b223597b1e347416934c1b3a7df079784c/propcache-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db", size = 41655, upload-time = "2025-10-08T19:47:04.973Z" }, + { url = "https://files.pythonhosted.org/packages/68/ab/6b5c191bb5de08036a8c697b265d4ca76148efb10fa162f14af14fb5f076/propcache-0.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1", size = 37789, upload-time = "2025-10-08T19:47:06.077Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/df/6d9c1b6ac12b003837dde8a10231a7344512186e87b36e855bef32241942/propcache-0.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf", size = 77750, upload-time = "2025-10-08T19:47:07.648Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e8/677a0025e8a2acf07d3418a2e7ba529c9c33caf09d3c1f25513023c1db56/propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311", size = 44780, upload-time = "2025-10-08T19:47:08.851Z" }, + { url = "https://files.pythonhosted.org/packages/89/a4/92380f7ca60f99ebae761936bc48a72a639e8a47b29050615eef757cb2a7/propcache-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74", size = 46308, upload-time = "2025-10-08T19:47:09.982Z" }, + { url = "https://files.pythonhosted.org/packages/2d/48/c5ac64dee5262044348d1d78a5f85dd1a57464a60d30daee946699963eb3/propcache-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe", size = 208182, upload-time = "2025-10-08T19:47:11.319Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0c/cd762dd011a9287389a6a3eb43aa30207bde253610cca06824aeabfe9653/propcache-0.4.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af", size = 211215, upload-time = "2025-10-08T19:47:13.146Z" }, + { url = "https://files.pythonhosted.org/packages/30/3e/49861e90233ba36890ae0ca4c660e95df565b2cd15d4a68556ab5865974e/propcache-0.4.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c", size = 218112, upload-time = 
"2025-10-08T19:47:14.913Z" }, + { url = "https://files.pythonhosted.org/packages/f1/8b/544bc867e24e1bd48f3118cecd3b05c694e160a168478fa28770f22fd094/propcache-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f", size = 204442, upload-time = "2025-10-08T19:47:16.277Z" }, + { url = "https://files.pythonhosted.org/packages/50/a6/4282772fd016a76d3e5c0df58380a5ea64900afd836cec2c2f662d1b9bb3/propcache-0.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1", size = 199398, upload-time = "2025-10-08T19:47:17.962Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ec/d8a7cd406ee1ddb705db2139f8a10a8a427100347bd698e7014351c7af09/propcache-0.4.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24", size = 196920, upload-time = "2025-10-08T19:47:19.355Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/f38ab64af3764f431e359f8baf9e0a21013e24329e8b85d2da32e8ed07ca/propcache-0.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa", size = 203748, upload-time = "2025-10-08T19:47:21.338Z" }, + { url = "https://files.pythonhosted.org/packages/d6/e3/fa846bd70f6534d647886621388f0a265254d30e3ce47e5c8e6e27dbf153/propcache-0.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61", size = 205877, upload-time = "2025-10-08T19:47:23.059Z" }, + { url = "https://files.pythonhosted.org/packages/e2/39/8163fc6f3133fea7b5f2827e8eba2029a0277ab2c5beee6c1db7b10fc23d/propcache-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66", size = 199437, upload-time = "2025-10-08T19:47:24.445Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/89/caa9089970ca49c7c01662bd0eeedfe85494e863e8043565aeb6472ce8fe/propcache-0.4.1-cp313-cp313-win32.whl", hash = "sha256:bcc9aaa5d80322bc2fb24bb7accb4a30f81e90ab8d6ba187aec0744bc302ad81", size = 37586, upload-time = "2025-10-08T19:47:25.736Z" }, + { url = "https://files.pythonhosted.org/packages/f5/ab/f76ec3c3627c883215b5c8080debb4394ef5a7a29be811f786415fc1e6fd/propcache-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:381914df18634f5494334d201e98245c0596067504b9372d8cf93f4bb23e025e", size = 40790, upload-time = "2025-10-08T19:47:26.847Z" }, + { url = "https://files.pythonhosted.org/packages/59/1b/e71ae98235f8e2ba5004d8cb19765a74877abf189bc53fc0c80d799e56c3/propcache-0.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:8873eb4460fd55333ea49b7d189749ecf6e55bf85080f11b1c4530ed3034cba1", size = 37158, upload-time = "2025-10-08T19:47:27.961Z" }, + { url = "https://files.pythonhosted.org/packages/83/ce/a31bbdfc24ee0dcbba458c8175ed26089cf109a55bbe7b7640ed2470cfe9/propcache-0.4.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b", size = 81451, upload-time = "2025-10-08T19:47:29.445Z" }, + { url = "https://files.pythonhosted.org/packages/25/9c/442a45a470a68456e710d96cacd3573ef26a1d0a60067e6a7d5e655621ed/propcache-0.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566", size = 46374, upload-time = "2025-10-08T19:47:30.579Z" }, + { url = "https://files.pythonhosted.org/packages/f4/bf/b1d5e21dbc3b2e889ea4327044fb16312a736d97640fb8b6aa3f9c7b3b65/propcache-0.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835", size = 48396, upload-time = "2025-10-08T19:47:31.79Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/04/5b4c54a103d480e978d3c8a76073502b18db0c4bc17ab91b3cb5092ad949/propcache-0.4.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e", size = 275950, upload-time = "2025-10-08T19:47:33.481Z" }, + { url = "https://files.pythonhosted.org/packages/b4/c1/86f846827fb969c4b78b0af79bba1d1ea2156492e1b83dea8b8a6ae27395/propcache-0.4.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859", size = 273856, upload-time = "2025-10-08T19:47:34.906Z" }, + { url = "https://files.pythonhosted.org/packages/36/1d/fc272a63c8d3bbad6878c336c7a7dea15e8f2d23a544bda43205dfa83ada/propcache-0.4.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b", size = 280420, upload-time = "2025-10-08T19:47:36.338Z" }, + { url = "https://files.pythonhosted.org/packages/07/0c/01f2219d39f7e53d52e5173bcb09c976609ba30209912a0680adfb8c593a/propcache-0.4.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0", size = 263254, upload-time = "2025-10-08T19:47:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/2d/18/cd28081658ce597898f0c4d174d4d0f3c5b6d4dc27ffafeef835c95eb359/propcache-0.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af", size = 261205, upload-time = "2025-10-08T19:47:39.659Z" }, + { url = "https://files.pythonhosted.org/packages/7a/71/1f9e22eb8b8316701c2a19fa1f388c8a3185082607da8e406a803c9b954e/propcache-0.4.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = 
"sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393", size = 247873, upload-time = "2025-10-08T19:47:41.084Z" }, + { url = "https://files.pythonhosted.org/packages/4a/65/3d4b61f36af2b4eddba9def857959f1016a51066b4f1ce348e0cf7881f58/propcache-0.4.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874", size = 262739, upload-time = "2025-10-08T19:47:42.51Z" }, + { url = "https://files.pythonhosted.org/packages/2a/42/26746ab087faa77c1c68079b228810436ccd9a5ce9ac85e2b7307195fd06/propcache-0.4.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7", size = 263514, upload-time = "2025-10-08T19:47:43.927Z" }, + { url = "https://files.pythonhosted.org/packages/94/13/630690fe201f5502d2403dd3cfd451ed8858fe3c738ee88d095ad2ff407b/propcache-0.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1", size = 257781, upload-time = "2025-10-08T19:47:45.448Z" }, + { url = "https://files.pythonhosted.org/packages/92/f7/1d4ec5841505f423469efbfc381d64b7b467438cd5a4bbcbb063f3b73d27/propcache-0.4.1-cp313-cp313t-win32.whl", hash = "sha256:2ad890caa1d928c7c2965b48f3a3815c853180831d0e5503d35cf00c472f4717", size = 41396, upload-time = "2025-10-08T19:47:47.202Z" }, + { url = "https://files.pythonhosted.org/packages/48/f0/615c30622316496d2cbbc29f5985f7777d3ada70f23370608c1d3e081c1f/propcache-0.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f7ee0e597f495cf415bcbd3da3caa3bd7e816b74d0d52b8145954c5e6fd3ff37", size = 44897, upload-time = "2025-10-08T19:47:48.336Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ca/6002e46eccbe0e33dcd4069ef32f7f1c9e243736e07adca37ae8c4830ec3/propcache-0.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:929d7cbe1f01bb7baffb33dc14eb5691c95831450a26354cd210a8155170c93a", size = 39789, upload-time = "2025-10-08T19:47:49.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/5c/bca52d654a896f831b8256683457ceddd490ec18d9ec50e97dfd8fc726a8/propcache-0.4.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3f7124c9d820ba5548d431afb4632301acf965db49e666aa21c305cbe8c6de12", size = 78152, upload-time = "2025-10-08T19:47:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c0d4b719b7da33599dfe3b22d3db1ef789210a0597bc650b7cee9c77c2be8c5c", size = 44869, upload-time = "2025-10-08T19:47:52.594Z" }, + { url = "https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f302f4783709a78240ebc311b793f123328716a60911d667e0c036bc5dcbded", size = 46596, upload-time = "2025-10-08T19:47:54.073Z" }, + { url = "https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c80ee5802e3fb9ea37938e7eecc307fb984837091d5fd262bb37238b1ae97641", size = 206981, upload-time = "2025-10-08T19:47:55.715Z" }, + { url = "https://files.pythonhosted.org/packages/df/f6/c5fa1357cc9748510ee55f37173eb31bfde6d94e98ccd9e6f033f2fc06e1/propcache-0.4.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ed5a841e8bb29a55fb8159ed526b26adc5bdd7e8bd7bf793ce647cb08656cdf4", size = 211490, upload-time = "2025-10-08T19:47:57.499Z" }, + { url = "https://files.pythonhosted.org/packages/80/1e/e5889652a7c4a3846683401a48f0f2e5083ce0ec1a8a5221d8058fbd1adf/propcache-0.4.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55c72fd6ea2da4c318e74ffdf93c4fe4e926051133657459131a95c846d16d44", size = 215371, upload-time = 
"2025-10-08T19:47:59.317Z" }, + { url = "https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8326e144341460402713f91df60ade3c999d601e7eb5ff8f6f7862d54de0610d", size = 201424, upload-time = "2025-10-08T19:48:00.67Z" }, + { url = "https://files.pythonhosted.org/packages/27/73/033d63069b57b0812c8bd19f311faebeceb6ba31b8f32b73432d12a0b826/propcache-0.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:060b16ae65bc098da7f6d25bf359f1f31f688384858204fe5d652979e0015e5b", size = 197566, upload-time = "2025-10-08T19:48:02.604Z" }, + { url = "https://files.pythonhosted.org/packages/dc/89/ce24f3dc182630b4e07aa6d15f0ff4b14ed4b9955fae95a0b54c58d66c05/propcache-0.4.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:89eb3fa9524f7bec9de6e83cf3faed9d79bffa560672c118a96a171a6f55831e", size = 193130, upload-time = "2025-10-08T19:48:04.499Z" }, + { url = "https://files.pythonhosted.org/packages/a9/24/ef0d5fd1a811fb5c609278d0209c9f10c35f20581fcc16f818da959fc5b4/propcache-0.4.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:dee69d7015dc235f526fe80a9c90d65eb0039103fe565776250881731f06349f", size = 202625, upload-time = "2025-10-08T19:48:06.213Z" }, + { url = "https://files.pythonhosted.org/packages/f5/02/98ec20ff5546f68d673df2f7a69e8c0d076b5abd05ca882dc7ee3a83653d/propcache-0.4.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5558992a00dfd54ccbc64a32726a3357ec93825a418a401f5cc67df0ac5d9e49", size = 204209, upload-time = "2025-10-08T19:48:08.432Z" }, + { url = "https://files.pythonhosted.org/packages/a0/87/492694f76759b15f0467a2a93ab68d32859672b646aa8a04ce4864e7932d/propcache-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c9b822a577f560fbd9554812526831712c1436d2c046cedee4c3796d3543b144", size = 197797, upload-time = "2025-10-08T19:48:09.968Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/36/66367de3575db1d2d3f3d177432bd14ee577a39d3f5d1b3d5df8afe3b6e2/propcache-0.4.1-cp314-cp314-win32.whl", hash = "sha256:ab4c29b49d560fe48b696cdcb127dd36e0bc2472548f3bf56cc5cb3da2b2984f", size = 38140, upload-time = "2025-10-08T19:48:11.232Z" }, + { url = "https://files.pythonhosted.org/packages/0c/2a/a758b47de253636e1b8aef181c0b4f4f204bf0dd964914fb2af90a95b49b/propcache-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:5a103c3eb905fcea0ab98be99c3a9a5ab2de60228aa5aceedc614c0281cf6153", size = 41257, upload-time = "2025-10-08T19:48:12.707Z" }, + { url = "https://files.pythonhosted.org/packages/34/5e/63bd5896c3fec12edcbd6f12508d4890d23c265df28c74b175e1ef9f4f3b/propcache-0.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:74c1fb26515153e482e00177a1ad654721bf9207da8a494a0c05e797ad27b992", size = 38097, upload-time = "2025-10-08T19:48:13.923Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/9ff785d787ccf9bbb3f3106f79884a130951436f58392000231b4c737c80/propcache-0.4.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:824e908bce90fb2743bd6b59db36eb4f45cd350a39637c9f73b1c1ea66f5b75f", size = 81455, upload-time = "2025-10-08T19:48:15.16Z" }, + { url = "https://files.pythonhosted.org/packages/90/85/2431c10c8e7ddb1445c1f7c4b54d886e8ad20e3c6307e7218f05922cad67/propcache-0.4.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2b5e7db5328427c57c8e8831abda175421b709672f6cfc3d630c3b7e2146393", size = 46372, upload-time = "2025-10-08T19:48:16.424Z" }, + { url = "https://files.pythonhosted.org/packages/01/20/b0972d902472da9bcb683fa595099911f4d2e86e5683bcc45de60dd05dc3/propcache-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6f6ff873ed40292cd4969ef5310179afd5db59fdf055897e282485043fc80ad0", size = 48411, upload-time = "2025-10-08T19:48:17.577Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/e3/7dc89f4f21e8f99bad3d5ddb3a3389afcf9da4ac69e3deb2dcdc96e74169/propcache-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49a2dc67c154db2c1463013594c458881a069fcf98940e61a0569016a583020a", size = 275712, upload-time = "2025-10-08T19:48:18.901Z" }, + { url = "https://files.pythonhosted.org/packages/20/67/89800c8352489b21a8047c773067644e3897f02ecbbd610f4d46b7f08612/propcache-0.4.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:005f08e6a0529984491e37d8dbc3dd86f84bd78a8ceb5fa9a021f4c48d4984be", size = 273557, upload-time = "2025-10-08T19:48:20.762Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a1/b52b055c766a54ce6d9c16d9aca0cad8059acd9637cdf8aa0222f4a026ef/propcache-0.4.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c3310452e0d31390da9035c348633b43d7e7feb2e37be252be6da45abd1abcc", size = 280015, upload-time = "2025-10-08T19:48:22.592Z" }, + { url = "https://files.pythonhosted.org/packages/48/c8/33cee30bd890672c63743049f3c9e4be087e6780906bfc3ec58528be59c1/propcache-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3c70630930447f9ef1caac7728c8ad1c56bc5015338b20fed0d08ea2480b3a", size = 262880, upload-time = "2025-10-08T19:48:23.947Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b1/8f08a143b204b418285c88b83d00edbd61afbc2c6415ffafc8905da7038b/propcache-0.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e57061305815dfc910a3634dcf584f08168a8836e6999983569f51a8544cd89", size = 260938, upload-time = "2025-10-08T19:48:25.656Z" }, + { url = "https://files.pythonhosted.org/packages/cf/12/96e4664c82ca2f31e1c8dff86afb867348979eb78d3cb8546a680287a1e9/propcache-0.4.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = 
"sha256:521a463429ef54143092c11a77e04056dd00636f72e8c45b70aaa3140d639726", size = 247641, upload-time = "2025-10-08T19:48:27.207Z" }, + { url = "https://files.pythonhosted.org/packages/18/ed/e7a9cfca28133386ba52278136d42209d3125db08d0a6395f0cba0c0285c/propcache-0.4.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:120c964da3fdc75e3731aa392527136d4ad35868cc556fd09bb6d09172d9a367", size = 262510, upload-time = "2025-10-08T19:48:28.65Z" }, + { url = "https://files.pythonhosted.org/packages/f5/76/16d8bf65e8845dd62b4e2b57444ab81f07f40caa5652b8969b87ddcf2ef6/propcache-0.4.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:d8f353eb14ee3441ee844ade4277d560cdd68288838673273b978e3d6d2c8f36", size = 263161, upload-time = "2025-10-08T19:48:30.133Z" }, + { url = "https://files.pythonhosted.org/packages/e7/70/c99e9edb5d91d5ad8a49fa3c1e8285ba64f1476782fed10ab251ff413ba1/propcache-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ab2943be7c652f09638800905ee1bab2c544e537edb57d527997a24c13dc1455", size = 257393, upload-time = "2025-10-08T19:48:31.567Z" }, + { url = "https://files.pythonhosted.org/packages/08/02/87b25304249a35c0915d236575bc3574a323f60b47939a2262b77632a3ee/propcache-0.4.1-cp314-cp314t-win32.whl", hash = "sha256:05674a162469f31358c30bcaa8883cb7829fa3110bf9c0991fe27d7896c42d85", size = 42546, upload-time = "2025-10-08T19:48:32.872Z" }, + { url = "https://files.pythonhosted.org/packages/cb/ef/3c6ecf8b317aa982f309835e8f96987466123c6e596646d4e6a1dfcd080f/propcache-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:990f6b3e2a27d683cb7602ed6c86f15ee6b43b1194736f9baaeb93d0016633b1", size = 46259, upload-time = "2025-10-08T19:48:34.226Z" }, + { url = "https://files.pythonhosted.org/packages/c4/2d/346e946d4951f37eca1e4f55be0f0174c52cd70720f84029b02f296f4a38/propcache-0.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:ecef2343af4cc68e05131e45024ba34f6095821988a9d0a02aa7c73fcc448aa9", size = 40428, upload-time = "2025-10-08T19:48:35.441Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] [[package]] @@ -2914,15 +2914,15 @@ wheels = [ [[package]] name = "rich" -version = "14.1.0" +version = "14.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fe/75/af448d8e52bf1d8fa6a9d089ca6c07ff4453d86c65c145d0a300bb073b9b/rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8", size = 224441, upload-time = "2025-07-25T07:32:58.125Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/30/3c4d035596d3cf444529e0b2953ad0466f6049528a879d27534700580395/rich-14.1.0-py3-none-any.whl", hash = "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f", size = 243368, upload-time = "2025-07-25T07:32:56.73Z" }, + { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, ] [[package]] @@ -3029,52 +3029,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4f/bd/de8d508070629b6d84a30d01d57e4a65c69aa7f5abe7560b8fad3b50ea59/termcolor-3.1.0-py3-none-any.whl", hash = "sha256:591dd26b5c2ce03b9e43f391264626557873ce1d379019786f99b0c2bee140aa", size = 7684, upload-time = 
"2025-04-30T11:37:52.382Z" }, ] -[[package]] -name = "toml" -version = "0.10.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, -] - [[package]] name = "tomli" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175, upload-time = "2024-11-27T22:38:36.873Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077, upload-time = "2024-11-27T22:37:54.956Z" }, - { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429, upload-time = "2024-11-27T22:37:56.698Z" }, - { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067, upload-time = "2024-11-27T22:37:57.63Z" }, - { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030, upload-time = "2024-11-27T22:37:59.344Z" }, - { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898, upload-time = "2024-11-27T22:38:00.429Z" }, - { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894, upload-time = "2024-11-27T22:38:02.094Z" }, - { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319, upload-time = "2024-11-27T22:38:03.206Z" }, - { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273, upload-time = "2024-11-27T22:38:04.217Z" }, - { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 
98310, upload-time = "2024-11-27T22:38:05.908Z" }, - { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309, upload-time = "2024-11-27T22:38:06.812Z" }, - { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762, upload-time = "2024-11-27T22:38:07.731Z" }, - { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453, upload-time = "2024-11-27T22:38:09.384Z" }, - { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486, upload-time = "2024-11-27T22:38:10.329Z" }, - { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349, upload-time = "2024-11-27T22:38:11.443Z" }, - { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159, upload-time = "2024-11-27T22:38:13.099Z" }, - { 
url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243, upload-time = "2024-11-27T22:38:14.766Z" }, - { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645, upload-time = "2024-11-27T22:38:15.843Z" }, - { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584, upload-time = "2024-11-27T22:38:17.645Z" }, - { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875, upload-time = "2024-11-27T22:38:19.159Z" }, - { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418, upload-time = "2024-11-27T22:38:20.064Z" }, - { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708, upload-time = "2024-11-27T22:38:21.659Z" }, - { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582, upload-time = "2024-11-27T22:38:22.693Z" }, - { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543, upload-time = "2024-11-27T22:38:24.367Z" }, - { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691, upload-time = "2024-11-27T22:38:26.081Z" }, - { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170, upload-time = "2024-11-27T22:38:27.921Z" }, - { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530, upload-time = "2024-11-27T22:38:29.591Z" }, - { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666, upload-time = "2024-11-27T22:38:30.639Z" }, - { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954, upload-time = "2024-11-27T22:38:31.702Z" }, - { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724, upload-time = "2024-11-27T22:38:32.837Z" }, - { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383, upload-time = "2024-11-27T22:38:34.455Z" }, - { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392, upload-time = "2025-10-08T22:01:47.119Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236, upload-time = "2025-10-08T22:01:00.137Z" }, + { url = "https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084, upload-time = "2025-10-08T22:01:01.63Z" }, + 
{ url = "https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832, upload-time = "2025-10-08T22:01:02.543Z" }, + { url = "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052, upload-time = "2025-10-08T22:01:03.836Z" }, + { url = "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555, upload-time = "2025-10-08T22:01:04.834Z" }, + { url = "https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128, upload-time = "2025-10-08T22:01:05.84Z" }, + { url = "https://files.pythonhosted.org/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445, upload-time = "2025-10-08T22:01:06.896Z" }, + { url = "https://files.pythonhosted.org/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165, upload-time = "2025-10-08T22:01:08.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891, upload-time = "2025-10-08T22:01:09.082Z" }, + { url = "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796, upload-time = "2025-10-08T22:01:10.266Z" }, + { url = "https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121, upload-time = "2025-10-08T22:01:11.332Z" }, + { url = "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070, upload-time = "2025-10-08T22:01:12.498Z" }, + { url = "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859, upload-time = "2025-10-08T22:01:13.551Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296, upload-time = "2025-10-08T22:01:14.614Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124, upload-time = "2025-10-08T22:01:15.629Z" }, + { url = "https://files.pythonhosted.org/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698, upload-time = "2025-10-08T22:01:16.51Z" }, + { url = "https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", size = 154819, upload-time = "2025-10-08T22:01:17.964Z" }, + { url = "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", size = 148766, upload-time = "2025-10-08T22:01:18.959Z" }, + { url = "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", size = 240771, upload-time = "2025-10-08T22:01:20.106Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", size = 248586, upload-time = "2025-10-08T22:01:21.164Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", size = 244792, upload-time = "2025-10-08T22:01:22.417Z" }, + { url = "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", size = 248909, upload-time = "2025-10-08T22:01:23.859Z" }, + { url = "https://files.pythonhosted.org/packages/f8/84/ef50c51b5a9472e7265ce1ffc7f24cd4023d289e109f669bdb1553f6a7c2/tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606", size = 96946, upload-time = "2025-10-08T22:01:24.893Z" }, + { url = "https://files.pythonhosted.org/packages/b2/b7/718cd1da0884f281f95ccfa3a6cc572d30053cba64603f79d431d3c9b61b/tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999", size = 107705, upload-time = "2025-10-08T22:01:26.153Z" }, + { url = "https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", size = 154244, upload-time = "2025-10-08T22:01:27.06Z" }, + { url = "https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", size = 148637, upload-time = "2025-10-08T22:01:28.059Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", size = 241925, upload-time = "2025-10-08T22:01:29.066Z" }, + { url = "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", size = 249045, upload-time = "2025-10-08T22:01:31.98Z" }, + { url = "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", size = 245835, upload-time = "2025-10-08T22:01:32.989Z" }, + { url = "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", size = 253109, upload-time = "2025-10-08T22:01:34.052Z" }, + { url = "https://files.pythonhosted.org/packages/22/0c/b4da635000a71b5f80130937eeac12e686eefb376b8dee113b4a582bba42/tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463", size = 97930, upload-time = "2025-10-08T22:01:35.082Z" }, + { url = "https://files.pythonhosted.org/packages/b9/74/cb1abc870a418ae99cd5c9547d6bce30701a954e0e721821df483ef7223c/tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8", size = 107964, upload-time = "2025-10-08T22:01:36.057Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", size = 163065, upload-time = "2025-10-08T22:01:37.27Z" }, + { url = "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", size = 159088, upload-time = "2025-10-08T22:01:38.235Z" }, + { url = "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", size = 268193, upload-time = "2025-10-08T22:01:39.712Z" }, + { url = "https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", size = 275488, upload-time = "2025-10-08T22:01:40.773Z" }, + { url = "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", size = 272669, upload-time = "2025-10-08T22:01:41.824Z" }, + { url = "https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", size = 279709, upload-time = "2025-10-08T22:01:43.177Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/46/cc36c679f09f27ded940281c38607716c86cf8ba4a518d524e349c8b4874/tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0", size = 107563, upload-time = "2025-10-08T22:01:44.233Z" }, + { url = "https://files.pythonhosted.org/packages/84/ff/426ca8683cf7b753614480484f6437f568fd2fda2edbdf57a2d3d8b27a0b/tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba", size = 119756, upload-time = "2025-10-08T22:01:45.234Z" }, + { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" }, ] [[package]] @@ -3138,15 +3139,15 @@ bedrock-runtime = [ [[package]] name = "types-aiobotocore" -version = "2.24.3" +version = "2.25.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore-stubs" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d4/ec/bdd9468061247fe48b5158ece8c39ed6ff21336a55bc49529e3980063d76/types_aiobotocore-2.24.3.tar.gz", hash = "sha256:b2b43b2e3102c2c4d4c3bca4a7ec97f89856cdde000355512e019e0dbdcd4a36", size = 86847, upload-time = "2025-10-08T01:43:02.483Z" } +sdist = { url = "https://files.pythonhosted.org/packages/51/0e/ce075f02b7b99cf06607c06d85f731cf70d22a628a4c05b14f8fdbd7a852/types_aiobotocore-2.25.0.tar.gz", hash = "sha256:7e5e96568935d5255095b5f8aaedc0c1b265770a260a2ab6ed7d4c7ea7fe8228", size = 86814, upload-time = "2025-10-11T01:37:34.424Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/23/ab/95b02273a34ea6a359d6488daff3061fa8915fb021d18ab0d61620dcfc09/types_aiobotocore-2.24.3-py3-none-any.whl", hash = 
"sha256:313250d1dfc8392248a0083eb158f39616fe7e6593ceca7d670ec6da5c02e9a8", size = 54296, upload-time = "2025-10-08T01:42:58.977Z" }, + { url = "https://files.pythonhosted.org/packages/4e/fb/1b78fc895712e48d9a3b94aeee9aed31eee5682a447844f8873f036186fb/types_aiobotocore-2.25.0-py3-none-any.whl", hash = "sha256:7a9efa7e8240774546b95ae0db3b6f7cbf9f05c89db317612ec1d678649a88ff", size = 54291, upload-time = "2025-10-11T01:37:29.108Z" }, ] [[package]] @@ -3175,20 +3176,20 @@ wheels = [ [[package]] name = "types-aiofiles" -version = "24.1.0.20250822" +version = "25.1.0.20251011" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/19/48/c64471adac9206cc844afb33ed311ac5a65d2f59df3d861e0f2d0cad7414/types_aiofiles-24.1.0.20250822.tar.gz", hash = "sha256:9ab90d8e0c307fe97a7cf09338301e3f01a163e39f3b529ace82466355c84a7b", size = 14484, upload-time = "2025-08-22T03:02:23.039Z" } +sdist = { url = "https://files.pythonhosted.org/packages/84/6c/6d23908a8217e36704aa9c79d99a620f2fdd388b66a4b7f72fbc6b6ff6c6/types_aiofiles-25.1.0.20251011.tar.gz", hash = "sha256:1c2b8ab260cb3cd40c15f9d10efdc05a6e1e6b02899304d80dfa0410e028d3ff", size = 14535, upload-time = "2025-10-11T02:44:51.237Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/8e/5e6d2215e1d8f7c2a94c6e9d0059ae8109ce0f5681956d11bb0a228cef04/types_aiofiles-24.1.0.20250822-py3-none-any.whl", hash = "sha256:0ec8f8909e1a85a5a79aed0573af7901f53120dd2a29771dd0b3ef48e12328b0", size = 14322, upload-time = "2025-08-22T03:02:21.918Z" }, + { url = "https://files.pythonhosted.org/packages/71/0f/76917bab27e270bb6c32addd5968d69e558e5b6f7fb4ac4cbfa282996a96/types_aiofiles-25.1.0.20251011-py3-none-any.whl", hash = "sha256:8ff8de7f9d42739d8f0dadcceeb781ce27cd8d8c4152d4a7c52f6b20edb8149c", size = 14338, upload-time = "2025-10-11T02:44:50.054Z" }, ] [[package]] name = "types-awscrt" -version = "0.27.6" +version = "0.28.1" source = { registry = "https://pypi.org/simple" } -sdist = { 
url = "https://files.pythonhosted.org/packages/56/ce/5d84526a39f44c420ce61b16654193f8437d74b54f21597ea2ac65d89954/types_awscrt-0.27.6.tar.gz", hash = "sha256:9d3f1865a93b8b2c32f137514ac88cb048b5bc438739945ba19d972698995bfb", size = 16937, upload-time = "2025-08-13T01:54:54.659Z" } +sdist = { url = "https://files.pythonhosted.org/packages/60/19/a3a6377c9e2e389c1421c033a1830c29cac08f2e1e05a082ea84eb22c75f/types_awscrt-0.28.1.tar.gz", hash = "sha256:66d77ec283e1dc907526a44511a12624118723a396c36d3f3dd9855cb614ce14", size = 17410, upload-time = "2025-10-11T21:55:07.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/af/e3d20e3e81d235b3964846adf46a334645a8a9b25a0d3d472743eb079552/types_awscrt-0.27.6-py3-none-any.whl", hash = "sha256:18aced46da00a57f02eb97637a32e5894dc5aa3dc6a905ba3e5ed85b9f3c526b", size = 39626, upload-time = "2025-08-13T01:54:53.454Z" }, + { url = "https://files.pythonhosted.org/packages/ea/c7/0266b797d19b82aebe0e177efe35de7aabdc192bc1605ce3309331f0a505/types_awscrt-0.28.1-py3-none-any.whl", hash = "sha256:d88f43ef779f90b841ba99badb72fe153077225a4e426ae79e943184827b4443", size = 41851, upload-time = "2025-10-11T21:55:06.235Z" }, ] [[package]] @@ -3275,11 +3276,11 @@ wheels = [ [[package]] name = "types-s3transfer" -version = "0.13.1" +version = "0.14.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a5/c5/23946fac96c9dd5815ec97afd1c8ad6d22efa76c04a79a4823f2f67692a5/types_s3transfer-0.13.1.tar.gz", hash = "sha256:ce488d79fdd7d3b9d39071939121eca814ec65de3aa36bdce1f9189c0a61cc80", size = 14181, upload-time = "2025-08-31T16:57:06.93Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/9b/8913198b7fc700acc1dcb84827137bb2922052e43dde0f4fb0ed2dc6f118/types_s3transfer-0.14.0.tar.gz", hash = "sha256:17f800a87c7eafab0434e9d87452c809c290ae906c2024c24261c564479e9c95", size = 14218, upload-time = "2025-10-11T21:11:27.892Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/8e/dc/b3f9b5c93eed6ffe768f4972661250584d5e4f248b548029026964373bcd/types_s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:4ff730e464a3fd3785b5541f0f555c1bd02ad408cf82b6b7a95429f6b0d26b4a", size = 19617, upload-time = "2025-08-31T16:57:05.73Z" }, + { url = "https://files.pythonhosted.org/packages/92/c3/4dfb2e87c15ca582b7d956dfb7e549de1d005c758eb9a305e934e1b83fda/types_s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:108134854069a38b048e9b710b9b35904d22a9d0f37e4e1889c2e6b58e5b3253", size = 19697, upload-time = "2025-10-11T21:11:26.749Z" }, ] [[package]] From 07c95a99182f83eb8aa59f219d1f442078309a2b Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 12:05:12 +0200 Subject: [PATCH 024/115] Cleanup unused dependencies --- pyproject.toml | 3 --- uv.lock | 48 ------------------------------------------------ 2 files changed, 51 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8e1296a3c..5bc984b96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,13 +82,10 @@ dev = [ "ruff>=0.6.8", "types-aioboto3[bedrock,bedrock-runtime]>=13.4.0", "types-aiofiles>=24.1.0.20240626", - "types-beautifulsoup4>=4.12.0.20240907", "types-markdown>=3.6.0.20240316", "types-networkx>=3.3.0.20241020", "types-openpyxl>=3.1.5.20250306", - "types-requests>=2.32.0.2024091", "types-PyYAML>=6.0.12.20250326", - "types-toml>=0.10.8.20240310", ] [project.scripts] diff --git a/uv.lock b/uv.lock index 6c92aea93..61acfe018 100644 --- a/uv.lock +++ b/uv.lock @@ -2195,13 +2195,10 @@ dev = [ { name = "ruff" }, { name = "types-aioboto3", extra = ["bedrock", "bedrock-runtime"] }, { name = "types-aiofiles" }, - { name = "types-beautifulsoup4" }, { name = "types-markdown" }, { name = "types-networkx" }, { name = "types-openpyxl" }, { name = "types-pyyaml" }, - { name = "types-requests" }, - { name = "types-toml" }, ] docs = [ { name = "mkdocs" }, @@ -2276,13 +2273,10 @@ requires-dist = [ { name = "typer", specifier = ">=0.16.0" }, { 
name = "types-aioboto3", extras = ["bedrock", "bedrock-runtime"], marker = "extra == 'dev'", specifier = ">=13.4.0" }, { name = "types-aiofiles", marker = "extra == 'dev'", specifier = ">=24.1.0.20240626" }, - { name = "types-beautifulsoup4", marker = "extra == 'dev'", specifier = ">=4.12.0.20240907" }, { name = "types-markdown", marker = "extra == 'dev'", specifier = ">=3.6.0.20240316" }, { name = "types-networkx", marker = "extra == 'dev'", specifier = ">=3.3.0.20241020" }, { name = "types-openpyxl", marker = "extra == 'dev'", specifier = ">=3.1.5.20250306" }, { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12.20250326" }, - { name = "types-requests", marker = "extra == 'dev'", specifier = ">=2.32.0.2024091" }, - { name = "types-toml", marker = "extra == 'dev'", specifier = ">=0.10.8.20240310" }, { name = "typing-extensions", specifier = ">=4.13.2" }, { name = "yattag", specifier = ">=1.15.2" }, ] @@ -3192,27 +3186,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/c7/0266b797d19b82aebe0e177efe35de7aabdc192bc1605ce3309331f0a505/types_awscrt-0.28.1-py3-none-any.whl", hash = "sha256:d88f43ef779f90b841ba99badb72fe153077225a4e426ae79e943184827b4443", size = 41851, upload-time = "2025-10-11T21:55:06.235Z" }, ] -[[package]] -name = "types-beautifulsoup4" -version = "4.12.0.20250516" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "types-html5lib" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6d/d1/32b410f6d65eda94d3dfb0b3d0ca151f12cb1dc4cef731dcf7cbfd8716ff/types_beautifulsoup4-4.12.0.20250516.tar.gz", hash = "sha256:aa19dd73b33b70d6296adf92da8ab8a0c945c507e6fb7d5db553415cc77b417e", size = 16628, upload-time = "2025-05-16T03:09:09.93Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/79/d84de200a80085b32f12c5820d4fd0addcbe7ba6dce8c1c9d8605e833c8e/types_beautifulsoup4-4.12.0.20250516-py3-none-any.whl", hash = 
"sha256:5923399d4a1ba9cc8f0096fe334cc732e130269541d66261bb42ab039c0376ee", size = 16879, upload-time = "2025-05-16T03:09:09.051Z" }, -] - -[[package]] -name = "types-html5lib" -version = "1.1.11.20250917" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/68/4b/a970718e8bd9324ee8fb8eaf02ff069f6d03c20d4523bb4232892ecc3d06/types_html5lib-1.1.11.20250917.tar.gz", hash = "sha256:7b52743377f33f9b4fd7385afbd2d457b8864ee51f90ff2a795ad9e8c053373a", size = 16868, upload-time = "2025-09-17T02:47:41.18Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/78/8a/da91a9c64dcb5e69beb567519857411996d8ecae9f6f128bcef8260e7a8d/types_html5lib-1.1.11.20250917-py3-none-any.whl", hash = "sha256:b294fd06d60da205daeb2f615485ca4d475088d2eff1009cf427f4a80fcd5346", size = 22908, upload-time = "2025-09-17T02:47:40.39Z" }, -] - [[package]] name = "types-markdown" version = "3.9.0.20250906" @@ -3262,18 +3235,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, ] -[[package]] -name = "types-requests" -version = "2.32.4.20250913" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/36/27/489922f4505975b11de2b5ad07b4fe1dca0bca9be81a703f26c5f3acfce5/types_requests-2.32.4.20250913.tar.gz", hash = "sha256:abd6d4f9ce3a9383f269775a9835a4c24e5cd6b9f647d64f88aa4613c33def5d", size = 23113, upload-time = "2025-09-13T02:40:02.309Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/20/9a227ea57c1285986c4cf78400d0a91615d25b24e257fd9e2969606bdfae/types_requests-2.32.4.20250913-py3-none-any.whl", hash = 
"sha256:78c9c1fffebbe0fa487a418e0fa5252017e9c60d1a2da394077f1780f655d7e1", size = 20658, upload-time = "2025-09-13T02:40:01.115Z" }, -] - [[package]] name = "types-s3transfer" version = "0.14.0" @@ -3283,15 +3244,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/92/c3/4dfb2e87c15ca582b7d956dfb7e549de1d005c758eb9a305e934e1b83fda/types_s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:108134854069a38b048e9b710b9b35904d22a9d0f37e4e1889c2e6b58e5b3253", size = 19697, upload-time = "2025-10-11T21:11:26.749Z" }, ] -[[package]] -name = "types-toml" -version = "0.10.8.20240310" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/86/47/3e4c75042792bff8e90d7991aa5c51812cc668828cc6cce711e97f63a607/types-toml-0.10.8.20240310.tar.gz", hash = "sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331", size = 4392, upload-time = "2024-03-10T02:18:37.518Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/da/a2/d32ab58c0b216912638b140ab2170ee4b8644067c293b170e19fba340ccc/types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d", size = 4777, upload-time = "2024-03-10T02:18:36.568Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0" From 1d065612480f503f5f43f0ae155acc124941c46e Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 12:07:05 +0200 Subject: [PATCH 025/115] Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65bd866c1..49db9a7f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -84,6 +84,7 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Added `MissingDependencyError` exception for missing optional dependencies ### Changed + - Replaced package `toml` by `tomli` which is more modern and faster - Updated Gemini 2.0 model from `gemini-2.0-flash-exp` to `gemini-2.0-flash` with new 
pricing (input: $0.10, output: $0.40 per million tokens) - Updated Gemini 2.5 Series comment from '(when available)' to stable release - Updated `base-claude` from `claude-4-sonnet` to `claude-4.5-sonnet` across all presets From 608b285da779a2800b04c3780b07ae90a9d994e9 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 12:17:27 +0200 Subject: [PATCH 026/115] Cleanup --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49db9a7f0..7e9d3300d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - for more control, instead of providing a string for the `template` field, you can also use a nested `template` section with `template`, `category` and `templating_style` fields - Renamed **PipeOCR** to **PipeExtract** - - this is to account for various text extraction techniques from images and docs, including but not only OCR; e.g. we now have integrated the `pypdfium2` package which can extract text and images from PDF, when it's actually real text (not an image), and soon we'll add support for other document extraction models such as IBM's `docling` and Microsoft's `MarkItDown` + - this is to account for various text extraction techniques from images and docs, including but not only OCR; e.g. 
we now have integrated the `pypdfium2` package which can extract text and images from PDF, when it's actually real text (not an image), and soon we'll add support for other document extraction models solutions - removed obligation to name your document input `ocr_input`, it can now be named whatever you want as long as it's a single input and it's either an `Image` or a `PDF` or some concept refining PDF or Image - renamed `ocr_page_contents_from_pdf` to `extract_page_contents_from_pdf` - renamed `ocr_page_contents_and_views_from_pdf` to `extract_page_contents_and_views_from_pdf` From 26367290c7b6f597b2001408df199ca14972647e Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 12:37:47 +0200 Subject: [PATCH 027/115] Support having a space in dir path --- Makefile | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index a9c974418..2f9a28a83 100644 --- a/Makefile +++ b/Makefile @@ -7,14 +7,15 @@ PROJECT_NAME := $(shell grep '^name = ' pyproject.toml | sed -E 's/name = "(.*)" # The "?" is used to make the variable optional, so that it can be overridden by the user. 
PYTHON_VERSION ?= 3.11 -VENV_PYTHON := $(VIRTUAL_ENV)/bin/python -VENV_PYTEST := $(VIRTUAL_ENV)/bin/pytest -VENV_RUFF := $(VIRTUAL_ENV)/bin/ruff -VENV_PYRIGHT := $(VIRTUAL_ENV)/bin/pyright -VENV_MYPY := $(VIRTUAL_ENV)/bin/mypy -VENV_PIPELEX := $(VIRTUAL_ENV)/bin/pipelex -VENV_MKDOCS := $(VIRTUAL_ENV)/bin/mkdocs -VENV_PYLINT := $(VIRTUAL_ENV)/bin/pylint +# Note: VENV_* variables include quotes to handle paths with spaces (e.g., "My Projects/pipelex") +VENV_PYTHON := "$(VIRTUAL_ENV)/bin/python" +VENV_PYTEST := "$(VIRTUAL_ENV)/bin/pytest" +VENV_RUFF := "$(VIRTUAL_ENV)/bin/ruff" +VENV_PYRIGHT := "$(VIRTUAL_ENV)/bin/pyright" +VENV_MYPY := "$(VIRTUAL_ENV)/bin/mypy" +VENV_PIPELEX := "$(VIRTUAL_ENV)/bin/pipelex" +VENV_MKDOCS := "$(VIRTUAL_ENV)/bin/mkdocs" +VENV_PYLINT := "$(VIRTUAL_ENV)/bin/pylint" UV_MIN_VERSION = $(shell grep -m1 'required-version' pyproject.toml | sed -E 's/.*= *"([^<>=, ]+).*/\1/') @@ -132,17 +133,17 @@ check-uv: env: check-uv $(call PRINT_TITLE,"Creating virtual environment") - @if [ ! -d $(VIRTUAL_ENV) ]; then \ + @if [ ! -d "$(VIRTUAL_ENV)" ]; then \ echo "Creating Python virtual env in \`${VIRTUAL_ENV}\`"; \ - uv venv $(VIRTUAL_ENV) --python $(PYTHON_VERSION); \ + uv venv "$(VIRTUAL_ENV)" --python $(PYTHON_VERSION); \ else \ echo "Python virtual env already exists in \`${VIRTUAL_ENV}\`"; \ fi - @echo "Using Python: $$($(VENV_PYTHON) --version) from $$(which $$(readlink -f $(VENV_PYTHON)))" + @echo "Using Python: $$($(VENV_PYTHON) --version) from $$(readlink $(VENV_PYTHON) 2>/dev/null || echo $(VENV_PYTHON))" install: env $(call PRINT_TITLE,"Installing dependencies") - @. $(VIRTUAL_ENV)/bin/activate && \ + @. 
"$(VIRTUAL_ENV)/bin/activate" && \ uv sync --all-extras && \ echo "Installed Pipelex dependencies in ${VIRTUAL_ENV} with all extras."; @@ -406,7 +407,7 @@ merge-check-ruff-lint: env check-unused-imports merge-check-pyright: env $(call PRINT_TITLE,"Typechecking with pyright") - $(VENV_PYRIGHT) --pythonpath $(VIRTUAL_ENV)/bin/python3 + $(VENV_PYRIGHT) --pythonpath "$(VIRTUAL_ENV)/bin/python3" merge-check-mypy: env $(call PRINT_TITLE,"Typechecking with mypy") From f76bc65077fb4792ebe60c8d51892beabfc9d9c6 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 12:41:49 +0200 Subject: [PATCH 028/115] improve merge-check-pyright --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2bfb41b38..41c953a9b 100644 --- a/Makefile +++ b/Makefile @@ -426,7 +426,7 @@ merge-check-ruff-lint: env check-unused-imports merge-check-pyright: env $(call PRINT_TITLE,"Typechecking with pyright") - $(VENV_PYRIGHT) --pythonpath "$(VIRTUAL_ENV)/bin/python3" + $(VENV_PYRIGHT) --pythonpath $(VENV_PYTHON) merge-check-mypy: env $(call PRINT_TITLE,"Typechecking with mypy") From 5c1ba5573eba01115ae54ce394c62395a133431e Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 13:08:29 +0200 Subject: [PATCH 029/115] Improve error messages --- pipelex/cogt/model_backends/backend_library.py | 2 +- pipelex/pipelex.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pipelex/cogt/model_backends/backend_library.py b/pipelex/cogt/model_backends/backend_library.py index 7e2482a4e..007dbe2c8 100644 --- a/pipelex/cogt/model_backends/backend_library.py +++ b/pipelex/cogt/model_backends/backend_library.py @@ -68,7 +68,7 @@ def load(self): ) from var_fallback_pattern_exc except VarNotFoundError as var_not_found_exc: msg = ( - f"Variable substitution failed due to a variable not found error in file '{backends_library_path}':" + f"Variable substitution failed due to a 'variable not found' error in file 
'{backends_library_path}':" f"\n{var_not_found_exc}\nRun mode: '{runtime_manager.run_mode}'" ) raise InferenceBackendCredentialsError( diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index bb0135bc9..ebbfd0ff8 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -227,12 +227,12 @@ def setup( error_msg: str if secrets_provider: error_msg = ( - f"Could not get credentials for inference backend {backend_name}:\n{credentials_exc}," + f"Could not get credentials for inference backend '{backend_name}':\n{credentials_exc}," f"\ncheck that secret '{var_name}' is available from your secrets provider." ) else: error_msg = ( - f"Could not get credentials for inference backend {backend_name}:\n{credentials_exc},\n" + f"Could not get credentials for inference backend '{backend_name}':\n{credentials_exc},\n" f"you need to add '{var_name}' to your environment variables or to your .env file." ) if credentials_exc.backend_name == "pipelex_inference": @@ -240,7 +240,7 @@ def setup( "\nYou can check the project's README about getting a Pipelex Inference API key,\n\n" "or you can bring your own 'OPENAI_API_KEY', " "'AZURE_OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'MISTRAL_API_KEY' etc.\n" - "--> choose which inference backends to enable in .pipelex/inference/backends.toml\n" + "--> choose which inference backends to enable in '.pipelex/inference/backends.toml'\n" ) raise PipelexSetupError(error_msg) from credentials_exc self.pipelex_hub.set_content_generator(content_generator or ContentGenerator()) From 59016fd40d00b6eb29714bb2dfcc6a9288d410e5 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 14:09:29 +0200 Subject: [PATCH 030/115] Completed MIGRATION.md --- MIGRATION.md | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 153 insertions(+), 3 deletions(-) diff --git a/MIGRATION.md b/MIGRATION.md index 19017f3f1..3cd1e392b 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -459,7 +459,116 @@ make check make tp ``` -## 10. 
Common Issues +## 10. Python API Changes for Client Projects + +These changes affect Python code that imports from or uses pipelex. + +### Renamed Base Library Pipes + +**Find:** `ocr_page_contents_from_pdf` +**Replace with:** `extract_page_contents_from_pdf` + +**Find:** `ocr_page_contents_and_views_from_pdf` +**Replace with:** `extract_page_contents_and_views_from_pdf` + +**Before:** +```python +pipe_output = await execute_pipeline( + pipe_code="ocr_page_contents_from_pdf", + input_memory={ + "ocr_input": PDFContent(url=pdf_url), + }, +) +``` + +**After:** +```python +pipe_output = await execute_pipeline( + pipe_code="extract_page_contents_from_pdf", + input_memory={ + "document": PDFContent(url=pdf_url), + }, +) +``` + +### Removed Methods and Classes + +The following methods and classes have been removed. If your code uses them, you'll need to refactor: + +- `PipeLibrary.add_or_update_pipe()` - Removed +- `PipelexHub.get_optional_library_manager()` - Removed +- Hub methods: `get_optional_domain_provider()` and `get_optional_concept_provider()` - Removed + +### Renamed Internal Classes (if used) + +If your project directly imports these internal classes: + +- `ConceptProviderAbstract` → `ConceptLibraryAbstract` +- `DomainProviderAbstract` → `DomainLibraryAbstract` +- `PipeProviderAbstract` → `PipeLibraryAbstract` +- `PipeInputSpec` → `InputRequirements` +- `PipeInputSpecFactory` → `InputRequirementsFactory` +- `PipelexError` → `PipelexException` (base exception class) + +### Hub Method Renames + +If you use hub methods directly: + +**Find:** `get_*_provider()` +**Replace with:** `get_*_library()` + +**Find:** `set_*_provider()` +**Replace with:** `set_*_library()` + +### External Plugin API Changes + +If you're using external LLM plugins: + +**Find:** `llm_handle` parameter +**Replace with:** `model` parameter + +**Before:** +```python +get_inference_manager().set_llm_worker_from_external_plugin( + llm_handle="my_custom_llm", + llm_worker_class=MyLLMWorker, +) 
+``` + +**After:** +```python +get_inference_manager().set_llm_worker_from_external_plugin( + model="my_custom_llm", + llm_worker_class=MyLLMWorker, +) +``` + +## 11. File Cleanup + +### Remove Deprecated Files + +Remove the following files if they exist in your project: + +```bash +# Remove old template file (moved to .pipelex/pipelex.toml) +rm -f pipelex_libraries/templates/base_templates.toml +rm -rf pipelex_libraries/templates/ # If empty after removal +``` + +### Update Documentation Files + +If your project has `AGENTS.md` or `CLAUDE.md` files with Pipelex examples: + +1. Update all PLX syntax examples following sections 1-8 of this guide +2. Update Python code examples following section 10 +3. Search for and update: + - `ocr_page_contents_from_pdf` → `extract_page_contents_from_pdf` + - `type = "PipeOcr"` → `type = "PipeExtract"` + - `ocr_model` → `model` + - `llm = ` → `model = ` + - `prompt_template = ` → `prompt = ` + +## 12. Common Issues ### Issue: Pipeline validation fails with "unknown field" @@ -479,7 +588,19 @@ make tp **Solution:** Rename sections and fields in your .pipelex/ configuration files. -## 11. Automated Migration Script +### Issue: Import errors for renamed classes + +**Cause:** Code imports classes that were renamed (e.g., `ConceptProviderAbstract`). + +**Solution:** Update imports to use new names (`ConceptLibraryAbstract`, etc.) or refactor to avoid using internal classes. + +### Issue: base_templates.toml not found + +**Cause:** The `base_templates.toml` file has been removed. Generic prompts moved to `.pipelex/pipelex.toml`. + +**Solution:** Remove references to this file. The templates are now auto-loaded from the config. + +## 13. Automated Migration Script You can use this bash script to automatically apply most changes: @@ -499,6 +620,21 @@ find . -name "*.plx" -type f -exec sed -i '' \ -e 's/default_pipe_code = /default_outcome = /g' \ {} + +# Update Python files with renamed pipe codes +find . 
-name "*.py" -type f -exec sed -i '' \ + -e 's/ocr_page_contents_from_pdf/extract_page_contents_from_pdf/g' \ + -e 's/ocr_page_contents_and_views_from_pdf/extract_page_contents_and_views_from_pdf/g' \ + {} + + +# Update documentation files +find . \( -name "AGENTS.md" -o -name "CLAUDE.md" \) -type f -exec sed -i '' \ + -e 's/definition = "/description = "/g' \ + -e 's/type = "PipeOcr"/type = "PipeExtract"/g' \ + -e 's/ocr_model = /model = /g' \ + -e 's/ocr_page_contents_from_pdf/extract_page_contents_from_pdf/g' \ + -e 's/ocr_page_contents_and_views_from_pdf/extract_page_contents_and_views_from_pdf/g' \ + {} + + # Find all .toml files in .pipelex and apply replacements find .pipelex -name "*.toml" -type f -exec sed -i '' \ -e 's/llm_handle = /model = /g' \ @@ -517,6 +653,9 @@ find tests -name "*.py" -type f -exec sed -i '' \ -e 's/@pytest\.mark\.ocr/@pytest.mark.extract/g' \ {} + +# Remove deprecated files +rm -f pipelex_libraries/templates/base_templates.toml + echo "Automated migration complete. Please review changes and:" echo "1. Manually add default_outcome to all PipeCondition pipes" echo "2. Tag image inputs in PipeLLM prompts" @@ -547,6 +686,14 @@ Get-ChildItem -Path . -Filter *.plx -Recurse | ForEach-Object { Set-Content -Path $_.FullName -Value $content -NoNewline } +# Update Python files with renamed pipe codes +Get-ChildItem -Path . 
-Filter *.py -Recurse | ForEach-Object { + $content = Get-Content $_.FullName -Raw + $content = $content -replace 'ocr_page_contents_from_pdf', 'extract_page_contents_from_pdf' + $content = $content -replace 'ocr_page_contents_and_views_from_pdf', 'extract_page_contents_and_views_from_pdf' + Set-Content -Path $_.FullName -Value $content -NoNewline +} + # Find all .toml files in .pipelex and apply replacements Get-ChildItem -Path .pipelex -Filter *.toml -Recurse | ForEach-Object { $content = Get-Content $_.FullName -Raw @@ -569,6 +716,9 @@ Get-ChildItem -Path tests -Filter *.py -Recurse | ForEach-Object { Set-Content -Path $_.FullName -Value $content -NoNewline } +# Remove deprecated files +Remove-Item -Path "pipelex_libraries/templates/base_templates.toml" -ErrorAction SilentlyContinue + Write-Host "Automated migration complete. Please review changes and:" Write-Host "1. Manually add default_outcome to all PipeCondition pipes" Write-Host "2. Tag image inputs in PipeLLM prompts" @@ -576,7 +726,7 @@ Write-Host "3. Remove nb_steps from PipeImgGen if present" Write-Host "4. Run 'make validate' to check for errors" ``` -## 12. Additional Resources +## 14. 
Additional Resources - See AGENTS.md for complete documentation of the current syntax - Run `make validate` frequently to catch issues early From a3c79b342e0793c63add6ed3ad90c01962f8ff1e Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 15:24:07 +0200 Subject: [PATCH 031/115] Prepare kit --- pipelex/cogt/models/model_manager.py | 2 +- pipelex/kit/__init__.py | 1 + pipelex/kit/agents/pytest_standards.md | 158 ++ pipelex/kit/agents/python_standards.md | 1306 +++++++++++++++++ pipelex/kit/agents/run_pipelines.md | 1306 +++++++++++++++++ pipelex/kit/agents/write_pipelex.md | 793 ++++++++++ .../configs}/inference/backends.toml | 0 .../inference/backends/anthropic.toml | 0 .../inference/backends/azure_openai.toml | 0 .../configs}/inference/backends/bedrock.toml | 0 .../inference/backends/blackboxai.toml | 0 .../configs}/inference/backends/fal.toml | 0 .../configs}/inference/backends/google.toml | 0 .../configs}/inference/backends/internal.toml | 0 .../configs}/inference/backends/mistral.toml | 0 .../configs}/inference/backends/ollama.toml | 0 .../configs}/inference/backends/openai.toml | 0 .../inference/backends/perplexity.toml | 0 .../inference/backends/pipelex_inference.toml | 0 .../configs}/inference/backends/vertexai.toml | 0 .../configs}/inference/backends/xai.toml | 0 .../configs}/inference/deck/base_deck.toml | 0 .../configs}/inference/deck/overrides.toml | 0 .../configs}/inference/routing_profiles.toml | 0 .../configs}/pipelex.toml | 5 - pipelex/kit/index.toml | 0 .../kit/migrations/migrate_0.11.0_0.12.0.md | 0 pipelex/tools/config/manager.py | 2 - pyproject.toml | 1 + uv.lock | 121 ++ 30 files changed, 3687 insertions(+), 8 deletions(-) create mode 100644 pipelex/kit/__init__.py create mode 100644 pipelex/kit/agents/pytest_standards.md create mode 100644 pipelex/kit/agents/python_standards.md create mode 100644 pipelex/kit/agents/run_pipelines.md create mode 100644 pipelex/kit/agents/write_pipelex.md rename pipelex/{config_template => 
kit/configs}/inference/backends.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/anthropic.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/azure_openai.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/bedrock.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/blackboxai.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/fal.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/google.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/internal.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/mistral.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/ollama.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/openai.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/perplexity.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/pipelex_inference.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/vertexai.toml (100%) rename pipelex/{config_template => kit/configs}/inference/backends/xai.toml (100%) rename pipelex/{config_template => kit/configs}/inference/deck/base_deck.toml (100%) rename pipelex/{config_template => kit/configs}/inference/deck/overrides.toml (100%) rename pipelex/{config_template => kit/configs}/inference/routing_profiles.toml (100%) rename pipelex/{config_template => kit/configs}/pipelex.toml (73%) create mode 100644 pipelex/kit/index.toml rename MIGRATION.md => pipelex/kit/migrations/migrate_0.11.0_0.12.0.md (100%) diff --git a/pipelex/cogt/models/model_manager.py b/pipelex/cogt/models/model_manager.py index 1d14f3dbe..f669131c8 100644 --- a/pipelex/cogt/models/model_manager.py +++ b/pipelex/cogt/models/model_manager.py @@ -14,7 +14,7 @@ from pipelex.cogt.models.model_manager_abstract 
import ModelManagerAbstract from pipelex.config import get_config from pipelex.tools.misc.json_utils import deep_update -from pipelex.tools.misc.toml_utils import load_toml_from_path +from pipelex.tools.misc.toml_utils import load_toml_from_path, load_toml_from_path_if_exists, load_toml_from_content, TomlError class ModelManager(ModelManagerAbstract): diff --git a/pipelex/kit/__init__.py b/pipelex/kit/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/pipelex/kit/__init__.py @@ -0,0 +1 @@ + diff --git a/pipelex/kit/agents/pytest_standards.md b/pipelex/kit/agents/pytest_standards.md new file mode 100644 index 000000000..e6ab9d5b6 --- /dev/null +++ b/pipelex/kit/agents/pytest_standards.md @@ -0,0 +1,158 @@ +# Writing unit tests + +## Unit test generalities + +NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. + +### Test file structure + +- Name test files with `test_` prefix +- Use descriptive names that match the functionality being tested +- Place test files in the appropriate test category directory: + - `tests/unit/` - for unit tests that test individual functions/classes in isolation + - `tests/integration/` - for integration tests that test component interactions + - `tests/e2e/` - for end-to-end tests that test complete workflows + - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) +- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest +- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. +- Always put test inside Test classes. 
+- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` + +### Markers + +Apply the appropriate markers: +- "llm: uses an LLM to generate text or objects" +- "img_gen: uses an image generation AI" +- "extract: uses text/image extraction from documents" +- "inference: uses either an LLM or an image generation AI" +- "gha_disabled: will not be able to run properly on GitHub Actions" + +Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference` and `llm`. + +### Important rules + +- Never use unittest.mock. Use pytest-mock. + +### Test Class Structure + +Always group the tests of a module into a test class: + +```python +@pytest.mark.llm +@pytest.mark.inference +@pytest.mark.asyncio(loop_scope="class") +class TestFooBar: + @pytest.mark.parametrize( + "topic, test_case_blueprint", + [ + TestCases.CASE_1, + TestCases.CASE_2, + ], + ) + async def test_pipe_processing( + self, + request: FixtureRequest, + topic: str, + test_case_blueprint: StuffBlueprint, + ): + # Test implementation +``` + +Sometimes it can be convenient to access the test's name in its body, for instance to include it in a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname) # type: ignore`.
+ +## Writing integration tests to test pipes + +### Required imports for pipe tests + +```python +import pytest +from pytest import FixtureRequest +from pipelex import log, pretty_print +from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory +from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory +from pipelex.hub import get_report_delegate +from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt +from pipelex.config_pipelex import get_config + +from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe +from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +``` + +### Pipe test implementation steps + +1. Create Stuff from blueprint: + +```python +stuff = StuffFactory.make_stuff( + concept_code="RetrievedExcerpt", + domain="retrieve", + content=RetrievedExcerpt(text="", justification=""), + name="retrieved_text", +) +``` + +2. Create Working Memory: + +```python +working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +``` + +3. Run the pipe: + +```python +pipe_output = await pipe_router.run_pipe( + pipe_code="pipe_name", + pipe_run_params=PipeRunParamsFactory.make_run_params(), + working_memory=working_memory, + job_metadata=JobMetadata(), +) +``` + +4.
Basic assertions: + +```python +assert pipe_output is not None +assert pipe_output.working_memory is not None +assert pipe_output.main_stuff is not None +``` + +### Test Data Organization + +- If it's not already there, create a `test_data.py` file in the test directory +- Define test cases using `StuffBlueprint`: + +```python +class TestCases: + CASE_BLUEPRINT_1 = StuffBlueprint( + name="test_case_1", + concept_code="domain.ConceptName1", + value="test_value" + ) + CASE_BLUEPRINT_2 = StuffBlueprint( + name="test_case_2", + concept_code="domain.ConceptName2", + value="test_value" + ) + + CASE_BLUEPRINTS: ClassVar[list[tuple[str, StuffBlueprint]]] = [ # topic, blueprint + ("topic1", CASE_BLUEPRINT_1), + ("topic2", CASE_BLUEPRINT_2), + ] +``` + +Note how we avoid initializing a default mutable value within a class instance; instead we use ClassVar. +Also note that we provide a topic for the test case, which is purely for convenience. + +## Best Practices for Testing + +- Use parametrize for multiple test cases +- Test both success and failure cases +- Verify working memory state +- Check output structure and content +- Use meaningful test case names +- Include docstrings explaining test purpose +- Log outputs for debugging +- Generate reports for cost tracking diff --git a/pipelex/kit/agents/python_standards.md b/pipelex/kit/agents/python_standards.md new file mode 100644 index 000000000..6c6930a3e --- /dev/null +++ b/pipelex/kit/agents/python_standards.md @@ -0,0 +1,1306 @@ +# Coding Standards & Best Practices + +This document outlines the core coding standards, best practices, and quality control procedures for the codebase. + +## Type Hints + +1.
**Always Use Type Hints** + + - Every function parameter must be typed + - Every function return must be typed + - Use type hints for all variables where type is not obvious + - Use dict, list, tuple types with lowercase first letter: dict[], list[], tuple[] + - Use type hints for all fields + - Use the `|` syntax for union types (e.g. `str | int`) and `| None` for optionals + - Use Field(default_factory=...) for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` + - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` + - Keep models focused and single-purpose + +2. **StrEnum** + - Import from `pipelex.types`: + ```python + from pipelex.types import StrEnum + ``` + +3. **Self type** + - Import from `pipelex.types`: + ```python + from pipelex.types import Self + ``` + +## Factory Pattern + + - Use Factory Pattern for object creation when dealing with multiple implementations + - Our factory methods are named `make_from_...` and such + +## Error Handling + + - Always catch exceptions at the place where you can add useful context to them. + - Use try/except blocks with specific exceptions + - Convert third-party exceptions to our custom ones + - Never catch Exception, only catch specific exceptions + - Always add `from exc` to the exception + + ```python + try: + self.models_manager.setup() + except RoutingProfileLibraryNotFoundError as exc: + msg = "The routing library could not be found, please call `pipelex init config` to create it" + raise PipelexSetupError(msg) from exc + ``` + + **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. + +## Documentation + +1. 
**Docstring Format** + ```python + def process_image(image_path: str, size: tuple[int, int]) -> bytes: + """Process and resize an image. + + Args: + image_path: Path to the source image + size: Tuple of (width, height) for resizing + + Returns: + Processed image as bytes + """ + pass + ``` + +2. **Class Documentation** + ```python + class ImageProcessor: + """Handles image processing operations. + + Provides methods for resizing, converting, and optimizing images. + """ + ``` + +## Code Quality Checks + +### Linting and Type Checking + +Before finalizing a task, run: +```bash +make fix-unused-imports +make check +``` + +This runs multiple code quality tools: +- Pyright: Static type checking +- Ruff: Fast Python linter +- Mypy: Static type checker + +Always fix any issues reported by these tools before proceeding. + +### Running Tests + +1. **Quick Test Run** (no LLM/image generation): + ```bash + make tp + ``` + Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` + +2. **Specific Tests**: + ```bash + make tp TEST=TestClassName + # or + make tp TEST=test_function_name + ``` + Note: Matches names starting with the provided string. + +**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. + +## Pipelines + +- All pipeline definitions go in `pipelex/libraries/pipelines/` +- Always validate pipelines after creation/edit with `make validate`. + Iterate if there are errors. + +## Project Structure + +- **Pipelines**: `pipelex/libraries/pipelines/` +- **Tests**: `tests/` directory +- **Documentation**: `docs/` directory + +--- + +# Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. 
It will ensure the pipe is runnable. If not, iterate. +- Please follow POSIX conventions for files (empty line at the end of the file, no trailing whitespace, etc.) + +## Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for structures +- Use descriptive names in `snake_case` + +## Pipeline File Structure +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +### Concept Definitions +```plx +[concept] +ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output +``` + +Important Rules: +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") +- Avoid adjectives (no "LargeText", use "Text") +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) + +### Pipe Definitions + +## Pipe Base Structure + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +DO NOT WRITE: +```plx +[pipe.your_pipe_name] +type = "pipe_sequence" +``` + +But it should be: + +```plx +[pipe.your_pipe_name] +type = "PipeSequence" +description = "....." +``` + +The pipes will all have at least this base structure. +- `inputs`: Dictionary where each key is the name of a variable used in the prompts and each value is its ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). 
+So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output. + +## Structuring Models + +### Model Location and Registration + +- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Models must inherit from `StructuredContent` or appropriate content type + +## Model Structure + +Concepts and their structure classes are meant to indicate an idea. +A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. + +**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** + +DO NOT create structures like: +```python +class Joke(TextContent): + """A humorous text that makes people laugh.""" + pass +``` + +If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: +```plx +[concept] +Joke = "A humorous text that makes people laugh." 
+``` +If you simply need to refine another native concept, construct it like this: +```plx +[concept.Landscape] +refines = "Image" +``` +Only create a Python structure class when you need to add specific fields: + +```python +from datetime import datetime +from typing import List, Optional +from pydantic import Field + +from pipelex.core.stuffs.structured_content import StructuredContent + +# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +class YourModel(StructuredContent): # Always be a subclass of StructuredContent + # Required fields + field1: str + field2: int + + # Optional fields with defaults + field3: Optional[str] = Field(None, description="Description of field3") + field4: List[str] = Field(default_factory=list) + + # Date fields should remove timezone + date_field: Optional[datetime] = None +``` +### Usage + +Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. + +Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. + + +### Best Practices for structures + +- Respect Pydantic v2 standards +- Use type hints for all fields +- Use `Field` declaration and write the description + + +## Pipe Controllers and Pipe Operators + +Look at the pipes we have and adapt them to your needs. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
+ - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +## PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. + +### Basic Structure +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +## PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. + +### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +## PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. 
+ + +## PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: Optional[TextContent] + images: Optional[List[ImageContent]] + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: Optional[ImageContent] = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +## PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +# Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +## PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +## PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +--- + +# Guide to write an example to execute a pipeline + +## Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +# start Pipelex +Pipelex.make() +# run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +## Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + +from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +# start Pipelex +Pipelex.make() + +# run sample using asyncio +gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +## Setting up the input memory + +### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str +ImplicitMemory = Dict[str, StuffContentOrData] +``` +As you can see, we made it so that stuff can be defined in different ways, using structured content or data. + +### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +# Here we have a single input and it's a Text. +# If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +# Here we have a single input and it's a PDF. +# Because PDFContent is a native concept, we can use it directly as a value, +# the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +# Here we have a single input and it's an Image. 
+# Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +# Here we have a single input, it's an image but +# its actually a more specific concept gantt.GanttImage which refines Image, +# so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +# Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +## Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +# Writing unit tests + +## Unit test generalities + +NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
+ +### Test file structure + +- Name test files with `test_` prefix +- Use descriptive names that match the functionality being tested +- Place test files in the appropriate test category directory: + - `tests/unit/` - for unit tests that test individual functions/classes in isolation + - `tests/integration/` - for integration tests that test component interactions + - `tests/e2e/` - for end-to-end tests that test complete workflows + - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) +- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest +- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. +- Always put tests inside Test classes. +- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` + +### Markers + +Apply the appropriate markers: +- "llm: uses an LLM to generate text or objects" +- "img_gen: uses an image generation AI" +- "extract: uses text/image extraction from documents" +- "inference: uses either an LLM or an image generation AI" +- "gha_disabled: will not be able to run properly on GitHub Actions" + +Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference` and `llm`. + +### Important rules + +- Never use unittest.mock. Use pytest-mock. 
+ +### Test Class Structure + +Always group the tests of a module into a test class: + +```python +@pytest.mark.llm +@pytest.mark.inference +@pytest.mark.asyncio(loop_scope="class") +class TestFooBar: + @pytest.mark.parametrize( + "topic, test_case_blueprint", + [ + TestCases.CASE_1, + TestCases.CASE_2, + ], + ) + async def test_pipe_processing( + self, + request: FixtureRequest, + topic: str, + test_case_blueprint: StuffBlueprint, + ): + # Test implementation +``` + +Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. + +## Writing integration test to test pipes + +### Required imports for pipe tests + +```python +import pytest +from pytest import FixtureRequest +from pipelex import log, pretty_print +from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory +from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory +from pipelex.hub import get_report_delegate +from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt +from pipelex.config_pipelex import get_config + +from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe +from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +``` + +### Pipe test implementation steps + +1. Create Stuff from blueprint: + +```python +stuff = StuffFactory.make_stuff( + concept_code="RetrievedExcerpt", + domain="retrieve", + content=RetrievedExcerpt(text="", justification=""), + name="retrieved_text", +) +``` + +2. 
Create Working Memory: + +```python +working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +``` + +3. Run the pipe: + +```python +pipe_output = await pipe_router.run_pipe( + pipe_code="pipe_name", + pipe_run_params=PipeRunParamsFactory.make_run_params(), + working_memory=working_memory, + job_metadata=JobMetadata(), +) +``` + +4. Basic assertions: + +```python +assert pipe_output is not None +assert pipe_output.working_memory is not None +assert pipe_output.main_stuff is not None +``` + +### Test Data Organization + +- If it's not already there, create a `test_data.py` file in the test directory +- Define test cases using `StuffBlueprint`: + +```python +class TestCases: + CASE_BLUEPRINT_1 = StuffBlueprint( + name="test_case_1", + concept_code="domain.ConceptName1", + value="test_value" + ) + CASE_BLUEPRINT_2 = StuffBlueprint( + name="test_case_2", + concept_code="domain.ConceptName2", + value="test_value" + ) + + CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" + ("topic1", CASE_BLUEPRINT_1), + ("topic2", CASE_BLUEPRINT_2), + ] +``` + +Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. +Also note that we provide a topic for the test case, which is purely for convenience. + +## Best Practices for Testing + +- Use parametrize for multiple test cases +- Test both success and failure cases +- Verify working memory state +- Check output structure and content +- Use meaningful test case names +- Include docstrings explaining test purpose +- Log outputs for debugging +- Generate reports for cost tracking diff --git a/pipelex/kit/agents/run_pipelines.md b/pipelex/kit/agents/run_pipelines.md new file mode 100644 index 000000000..6c6930a3e --- /dev/null +++ b/pipelex/kit/agents/run_pipelines.md @@ -0,0 +1,1306 @@ +# Coding Standards & Best Practices + +This document outlines the core coding standards, best practices, and quality control procedures for the codebase. 
+ +## Type Hints + +1. **Always Use Type Hints** + + - Every function parameter must be typed + - Every function return must be typed + - Use type hints for all variables where type is not obvious + - Use dict, list, tuple types with lowercase first letter: dict[], list[], tuple[] + - Use type hints for all fields + - Use the `|` syntax for union types (e.g. `str | int`) and `| None` for optionals + - Use Field(default_factory=...) for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` + - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` + - Keep models focused and single-purpose + +2. **StrEnum** + - Import from `pipelex.types`: + ```python + from pipelex.types import StrEnum + ``` + +3. **Self type** + - Import from `pipelex.types`: + ```python + from pipelex.types import Self + ``` + +## Factory Pattern + + - Use Factory Pattern for object creation when dealing with multiple implementations + - Our factory methods are named `make_from_...` and such + +## Error Handling + + - Always catch exceptions at the place where you can add useful context to it. + - Use try/except blocks with specific exceptions + - Convert third-party exceptions to our custom ones + - Never catch Exception, only catch specific exceptions + - Always add `from exc` to the exception + + ```python + try: + self.models_manager.setup() + except RoutingProfileLibraryNotFoundError as exc: + msg = "The routing library could not be found, please call `pipelex init config` to create it" + raise PipelexSetupError(msg) from exc + ``` + + **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. + +## Documentation + +1. 
**Docstring Format** + ```python + def process_image(image_path: str, size: tuple[int, int]) -> bytes: + """Process and resize an image. + + Args: + image_path: Path to the source image + size: Tuple of (width, height) for resizing + + Returns: + Processed image as bytes + """ + pass + ``` + +2. **Class Documentation** + ```python + class ImageProcessor: + """Handles image processing operations. + + Provides methods for resizing, converting, and optimizing images. + """ + ``` + +## Code Quality Checks + +### Linting and Type Checking + +Before finalizing a task, run: +```bash +make fix-unused-imports +make check +``` + +This runs multiple code quality tools: +- Pyright: Static type checking +- Ruff: Fast Python linter +- Mypy: Static type checker + +Always fix any issues reported by these tools before proceeding. + +### Running Tests + +1. **Quick Test Run** (no LLM/image generation): + ```bash + make tp + ``` + Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` + +2. **Specific Tests**: + ```bash + make tp TEST=TestClassName + # or + make tp TEST=test_function_name + ``` + Note: Matches names starting with the provided string. + +**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. + +## Pipelines + +- All pipeline definitions go in `pipelex/libraries/pipelines/` +- Always validate pipelines after creation/edit with `make validate`. + Iterate if there are errors. + +## Project Structure + +- **Pipelines**: `pipelex/libraries/pipelines/` +- **Tests**: `tests/` directory +- **Documentation**: `docs/` directory + +--- + +# Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. 
It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +## Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for structures +- Use descriptive names in `snake_case` + +## Pipeline File Structure +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +### Concept Definitions +```plx +[concept] +ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output +``` + +Important Rules: +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") +- Avoid adjectives (no "LargeText", use "Text") +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) +yes +### Pipe Definitions + +## Pipe Base Structure + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +DO NOT WRITE: +```plx +[pipe.your_pipe_name] +type = "pipe_sequence" +``` + +But it should be: + +```plx +[pipe.your_pipe_name] +type = "PipeSequence" +description = "....." +``` + +The pipes will all have at least this base structure. +- `inputs`: Dictionary where each key is the name of a variable used in the prompts, and each value is its ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). 
+So If you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'`` +That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: + +## Structuring Models + +### Model Location and Registration + +- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Models must inherit from `StructuredContent` or appropriate content type + +## Model Structure + +Concepts and their structure classes are meant to indicate an idea. +A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. + +**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** + +DO NOT create structures like: +```python +class Joke(TextContent): + """A humorous text that makes people laugh.""" + pass +``` + +If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: +```plx +[concept] +Joke = "A humorous text that makes people laugh." 
+``` +If you simply need to refine another native concept, construct it like this: +```plx +[concept.Landscape] +refines = "Image" +``` +Only create a Python structure class when you need to add specific fields: + +```python +from datetime import datetime +from typing import List, Optional +from pydantic import Field + +from pipelex.core.stuffs.structured_content import StructuredContent + +# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +class YourModel(StructuredContent): # Always be a subclass of StructuredContent + # Required fields + field1: str + field2: int + + # Optional fields with defaults + field3: Optional[str] = Field(default=None, description="Description of field3") + field4: List[str] = Field(default_factory=list) + + # Date fields should remove timezone + date_field: Optional[datetime] = None +``` +### Usage + +Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. + +Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. + + +### Best Practices for structures + +- Respect Pydantic v2 standards +- Use type hints for all fields +- Use `Field` declaration and write the description + + +## Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt them. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
+ - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +## PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. + +### Basic Structure +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +## PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. + +### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +## PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. 
+ + +## PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF + +### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract is outputting a `ListContent` of `Page` + +```python +class TextAndImagesContent(StuffContent): + text: Optional[TextContent] + images: Optional[List[ImageContent]] + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: Optional[ImageContent] = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +## PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +# Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +## PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +## PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +--- + +# Guide to write an example to execute a pipeline + +## Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +# start Pipelex +Pipelex.make() +# run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +## Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + +from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +# start Pipelex +Pipelex.make() + +# run sample using asyncio +gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +## Setting up the input memory + +### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str +ImplicitMemory = Dict[str, StuffContentOrData] +``` +As you can see, there are several ways to define that stuff, using either structured content or plain data. + +### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +# Here we have a single input and it's a Text. +# If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +# Here we have a single input and it's a PDF. +# Because PDFContent is a native concept, we can use it directly as a value, +# the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +# Here we have a single input and it's an Image. 
+# Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +# Here we have a single input, it's an image but +# it's actually a more specific concept gantt.GanttImage which refines Image, +# so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +# Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +## Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +# Writing unit tests + +## Unit test generalities + +NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
+ +### Test file structure + +- Name test files with `test_` prefix +- Use descriptive names that match the functionality being tested +- Place test files in the appropriate test category directory: + - `tests/unit/` - for unit tests that test individual functions/classes in isolation + - `tests/integration/` - for integration tests that test component interactions + - `tests/e2e/` - for end-to-end tests that test complete workflows + - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) +- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest +- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. +- Always put tests inside Test classes. +- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` + +### Markers + +Apply the appropriate markers: +- "llm: uses an LLM to generate text or objects" +- "img_gen: uses an image generation AI" +- "extract: uses text/image extraction from documents" +- "inference: uses either an LLM or an image generation AI" +- "gha_disabled: will not be able to run properly on GitHub Actions" + +Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference` and `llm`. + +### Important rules + +- Never use the unittest.mock. Use pytest-mock. 
+ +### Test Class Structure + +Always group the tests of a module into a test class: + +```python +@pytest.mark.llm +@pytest.mark.inference +@pytest.mark.asyncio(loop_scope="class") +class TestFooBar: + @pytest.mark.parametrize( + "topic, test_case_blueprint", + [ + TestCases.CASE_1, + TestCases.CASE_2, + ], + ) + async def test_pipe_processing( + self, + request: FixtureRequest, + topic: str, + test_case_blueprint: StuffBlueprint, + ): + # Test implementation +``` + +Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. + +## Writing integration test to test pipes + +### Required imports for pipe tests + +```python +import pytest +from pytest import FixtureRequest +from pipelex import log, pretty_print +from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory +from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory +from pipelex.hub import get_report_delegate +from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt +from pipelex.config_pipelex import get_config + +from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe +from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +``` + +### Pipe test implementation steps + +1. Create Stuff from blueprint: + +```python +stuff = StuffFactory.make_stuff( + concept_code="RetrievedExcerpt", + domain="retrieve", + content=RetrievedExcerpt(text="", justification=""), + name="retrieved_text", +) +``` + +2. 
Create Working Memory: + +```python +working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +``` + +3. Run the pipe: + +```python +pipe_output = await pipe_router.run_pipe( + pipe_code="pipe_name", + pipe_run_params=PipeRunParamsFactory.make_run_params(), + working_memory=working_memory, + job_metadata=JobMetadata(), +) +``` + +4. Basic assertions: + +```python +assert pipe_output is not None +assert pipe_output.working_memory is not None +assert pipe_output.main_stuff is not None +``` + +### Test Data Organization + +- If it's not already there, create a `test_data.py` file in the test directory +- Define test cases using `StuffBlueprint`: + +```python +class TestCases: + CASE_BLUEPRINT_1 = StuffBlueprint( + name="test_case_1", + concept_code="domain.ConceptName1", + value="test_value" + ) + CASE_BLUEPRINT_2 = StuffBlueprint( + name="test_case_2", + concept_code="domain.ConceptName2", + value="test_value" + ) + + CASE_BLUEPRINTS: ClassVar[List[Tuple[str, StuffBlueprint]]] = [ # topic, blueprint + ("topic1", CASE_BLUEPRINT_1), + ("topic2", CASE_BLUEPRINT_2), + ] +``` + +Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. +Also note that we provide a topic for the test case, which is purely for convenience. + +## Best Practices for Testing + +- Use parametrize for multiple test cases +- Test both success and failure cases +- Verify working memory state +- Check output structure and content +- Use meaningful test case names +- Include docstrings explaining test purpose +- Log outputs for debugging +- Generate reports for cost tracking diff --git a/pipelex/kit/agents/write_pipelex.md b/pipelex/kit/agents/write_pipelex.md new file mode 100644 index 000000000..f79c85f79 --- /dev/null +++ b/pipelex/kit/agents/write_pipelex.md @@ -0,0 +1,793 @@ +# Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. 
+- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +## Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for structures +- Use descriptive names in `snake_case` + +## Pipeline File Structure +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +### Concept Definitions +```plx +[concept] +ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output +``` + +Important Rules: +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") +- Avoid adjectives (no "LargeText", use "Text") +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) + +### Pipe Definitions + +## Pipe Base Structure + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +DO NOT WRITE: +```plx +[pipe.your_pipe_name] +type = "pipe_sequence" +``` + +But it should be: + +```plx +[pipe.your_pipe_name] +type = "PipeSequence" +description = "....." +``` + +The pipes will all have at least this base structure. +- `inputs`: Dictionary whose keys are the variable names used in the prompts and whose values are the corresponding ConceptNames. 
It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: + +## Structuring Models + +### Model Location and Registration + +- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/<some_domain>.py` +- Models must inherit from `StructuredContent` or appropriate content type + +## Model Structure + +Concepts and their structure classes are meant to indicate an idea. +A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. + +**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** + +DO NOT create structures like: +```python +class Joke(TextContent): + """A humorous text that makes people laugh.""" + pass +``` + +If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: +```plx +[concept] +Joke = "A humorous text that makes people laugh." 
+``` +If you simply need to refine another native concept, construct it like this: +```plx +[concept.Landscape] +refines = "Image" +``` +Only create a Python structure class when you need to add specific fields: + +```python +from datetime import datetime +from typing import List, Optional +from pydantic import Field + +from pipelex.core.stuffs.structured_content import StructuredContent + +# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +class YourModel(StructuredContent): # Always be a subclass of StructuredContent + # Required fields + field1: str + field2: int + + # Optional fields with defaults + field3: Optional[str] = Field(None, description="Description of field3") + field4: List[str] = Field(default_factory=list) + + # Date fields should remove timezone + date_field: Optional[datetime] = None +``` +### Usage + +Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. + +Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. + + +### Best Practices for structures + +- Respect Pydantic v2 standards +- Use type hints for all fields +- Use `Field` declaration and write the description + + +## Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt them. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
+ - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +## PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. + +### Basic Structure +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +## PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. + +### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +## PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. 
+ + +## PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: Optional[TextContent] + images: Optional[List[ImageContent]] + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: Optional[ImageContent] = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +## PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +# Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +## PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +## PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (a direct model name or an alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2.
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using the `model` field to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you indicate the model together with its temperature (float between 0 and 1); you can also set max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets record the choice of an LLM together with its hyperparameters (temperature and max_tokens) when that combination works well for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The benefit is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use, in a PipeLLM, an LLM preset that does not exist in the deck.
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. diff --git a/pipelex/config_template/inference/backends.toml b/pipelex/kit/configs/inference/backends.toml similarity index 100% rename from pipelex/config_template/inference/backends.toml rename to pipelex/kit/configs/inference/backends.toml diff --git a/pipelex/config_template/inference/backends/anthropic.toml b/pipelex/kit/configs/inference/backends/anthropic.toml similarity index 100% rename from pipelex/config_template/inference/backends/anthropic.toml rename to pipelex/kit/configs/inference/backends/anthropic.toml diff --git a/pipelex/config_template/inference/backends/azure_openai.toml b/pipelex/kit/configs/inference/backends/azure_openai.toml similarity index 100% rename from pipelex/config_template/inference/backends/azure_openai.toml rename to pipelex/kit/configs/inference/backends/azure_openai.toml diff --git a/pipelex/config_template/inference/backends/bedrock.toml b/pipelex/kit/configs/inference/backends/bedrock.toml similarity index 100% rename from pipelex/config_template/inference/backends/bedrock.toml rename to pipelex/kit/configs/inference/backends/bedrock.toml diff --git a/pipelex/config_template/inference/backends/blackboxai.toml b/pipelex/kit/configs/inference/backends/blackboxai.toml similarity index 100% rename from pipelex/config_template/inference/backends/blackboxai.toml rename to pipelex/kit/configs/inference/backends/blackboxai.toml diff --git a/pipelex/config_template/inference/backends/fal.toml b/pipelex/kit/configs/inference/backends/fal.toml similarity index 100% rename from 
pipelex/config_template/inference/backends/fal.toml rename to pipelex/kit/configs/inference/backends/fal.toml diff --git a/pipelex/config_template/inference/backends/google.toml b/pipelex/kit/configs/inference/backends/google.toml similarity index 100% rename from pipelex/config_template/inference/backends/google.toml rename to pipelex/kit/configs/inference/backends/google.toml diff --git a/pipelex/config_template/inference/backends/internal.toml b/pipelex/kit/configs/inference/backends/internal.toml similarity index 100% rename from pipelex/config_template/inference/backends/internal.toml rename to pipelex/kit/configs/inference/backends/internal.toml diff --git a/pipelex/config_template/inference/backends/mistral.toml b/pipelex/kit/configs/inference/backends/mistral.toml similarity index 100% rename from pipelex/config_template/inference/backends/mistral.toml rename to pipelex/kit/configs/inference/backends/mistral.toml diff --git a/pipelex/config_template/inference/backends/ollama.toml b/pipelex/kit/configs/inference/backends/ollama.toml similarity index 100% rename from pipelex/config_template/inference/backends/ollama.toml rename to pipelex/kit/configs/inference/backends/ollama.toml diff --git a/pipelex/config_template/inference/backends/openai.toml b/pipelex/kit/configs/inference/backends/openai.toml similarity index 100% rename from pipelex/config_template/inference/backends/openai.toml rename to pipelex/kit/configs/inference/backends/openai.toml diff --git a/pipelex/config_template/inference/backends/perplexity.toml b/pipelex/kit/configs/inference/backends/perplexity.toml similarity index 100% rename from pipelex/config_template/inference/backends/perplexity.toml rename to pipelex/kit/configs/inference/backends/perplexity.toml diff --git a/pipelex/config_template/inference/backends/pipelex_inference.toml b/pipelex/kit/configs/inference/backends/pipelex_inference.toml similarity index 100% rename from 
pipelex/config_template/inference/backends/pipelex_inference.toml rename to pipelex/kit/configs/inference/backends/pipelex_inference.toml diff --git a/pipelex/config_template/inference/backends/vertexai.toml b/pipelex/kit/configs/inference/backends/vertexai.toml similarity index 100% rename from pipelex/config_template/inference/backends/vertexai.toml rename to pipelex/kit/configs/inference/backends/vertexai.toml diff --git a/pipelex/config_template/inference/backends/xai.toml b/pipelex/kit/configs/inference/backends/xai.toml similarity index 100% rename from pipelex/config_template/inference/backends/xai.toml rename to pipelex/kit/configs/inference/backends/xai.toml diff --git a/pipelex/config_template/inference/deck/base_deck.toml b/pipelex/kit/configs/inference/deck/base_deck.toml similarity index 100% rename from pipelex/config_template/inference/deck/base_deck.toml rename to pipelex/kit/configs/inference/deck/base_deck.toml diff --git a/pipelex/config_template/inference/deck/overrides.toml b/pipelex/kit/configs/inference/deck/overrides.toml similarity index 100% rename from pipelex/config_template/inference/deck/overrides.toml rename to pipelex/kit/configs/inference/deck/overrides.toml diff --git a/pipelex/config_template/inference/routing_profiles.toml b/pipelex/kit/configs/inference/routing_profiles.toml similarity index 100% rename from pipelex/config_template/inference/routing_profiles.toml rename to pipelex/kit/configs/inference/routing_profiles.toml diff --git a/pipelex/config_template/pipelex.toml b/pipelex/kit/configs/pipelex.toml similarity index 73% rename from pipelex/config_template/pipelex.toml rename to pipelex/kit/configs/pipelex.toml index dddcbf095..0f6eaf0a1 100644 --- a/pipelex/config_template/pipelex.toml +++ b/pipelex/kit/configs/pipelex.toml @@ -10,10 +10,6 @@ api_key_method = "env" [cogt] -#################################################################################################### -# OCR config 
-#################################################################################################### - [cogt.extract_config] page_output_text_file_name = "page_text.md" @@ -22,4 +18,3 @@ page_output_text_file_name = "page_text.md" is_pipeline_tracking_enabled = false is_activity_tracking_enabled = false is_reporting_enabled = true - diff --git a/pipelex/kit/index.toml b/pipelex/kit/index.toml new file mode 100644 index 000000000..e69de29bb diff --git a/MIGRATION.md b/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md similarity index 100% rename from MIGRATION.md rename to pipelex/kit/migrations/migrate_0.11.0_0.12.0.md diff --git a/pipelex/tools/config/manager.py b/pipelex/tools/config/manager.py index c3cfb4403..f57d58460 100644 --- a/pipelex/tools/config/manager.py +++ b/pipelex/tools/config/manager.py @@ -13,8 +13,6 @@ CONFIG_DIR_NAME = ".pipelex" CONFIG_NAME = "pipelex.toml" -CONFIG_TEMPLATE_SUBPATH = "config_template" -INFERENCE_CONFIG_SUBPATH = "inference" class ConfigError(Exception): diff --git a/pyproject.toml b/pyproject.toml index 5bc984b96..5bdb38133 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "json2html>=1.3.0", "kajson==0.3.1", "markdown>=3.6", + "markdown-parser-py==1.0.1", "networkx>=3.4.2", "openai>=1.60.1", "openpyxl>=3.1.5", diff --git a/uv.lock b/uv.lock index 61acfe018..8f998a3e9 100644 --- a/uv.lock +++ b/uv.lock @@ -1292,6 +1292,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] +[[package]] +name = "markdown-parser-py" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/39/29/d680c74bf64dc510ffd83c0321db569cf17f4a752cb086a47e18401bc6a1/markdown_parser_py-1.0.1.tar.gz", hash = "sha256:ed05765c8ba9d9459280aba7a6af6314fdadeb814e0258b17809da29dd4f957e", size = 6718, upload-time = "2025-09-26T15:32:41.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/ec/23a5d686200344695258d15fa12be9e3e87004dfb80d62b0f1515546278f/markdown_parser_py-1.0.1-py3-none-any.whl", hash = "sha256:687792834352afc1fe9dc37c2ad20f7182612159e5c5222a34261093686e1d3a", size = 7032, upload-time = "2025-09-26T15:32:40.46Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -2151,6 +2163,7 @@ dependencies = [ { name = "json2html" }, { name = "kajson" }, { name = "markdown" }, + { name = "markdown-parser-py" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "openai" }, @@ -2240,6 +2253,7 @@ requires-dist = [ { name = "json2html", specifier = ">=1.3.0" }, { name = "kajson", specifier = "==0.3.1" }, { name = "markdown", specifier = ">=3.6" }, + { name = "markdown-parser-py", specifier = "==1.0.1" }, { name = "mistralai", marker = "extra == 'mistralai'", specifier = "==1.5.2" }, { name = "mkdocs", marker = "extra == 'docs'", specifier = "==1.6.1" }, { name = "mkdocs-glightbox", marker = "extra == 'docs'", specifier = "==0.4.0" }, @@ -2878,6 +2892,113 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/11/432f32f8097b03e3cd5fe57e88efb685d964e2e5178a48ed61e841f7fdce/pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04", size = 4722, upload-time = "2025-05-13T15:23:59.629Z" }, ] +[[package]] +name = "regex" +version = "2025.9.18" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/49/d3/eaa0d28aba6ad1827ad1e716d9a93e1ba963ada61887498297d3da715133/regex-2025.9.18.tar.gz", hash = "sha256:c5ba23274c61c6fef447ba6a39333297d0c247f53059dba0bca415cac511edc4", size = 400917, upload-time = "2025-09-19T00:38:35.79Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/d8/7e06171db8e55f917c5b8e89319cea2d86982e3fc46b677f40358223dece/regex-2025.9.18-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:12296202480c201c98a84aecc4d210592b2f55e200a1d193235c4db92b9f6788", size = 484829, upload-time = "2025-09-19T00:35:05.215Z" }, + { url = "https://files.pythonhosted.org/packages/8d/70/bf91bb39e5bedf75ce730ffbaa82ca585584d13335306d637458946b8b9f/regex-2025.9.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:220381f1464a581f2ea988f2220cf2a67927adcef107d47d6897ba5a2f6d51a4", size = 288993, upload-time = "2025-09-19T00:35:08.154Z" }, + { url = "https://files.pythonhosted.org/packages/fe/89/69f79b28365eda2c46e64c39d617d5f65a2aa451a4c94de7d9b34c2dc80f/regex-2025.9.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:87f681bfca84ebd265278b5daa1dcb57f4db315da3b5d044add7c30c10442e61", size = 286624, upload-time = "2025-09-19T00:35:09.717Z" }, + { url = "https://files.pythonhosted.org/packages/44/31/81e62955726c3a14fcc1049a80bc716765af6c055706869de5e880ddc783/regex-2025.9.18-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34d674cbba70c9398074c8a1fcc1a79739d65d1105de2a3c695e2b05ea728251", size = 780473, upload-time = "2025-09-19T00:35:11.013Z" }, + { url = "https://files.pythonhosted.org/packages/fb/23/07072b7e191fbb6e213dc03b2f5b96f06d3c12d7deaded84679482926fc7/regex-2025.9.18-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:385c9b769655cb65ea40b6eea6ff763cbb6d69b3ffef0b0db8208e1833d4e746", size = 849290, upload-time = "2025-09-19T00:35:12.348Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/f0/aec7f6a01f2a112210424d77c6401b9015675fb887ced7e18926df4ae51e/regex-2025.9.18-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8900b3208e022570ae34328712bef6696de0804c122933414014bae791437ab2", size = 897335, upload-time = "2025-09-19T00:35:14.058Z" }, + { url = "https://files.pythonhosted.org/packages/cc/90/2e5f9da89d260de7d0417ead91a1bc897f19f0af05f4f9323313b76c47f2/regex-2025.9.18-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c204e93bf32cd7a77151d44b05eb36f469d0898e3fba141c026a26b79d9914a0", size = 789946, upload-time = "2025-09-19T00:35:15.403Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d5/1c712c7362f2563d389be66bae131c8bab121a3fabfa04b0b5bfc9e73c51/regex-2025.9.18-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3acc471d1dd7e5ff82e6cacb3b286750decd949ecd4ae258696d04f019817ef8", size = 780787, upload-time = "2025-09-19T00:35:17.061Z" }, + { url = "https://files.pythonhosted.org/packages/4f/92/c54cdb4aa41009632e69817a5aa452673507f07e341076735a2f6c46a37c/regex-2025.9.18-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6479d5555122433728760e5f29edb4c2b79655a8deb681a141beb5c8a025baea", size = 773632, upload-time = "2025-09-19T00:35:18.57Z" }, + { url = "https://files.pythonhosted.org/packages/db/99/75c996dc6a2231a8652d7ad0bfbeaf8a8c77612d335580f520f3ec40e30b/regex-2025.9.18-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:431bd2a8726b000eb6f12429c9b438a24062a535d06783a93d2bcbad3698f8a8", size = 844104, upload-time = "2025-09-19T00:35:20.259Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f7/25aba34cc130cb6844047dbfe9716c9b8f9629fee8b8bec331aa9241b97b/regex-2025.9.18-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0cc3521060162d02bd36927e20690129200e5ac9d2c6d32b70368870b122db25", size = 834794, upload-time = "2025-09-19T00:35:22.002Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/eb/64e671beafa0ae29712268421597596d781704973551312b2425831d4037/regex-2025.9.18-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a021217b01be2d51632ce056d7a837d3fa37c543ede36e39d14063176a26ae29", size = 778535, upload-time = "2025-09-19T00:35:23.298Z" }, + { url = "https://files.pythonhosted.org/packages/26/33/c0ebc0b07bd0bf88f716cca240546b26235a07710ea58e271cfe390ae273/regex-2025.9.18-cp310-cp310-win32.whl", hash = "sha256:4a12a06c268a629cb67cc1d009b7bb0be43e289d00d5111f86a2efd3b1949444", size = 264115, upload-time = "2025-09-19T00:35:25.206Z" }, + { url = "https://files.pythonhosted.org/packages/59/39/aeb11a4ae68faaec2498512cadae09f2d8a91f1f65730fe62b9bffeea150/regex-2025.9.18-cp310-cp310-win_amd64.whl", hash = "sha256:47acd811589301298c49db2c56bde4f9308d6396da92daf99cba781fa74aa450", size = 276143, upload-time = "2025-09-19T00:35:26.785Z" }, + { url = "https://files.pythonhosted.org/packages/29/04/37f2d3fc334a1031fc2767c9d89cec13c2e72207c7e7f6feae8a47f4e149/regex-2025.9.18-cp310-cp310-win_arm64.whl", hash = "sha256:16bd2944e77522275e5ee36f867e19995bcaa533dcb516753a26726ac7285442", size = 268473, upload-time = "2025-09-19T00:35:28.39Z" }, + { url = "https://files.pythonhosted.org/packages/58/61/80eda662fc4eb32bfedc331f42390974c9e89c7eac1b79cd9eea4d7c458c/regex-2025.9.18-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:51076980cd08cd13c88eb7365427ae27f0d94e7cebe9ceb2bb9ffdae8fc4d82a", size = 484832, upload-time = "2025-09-19T00:35:30.011Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d9/33833d9abddf3f07ad48504ddb53fe3b22f353214bbb878a72eee1e3ddbf/regex-2025.9.18-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:828446870bd7dee4e0cbeed767f07961aa07f0ea3129f38b3ccecebc9742e0b8", size = 288994, upload-time = "2025-09-19T00:35:31.733Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/b3/526ee96b0d70ea81980cbc20c3496fa582f775a52e001e2743cc33b2fa75/regex-2025.9.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c28821d5637866479ec4cc23b8c990f5bc6dd24e5e4384ba4a11d38a526e1414", size = 286619, upload-time = "2025-09-19T00:35:33.221Z" }, + { url = "https://files.pythonhosted.org/packages/65/4f/c2c096b02a351b33442aed5895cdd8bf87d372498d2100927c5a053d7ba3/regex-2025.9.18-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:726177ade8e481db669e76bf99de0b278783be8acd11cef71165327abd1f170a", size = 792454, upload-time = "2025-09-19T00:35:35.361Z" }, + { url = "https://files.pythonhosted.org/packages/24/15/b562c9d6e47c403c4b5deb744f8b4bf6e40684cf866c7b077960a925bdff/regex-2025.9.18-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f5cca697da89b9f8ea44115ce3130f6c54c22f541943ac8e9900461edc2b8bd4", size = 858723, upload-time = "2025-09-19T00:35:36.949Z" }, + { url = "https://files.pythonhosted.org/packages/f2/01/dba305409849e85b8a1a681eac4c03ed327d8de37895ddf9dc137f59c140/regex-2025.9.18-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dfbde38f38004703c35666a1e1c088b778e35d55348da2b7b278914491698d6a", size = 905899, upload-time = "2025-09-19T00:35:38.723Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d0/c51d1e6a80eab11ef96a4cbad17fc0310cf68994fb01a7283276b7e5bbd6/regex-2025.9.18-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f2f422214a03fab16bfa495cfec72bee4aaa5731843b771860a471282f1bf74f", size = 798981, upload-time = "2025-09-19T00:35:40.416Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5e/72db90970887bbe02296612bd61b0fa31e6d88aa24f6a4853db3e96c575e/regex-2025.9.18-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a295916890f4df0902e4286bc7223ee7f9e925daa6dcdec4192364255b70561a", size = 
781900, upload-time = "2025-09-19T00:35:42.077Z" }, + { url = "https://files.pythonhosted.org/packages/50/ff/596be45eea8e9bc31677fde243fa2904d00aad1b32c31bce26c3dbba0b9e/regex-2025.9.18-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5db95ff632dbabc8c38c4e82bf545ab78d902e81160e6e455598014f0abe66b9", size = 852952, upload-time = "2025-09-19T00:35:43.751Z" }, + { url = "https://files.pythonhosted.org/packages/e5/1b/2dfa348fa551e900ed3f5f63f74185b6a08e8a76bc62bc9c106f4f92668b/regex-2025.9.18-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fb967eb441b0f15ae610b7069bdb760b929f267efbf522e814bbbfffdf125ce2", size = 844355, upload-time = "2025-09-19T00:35:45.309Z" }, + { url = "https://files.pythonhosted.org/packages/f4/bf/aefb1def27fe33b8cbbb19c75c13aefccfbef1c6686f8e7f7095705969c7/regex-2025.9.18-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f04d2f20da4053d96c08f7fde6e1419b7ec9dbcee89c96e3d731fca77f411b95", size = 787254, upload-time = "2025-09-19T00:35:46.904Z" }, + { url = "https://files.pythonhosted.org/packages/e3/4e/8ef042e7cf0dbbb401e784e896acfc1b367b95dfbfc9ada94c2ed55a081f/regex-2025.9.18-cp311-cp311-win32.whl", hash = "sha256:895197241fccf18c0cea7550c80e75f185b8bd55b6924fcae269a1a92c614a07", size = 264129, upload-time = "2025-09-19T00:35:48.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/7d/c4fcabf80dcdd6821c0578ad9b451f8640b9110fb3dcb74793dd077069ff/regex-2025.9.18-cp311-cp311-win_amd64.whl", hash = "sha256:7e2b414deae99166e22c005e154a5513ac31493db178d8aec92b3269c9cce8c9", size = 276160, upload-time = "2025-09-19T00:36:00.45Z" }, + { url = "https://files.pythonhosted.org/packages/64/f8/0e13c8ae4d6df9d128afaba138342d532283d53a4c1e7a8c93d6756c8f4a/regex-2025.9.18-cp311-cp311-win_arm64.whl", hash = "sha256:fb137ec7c5c54f34a25ff9b31f6b7b0c2757be80176435bf367111e3f71d72df", size = 268471, upload-time = "2025-09-19T00:36:02.149Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/99/05859d87a66ae7098222d65748f11ef7f2dff51bfd7482a4e2256c90d72b/regex-2025.9.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:436e1b31d7efd4dcd52091d076482031c611dde58bf9c46ca6d0a26e33053a7e", size = 486335, upload-time = "2025-09-19T00:36:03.661Z" }, + { url = "https://files.pythonhosted.org/packages/97/7e/d43d4e8b978890932cf7b0957fce58c5b08c66f32698f695b0c2c24a48bf/regex-2025.9.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c190af81e5576b9c5fdc708f781a52ff20f8b96386c6e2e0557a78402b029f4a", size = 289720, upload-time = "2025-09-19T00:36:05.471Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3b/ff80886089eb5dcf7e0d2040d9aaed539e25a94300403814bb24cc775058/regex-2025.9.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e4121f1ce2b2b5eec4b397cc1b277686e577e658d8f5870b7eb2d726bd2300ab", size = 287257, upload-time = "2025-09-19T00:36:07.072Z" }, + { url = "https://files.pythonhosted.org/packages/ee/66/243edf49dd8720cba8d5245dd4d6adcb03a1defab7238598c0c97cf549b8/regex-2025.9.18-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:300e25dbbf8299d87205e821a201057f2ef9aa3deb29caa01cd2cac669e508d5", size = 797463, upload-time = "2025-09-19T00:36:08.399Z" }, + { url = "https://files.pythonhosted.org/packages/df/71/c9d25a1142c70432e68bb03211d4a82299cd1c1fbc41db9409a394374ef5/regex-2025.9.18-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b47fcf9f5316c0bdaf449e879407e1b9937a23c3b369135ca94ebc8d74b1742", size = 862670, upload-time = "2025-09-19T00:36:10.101Z" }, + { url = "https://files.pythonhosted.org/packages/f8/8f/329b1efc3a64375a294e3a92d43372bf1a351aa418e83c21f2f01cf6ec41/regex-2025.9.18-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:57a161bd3acaa4b513220b49949b07e252165e6b6dc910ee7617a37ff4f5b425", size = 910881, upload-time = 
"2025-09-19T00:36:12.223Z" }, + { url = "https://files.pythonhosted.org/packages/35/9e/a91b50332a9750519320ed30ec378b74c996f6befe282cfa6bb6cea7e9fd/regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f130c3a7845ba42de42f380fff3c8aebe89a810747d91bcf56d40a069f15352", size = 802011, upload-time = "2025-09-19T00:36:13.901Z" }, + { url = "https://files.pythonhosted.org/packages/a4/1d/6be3b8d7856b6e0d7ee7f942f437d0a76e0d5622983abbb6d21e21ab9a17/regex-2025.9.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f96fa342b6f54dcba928dd452e8d8cb9f0d63e711d1721cd765bb9f73bb048d", size = 786668, upload-time = "2025-09-19T00:36:15.391Z" }, + { url = "https://files.pythonhosted.org/packages/cb/ce/4a60e53df58bd157c5156a1736d3636f9910bdcc271d067b32b7fcd0c3a8/regex-2025.9.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f0d676522d68c207828dcd01fb6f214f63f238c283d9f01d85fc664c7c85b56", size = 856578, upload-time = "2025-09-19T00:36:16.845Z" }, + { url = "https://files.pythonhosted.org/packages/86/e8/162c91bfe7217253afccde112868afb239f94703de6580fb235058d506a6/regex-2025.9.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:40532bff8a1a0621e7903ae57fce88feb2e8a9a9116d341701302c9302aef06e", size = 849017, upload-time = "2025-09-19T00:36:18.597Z" }, + { url = "https://files.pythonhosted.org/packages/35/34/42b165bc45289646ea0959a1bc7531733e90b47c56a72067adfe6b3251f6/regex-2025.9.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:039f11b618ce8d71a1c364fdee37da1012f5a3e79b1b2819a9f389cd82fd6282", size = 788150, upload-time = "2025-09-19T00:36:20.464Z" }, + { url = "https://files.pythonhosted.org/packages/79/5d/cdd13b1f3c53afa7191593a7ad2ee24092a5a46417725ffff7f64be8342d/regex-2025.9.18-cp312-cp312-win32.whl", hash = "sha256:e1dd06f981eb226edf87c55d523131ade7285137fbde837c34dc9d1bf309f459", size = 264536, upload-time = "2025-09-19T00:36:21.922Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/f5/4a7770c9a522e7d2dc1fa3ffc83ab2ab33b0b22b447e62cffef186805302/regex-2025.9.18-cp312-cp312-win_amd64.whl", hash = "sha256:3d86b5247bf25fa3715e385aa9ff272c307e0636ce0c9595f64568b41f0a9c77", size = 275501, upload-time = "2025-09-19T00:36:23.4Z" }, + { url = "https://files.pythonhosted.org/packages/df/05/9ce3e110e70d225ecbed455b966003a3afda5e58e8aec2964042363a18f4/regex-2025.9.18-cp312-cp312-win_arm64.whl", hash = "sha256:032720248cbeeae6444c269b78cb15664458b7bb9ed02401d3da59fe4d68c3a5", size = 268601, upload-time = "2025-09-19T00:36:25.092Z" }, + { url = "https://files.pythonhosted.org/packages/d2/c7/5c48206a60ce33711cf7dcaeaed10dd737733a3569dc7e1dce324dd48f30/regex-2025.9.18-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2a40f929cd907c7e8ac7566ac76225a77701a6221bca937bdb70d56cb61f57b2", size = 485955, upload-time = "2025-09-19T00:36:26.822Z" }, + { url = "https://files.pythonhosted.org/packages/e9/be/74fc6bb19a3c491ec1ace943e622b5a8539068771e8705e469b2da2306a7/regex-2025.9.18-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c90471671c2cdf914e58b6af62420ea9ecd06d1554d7474d50133ff26ae88feb", size = 289583, upload-time = "2025-09-19T00:36:28.577Z" }, + { url = "https://files.pythonhosted.org/packages/25/c4/9ceaa433cb5dc515765560f22a19578b95b92ff12526e5a259321c4fc1a0/regex-2025.9.18-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a351aff9e07a2dabb5022ead6380cff17a4f10e4feb15f9100ee56c4d6d06af", size = 287000, upload-time = "2025-09-19T00:36:30.161Z" }, + { url = "https://files.pythonhosted.org/packages/7d/e6/68bc9393cb4dc68018456568c048ac035854b042bc7c33cb9b99b0680afa/regex-2025.9.18-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc4b8e9d16e20ddfe16430c23468a8707ccad3365b06d4536142e71823f3ca29", size = 797535, upload-time = "2025-09-19T00:36:31.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/1c/ebae9032d34b78ecfe9bd4b5e6575b55351dc8513485bb92326613732b8c/regex-2025.9.18-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4b8cdbddf2db1c5e80338ba2daa3cfa3dec73a46fff2a7dda087c8efbf12d62f", size = 862603, upload-time = "2025-09-19T00:36:33.344Z" }, + { url = "https://files.pythonhosted.org/packages/3b/74/12332c54b3882557a4bcd2b99f8be581f5c6a43cf1660a85b460dd8ff468/regex-2025.9.18-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a276937d9d75085b2c91fb48244349c6954f05ee97bba0963ce24a9d915b8b68", size = 910829, upload-time = "2025-09-19T00:36:34.826Z" }, + { url = "https://files.pythonhosted.org/packages/86/70/ba42d5ed606ee275f2465bfc0e2208755b06cdabd0f4c7c4b614d51b57ab/regex-2025.9.18-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92a8e375ccdc1256401c90e9dc02b8642894443d549ff5e25e36d7cf8a80c783", size = 802059, upload-time = "2025-09-19T00:36:36.664Z" }, + { url = "https://files.pythonhosted.org/packages/da/c5/fcb017e56396a7f2f8357412638d7e2963440b131a3ca549be25774b3641/regex-2025.9.18-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0dc6893b1f502d73037cf807a321cdc9be29ef3d6219f7970f842475873712ac", size = 786781, upload-time = "2025-09-19T00:36:38.168Z" }, + { url = "https://files.pythonhosted.org/packages/c6/ee/21c4278b973f630adfb3bcb23d09d83625f3ab1ca6e40ebdffe69901c7a1/regex-2025.9.18-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a61e85bfc63d232ac14b015af1261f826260c8deb19401c0597dbb87a864361e", size = 856578, upload-time = "2025-09-19T00:36:40.129Z" }, + { url = "https://files.pythonhosted.org/packages/87/0b/de51550dc7274324435c8f1539373ac63019b0525ad720132866fff4a16a/regex-2025.9.18-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1ef86a9ebc53f379d921fb9a7e42b92059ad3ee800fcd9e0fe6181090e9f6c23", size = 849119, upload-time = 
"2025-09-19T00:36:41.651Z" }, + { url = "https://files.pythonhosted.org/packages/60/52/383d3044fc5154d9ffe4321696ee5b2ee4833a28c29b137c22c33f41885b/regex-2025.9.18-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d3bc882119764ba3a119fbf2bd4f1b47bc56c1da5d42df4ed54ae1e8e66fdf8f", size = 788219, upload-time = "2025-09-19T00:36:43.575Z" }, + { url = "https://files.pythonhosted.org/packages/20/bd/2614fc302671b7359972ea212f0e3a92df4414aaeacab054a8ce80a86073/regex-2025.9.18-cp313-cp313-win32.whl", hash = "sha256:3810a65675845c3bdfa58c3c7d88624356dd6ee2fc186628295e0969005f928d", size = 264517, upload-time = "2025-09-19T00:36:45.503Z" }, + { url = "https://files.pythonhosted.org/packages/07/0f/ab5c1581e6563a7bffdc1974fb2d25f05689b88e2d416525271f232b1946/regex-2025.9.18-cp313-cp313-win_amd64.whl", hash = "sha256:16eaf74b3c4180ede88f620f299e474913ab6924d5c4b89b3833bc2345d83b3d", size = 275481, upload-time = "2025-09-19T00:36:46.965Z" }, + { url = "https://files.pythonhosted.org/packages/49/22/ee47672bc7958f8c5667a587c2600a4fba8b6bab6e86bd6d3e2b5f7cac42/regex-2025.9.18-cp313-cp313-win_arm64.whl", hash = "sha256:4dc98ba7dd66bd1261927a9f49bd5ee2bcb3660f7962f1ec02617280fc00f5eb", size = 268598, upload-time = "2025-09-19T00:36:48.314Z" }, + { url = "https://files.pythonhosted.org/packages/e8/83/6887e16a187c6226cb85d8301e47d3b73ecc4505a3a13d8da2096b44fd76/regex-2025.9.18-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:fe5d50572bc885a0a799410a717c42b1a6b50e2f45872e2b40f4f288f9bce8a2", size = 489765, upload-time = "2025-09-19T00:36:49.996Z" }, + { url = "https://files.pythonhosted.org/packages/51/c5/e2f7325301ea2916ff301c8d963ba66b1b2c1b06694191df80a9c4fea5d0/regex-2025.9.18-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b9d9a2d6cda6621551ca8cf7a06f103adf72831153f3c0d982386110870c4d3", size = 291228, upload-time = "2025-09-19T00:36:51.654Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/60/7d229d2bc6961289e864a3a3cfebf7d0d250e2e65323a8952cbb7e22d824/regex-2025.9.18-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:13202e4c4ac0ef9a317fff817674b293c8f7e8c68d3190377d8d8b749f566e12", size = 289270, upload-time = "2025-09-19T00:36:53.118Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d7/b4f06868ee2958ff6430df89857fbf3d43014bbf35538b6ec96c2704e15d/regex-2025.9.18-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:874ff523b0fecffb090f80ae53dc93538f8db954c8bb5505f05b7787ab3402a0", size = 806326, upload-time = "2025-09-19T00:36:54.631Z" }, + { url = "https://files.pythonhosted.org/packages/d6/e4/bca99034a8f1b9b62ccf337402a8e5b959dd5ba0e5e5b2ead70273df3277/regex-2025.9.18-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d13ab0490128f2bb45d596f754148cd750411afc97e813e4b3a61cf278a23bb6", size = 871556, upload-time = "2025-09-19T00:36:56.208Z" }, + { url = "https://files.pythonhosted.org/packages/6d/df/e06ffaf078a162f6dd6b101a5ea9b44696dca860a48136b3ae4a9caf25e2/regex-2025.9.18-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:05440bc172bc4b4b37fb9667e796597419404dbba62e171e1f826d7d2a9ebcef", size = 913817, upload-time = "2025-09-19T00:36:57.807Z" }, + { url = "https://files.pythonhosted.org/packages/9e/05/25b05480b63292fd8e84800b1648e160ca778127b8d2367a0a258fa2e225/regex-2025.9.18-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5514b8e4031fdfaa3d27e92c75719cbe7f379e28cacd939807289bce76d0e35a", size = 811055, upload-time = "2025-09-19T00:36:59.762Z" }, + { url = "https://files.pythonhosted.org/packages/70/97/7bc7574655eb651ba3a916ed4b1be6798ae97af30104f655d8efd0cab24b/regex-2025.9.18-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:65d3c38c39efce73e0d9dc019697b39903ba25b1ad45ebbd730d2cf32741f40d", 
size = 794534, upload-time = "2025-09-19T00:37:01.405Z" }, + { url = "https://files.pythonhosted.org/packages/b4/c2/d5da49166a52dda879855ecdba0117f073583db2b39bb47ce9a3378a8e9e/regex-2025.9.18-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ae77e447ebc144d5a26d50055c6ddba1d6ad4a865a560ec7200b8b06bc529368", size = 866684, upload-time = "2025-09-19T00:37:03.441Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2d/0a5c4e6ec417de56b89ff4418ecc72f7e3feca806824c75ad0bbdae0516b/regex-2025.9.18-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e3ef8cf53dc8df49d7e28a356cf824e3623764e9833348b655cfed4524ab8a90", size = 853282, upload-time = "2025-09-19T00:37:04.985Z" }, + { url = "https://files.pythonhosted.org/packages/f4/8e/d656af63e31a86572ec829665d6fa06eae7e144771e0330650a8bb865635/regex-2025.9.18-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9feb29817df349c976da9a0debf775c5c33fc1c8ad7b9f025825da99374770b7", size = 797830, upload-time = "2025-09-19T00:37:06.697Z" }, + { url = "https://files.pythonhosted.org/packages/db/ce/06edc89df8f7b83ffd321b6071be4c54dc7332c0f77860edc40ce57d757b/regex-2025.9.18-cp313-cp313t-win32.whl", hash = "sha256:168be0d2f9b9d13076940b1ed774f98595b4e3c7fc54584bba81b3cc4181742e", size = 267281, upload-time = "2025-09-19T00:37:08.568Z" }, + { url = "https://files.pythonhosted.org/packages/83/9a/2b5d9c8b307a451fd17068719d971d3634ca29864b89ed5c18e499446d4a/regex-2025.9.18-cp313-cp313t-win_amd64.whl", hash = "sha256:d59ecf3bb549e491c8104fea7313f3563c7b048e01287db0a90485734a70a730", size = 278724, upload-time = "2025-09-19T00:37:10.023Z" }, + { url = "https://files.pythonhosted.org/packages/3d/70/177d31e8089a278a764f8ec9a3faac8d14a312d622a47385d4b43905806f/regex-2025.9.18-cp313-cp313t-win_arm64.whl", hash = "sha256:dbef80defe9fb21310948a2595420b36c6d641d9bea4c991175829b2cc4bc06a", size = 269771, upload-time = "2025-09-19T00:37:13.041Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/b7/3b4663aa3b4af16819f2ab6a78c4111c7e9b066725d8107753c2257448a5/regex-2025.9.18-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c6db75b51acf277997f3adcd0ad89045d856190d13359f15ab5dda21581d9129", size = 486130, upload-time = "2025-09-19T00:37:14.527Z" }, + { url = "https://files.pythonhosted.org/packages/80/5b/4533f5d7ac9c6a02a4725fe8883de2aebc713e67e842c04cf02626afb747/regex-2025.9.18-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8f9698b6f6895d6db810e0bda5364f9ceb9e5b11328700a90cae573574f61eea", size = 289539, upload-time = "2025-09-19T00:37:16.356Z" }, + { url = "https://files.pythonhosted.org/packages/b8/8d/5ab6797c2750985f79e9995fad3254caa4520846580f266ae3b56d1cae58/regex-2025.9.18-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29cd86aa7cb13a37d0f0d7c21d8d949fe402ffa0ea697e635afedd97ab4b69f1", size = 287233, upload-time = "2025-09-19T00:37:18.025Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/95afcb02ba8d3a64e6ffeb801718ce73471ad6440c55d993f65a4a5e7a92/regex-2025.9.18-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c9f285a071ee55cd9583ba24dde006e53e17780bb309baa8e4289cd472bcc47", size = 797876, upload-time = "2025-09-19T00:37:19.609Z" }, + { url = "https://files.pythonhosted.org/packages/c8/fb/720b1f49cec1f3b5a9fea5b34cd22b88b5ebccc8c1b5de9cc6f65eed165a/regex-2025.9.18-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5adf266f730431e3be9021d3e5b8d5ee65e563fec2883ea8093944d21863b379", size = 863385, upload-time = "2025-09-19T00:37:21.65Z" }, + { url = "https://files.pythonhosted.org/packages/a9/ca/e0d07ecf701e1616f015a720dc13b84c582024cbfbb3fc5394ae204adbd7/regex-2025.9.18-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1137cabc0f38807de79e28d3f6e3e3f2cc8cfb26bead754d02e6d1de5f679203", size = 910220, upload-time = 
"2025-09-19T00:37:23.723Z" }, + { url = "https://files.pythonhosted.org/packages/b6/45/bba86413b910b708eca705a5af62163d5d396d5f647ed9485580c7025209/regex-2025.9.18-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cc9e5525cada99699ca9223cce2d52e88c52a3d2a0e842bd53de5497c604164", size = 801827, upload-time = "2025-09-19T00:37:25.684Z" }, + { url = "https://files.pythonhosted.org/packages/b8/a6/740fbd9fcac31a1305a8eed30b44bf0f7f1e042342be0a4722c0365ecfca/regex-2025.9.18-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bbb9246568f72dce29bcd433517c2be22c7791784b223a810225af3b50d1aafb", size = 786843, upload-time = "2025-09-19T00:37:27.62Z" }, + { url = "https://files.pythonhosted.org/packages/80/a7/0579e8560682645906da640c9055506465d809cb0f5415d9976f417209a6/regex-2025.9.18-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6a52219a93dd3d92c675383efff6ae18c982e2d7651c792b1e6d121055808743", size = 857430, upload-time = "2025-09-19T00:37:29.362Z" }, + { url = "https://files.pythonhosted.org/packages/8d/9b/4dc96b6c17b38900cc9fee254fc9271d0dde044e82c78c0811b58754fde5/regex-2025.9.18-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ae9b3840c5bd456780e3ddf2f737ab55a79b790f6409182012718a35c6d43282", size = 848612, upload-time = "2025-09-19T00:37:31.42Z" }, + { url = "https://files.pythonhosted.org/packages/b3/6a/6f659f99bebb1775e5ac81a3fb837b85897c1a4ef5acffd0ff8ffe7e67fb/regex-2025.9.18-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d488c236ac497c46a5ac2005a952c1a0e22a07be9f10c3e735bc7d1209a34773", size = 787967, upload-time = "2025-09-19T00:37:34.019Z" }, + { url = "https://files.pythonhosted.org/packages/61/35/9e35665f097c07cf384a6b90a1ac11b0b1693084a0b7a675b06f760496c6/regex-2025.9.18-cp314-cp314-win32.whl", hash = "sha256:0c3506682ea19beefe627a38872d8da65cc01ffa25ed3f2e422dffa1474f0788", size = 269847, upload-time = "2025-09-19T00:37:35.759Z" }, + { url = 
"https://files.pythonhosted.org/packages/af/64/27594dbe0f1590b82de2821ebfe9a359b44dcb9b65524876cd12fabc447b/regex-2025.9.18-cp314-cp314-win_amd64.whl", hash = "sha256:57929d0f92bebb2d1a83af372cd0ffba2263f13f376e19b1e4fa32aec4efddc3", size = 278755, upload-time = "2025-09-19T00:37:37.367Z" }, + { url = "https://files.pythonhosted.org/packages/30/a3/0cd8d0d342886bd7d7f252d701b20ae1a3c72dc7f34ef4b2d17790280a09/regex-2025.9.18-cp314-cp314-win_arm64.whl", hash = "sha256:6a4b44df31d34fa51aa5c995d3aa3c999cec4d69b9bd414a8be51984d859f06d", size = 271873, upload-time = "2025-09-19T00:37:39.125Z" }, + { url = "https://files.pythonhosted.org/packages/99/cb/8a1ab05ecf404e18b54348e293d9b7a60ec2bd7aa59e637020c5eea852e8/regex-2025.9.18-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b176326bcd544b5e9b17d6943f807697c0cb7351f6cfb45bf5637c95ff7e6306", size = 489773, upload-time = "2025-09-19T00:37:40.968Z" }, + { url = "https://files.pythonhosted.org/packages/93/3b/6543c9b7f7e734d2404fa2863d0d710c907bef99d4598760ed4563d634c3/regex-2025.9.18-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:0ffd9e230b826b15b369391bec167baed57c7ce39efc35835448618860995946", size = 291221, upload-time = "2025-09-19T00:37:42.901Z" }, + { url = "https://files.pythonhosted.org/packages/cd/91/e9fdee6ad6bf708d98c5d17fded423dcb0661795a49cba1b4ffb8358377a/regex-2025.9.18-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ec46332c41add73f2b57e2f5b642f991f6b15e50e9f86285e08ffe3a512ac39f", size = 289268, upload-time = "2025-09-19T00:37:44.823Z" }, + { url = "https://files.pythonhosted.org/packages/94/a6/bc3e8a918abe4741dadeaeb6c508e3a4ea847ff36030d820d89858f96a6c/regex-2025.9.18-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b80fa342ed1ea095168a3f116637bd1030d39c9ff38dc04e54ef7c521e01fc95", size = 806659, upload-time = "2025-09-19T00:37:46.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/71/ea62dbeb55d9e6905c7b5a49f75615ea1373afcad95830047e4e310db979/regex-2025.9.18-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4d97071c0ba40f0cf2a93ed76e660654c399a0a04ab7d85472239460f3da84b", size = 871701, upload-time = "2025-09-19T00:37:48.882Z" }, + { url = "https://files.pythonhosted.org/packages/6a/90/fbe9dedb7dad24a3a4399c0bae64bfa932ec8922a0a9acf7bc88db30b161/regex-2025.9.18-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0ac936537ad87cef9e0e66c5144484206c1354224ee811ab1519a32373e411f3", size = 913742, upload-time = "2025-09-19T00:37:51.015Z" }, + { url = "https://files.pythonhosted.org/packages/f0/1c/47e4a8c0e73d41eb9eb9fdeba3b1b810110a5139a2526e82fd29c2d9f867/regex-2025.9.18-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dec57f96d4def58c422d212d414efe28218d58537b5445cf0c33afb1b4768571", size = 811117, upload-time = "2025-09-19T00:37:52.686Z" }, + { url = "https://files.pythonhosted.org/packages/2a/da/435f29fddfd015111523671e36d30af3342e8136a889159b05c1d9110480/regex-2025.9.18-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:48317233294648bf7cd068857f248e3a57222259a5304d32c7552e2284a1b2ad", size = 794647, upload-time = "2025-09-19T00:37:54.626Z" }, + { url = "https://files.pythonhosted.org/packages/23/66/df5e6dcca25c8bc57ce404eebc7342310a0d218db739d7882c9a2b5974a3/regex-2025.9.18-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:274687e62ea3cf54846a9b25fc48a04459de50af30a7bd0b61a9e38015983494", size = 866747, upload-time = "2025-09-19T00:37:56.367Z" }, + { url = "https://files.pythonhosted.org/packages/82/42/94392b39b531f2e469b2daa40acf454863733b674481fda17462a5ffadac/regex-2025.9.18-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a78722c86a3e7e6aadf9579e3b0ad78d955f2d1f1a8ca4f67d7ca258e8719d4b", size = 853434, upload-time = 
"2025-09-19T00:37:58.39Z" }, + { url = "https://files.pythonhosted.org/packages/a8/f8/dcc64c7f7bbe58842a8f89622b50c58c3598fbbf4aad0a488d6df2c699f1/regex-2025.9.18-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:06104cd203cdef3ade989a1c45b6215bf42f8b9dd705ecc220c173233f7cba41", size = 798024, upload-time = "2025-09-19T00:38:00.397Z" }, + { url = "https://files.pythonhosted.org/packages/20/8d/edf1c5d5aa98f99a692313db813ec487732946784f8f93145e0153d910e5/regex-2025.9.18-cp314-cp314t-win32.whl", hash = "sha256:2e1eddc06eeaffd249c0adb6fafc19e2118e6308c60df9db27919e96b5656096", size = 273029, upload-time = "2025-09-19T00:38:02.383Z" }, + { url = "https://files.pythonhosted.org/packages/a7/24/02d4e4f88466f17b145f7ea2b2c11af3a942db6222429c2c146accf16054/regex-2025.9.18-cp314-cp314t-win_amd64.whl", hash = "sha256:8620d247fb8c0683ade51217b459cb4a1081c0405a3072235ba43a40d355c09a", size = 282680, upload-time = "2025-09-19T00:38:04.102Z" }, + { url = "https://files.pythonhosted.org/packages/1f/a3/c64894858aaaa454caa7cc47e2f225b04d3ed08ad649eacf58d45817fad2/regex-2025.9.18-cp314-cp314t-win_arm64.whl", hash = "sha256:b7531a8ef61de2c647cdf68b3229b071e46ec326b3138b2180acb4275f470b01", size = 273034, upload-time = "2025-09-19T00:38:05.807Z" }, +] + [[package]] name = "requests" version = "2.32.5" From b66324fb16d527988a705f47fde3c23aa4648185 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 16:40:37 +0200 Subject: [PATCH 032/115] WIP kit system --- .../{pytest.mdc => pytest_standards.mdc} | 2 +- ...ing_standards.mdc => python_standards.mdc} | 4 +- .cursor/rules/run_pipelines.mdc | 230 ++++ .../rules/{pipelex.mdc => write_pipelex.mdc} | 568 +++------ PLAN.md | 292 +++++ pipelex/cli/_cli.py | 2 + pipelex/cli/commands/init_cmd.py | 3 +- pipelex/cli/commands/kit_cmd.py | 57 + pipelex/cogt/models/model_manager.py | 2 +- pipelex/kit/agents/run_pipelines.md | 1084 +---------------- pipelex/kit/cursor_export.py | 66 + pipelex/kit/index.toml | 52 + 
pipelex/kit/index_loader.py | 20 + pipelex/kit/index_models.py | 45 + pipelex/kit/markers.py | 54 + pipelex/kit/paths.py | 31 + pipelex/kit/targets_update.py | 190 +++ 17 files changed, 1234 insertions(+), 1468 deletions(-) rename .cursor/rules/{pytest.mdc => pytest_standards.mdc} (99%) rename .cursor/rules/{coding_standards.mdc => python_standards.mdc} (98%) create mode 100644 .cursor/rules/run_pipelines.mdc rename .cursor/rules/{pipelex.mdc => write_pipelex.mdc} (63%) create mode 100644 PLAN.md create mode 100644 pipelex/cli/commands/kit_cmd.py create mode 100644 pipelex/kit/cursor_export.py create mode 100644 pipelex/kit/index_loader.py create mode 100644 pipelex/kit/index_models.py create mode 100644 pipelex/kit/markers.py create mode 100644 pipelex/kit/paths.py create mode 100644 pipelex/kit/targets_update.py diff --git a/.cursor/rules/pytest.mdc b/.cursor/rules/pytest_standards.mdc similarity index 99% rename from .cursor/rules/pytest.mdc rename to .cursor/rules/pytest_standards.mdc index 89a009295..30ceaca93 100644 --- a/.cursor/rules/pytest.mdc +++ b/.cursor/rules/pytest_standards.mdc @@ -1,5 +1,5 @@ --- -description: +description: Guide for writing unit tests globs: tests/**/*.py alwaysApply: false --- diff --git a/.cursor/rules/coding_standards.mdc b/.cursor/rules/python_standards.mdc similarity index 98% rename from .cursor/rules/coding_standards.mdc rename to .cursor/rules/python_standards.mdc index 73c0f02db..f58979554 100644 --- a/.cursor/rules/coding_standards.mdc +++ b/.cursor/rules/python_standards.mdc @@ -1,6 +1,6 @@ --- -description: -globs: +description: Guide for writing Python code +globs: *.py alwaysApply: true --- # Coding Standards & Best Practices diff --git a/.cursor/rules/run_pipelines.mdc b/.cursor/rules/run_pipelines.mdc new file mode 100644 index 000000000..31d474aea --- /dev/null +++ b/.cursor/rules/run_pipelines.mdc @@ -0,0 +1,230 @@ +--- +alwaysApply: false +description: Guidelines for running Pipelex pipelines +globs: +- 
examples/**/*.py +--- +# Guide to execute a pipeline and write example code + +## Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +# start Pipelex +Pipelex.make() +# run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +## Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + +from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +# start Pipelex +Pipelex.make() + +# run sample using asyncio +gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +## Setting up the input memory + +### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. 
The relevant definitions are: +```python +StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str +ImplicitMemory = Dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +# Here we have a single input and it's a Text. +# If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +# Here we have a single input and it's a PDF. +# Because PDFContent is a native concept, we can use it directly as a value, +# the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +# Here we have a single input and it's an Image. 
+# Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +# Here we have a single input, it's an image but +# it's actually a more specific concept gantt.GanttImage which refines Image, +# so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +# Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +## Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + diff --git a/.cursor/rules/pipelex.mdc b/.cursor/rules/write_pipelex.mdc similarity index 63% rename from .cursor/rules/pipelex.mdc rename to .cursor/rules/write_pipelex.mdc index ca99c0a6d..a802af442 100644 --- a/.cursor/rules/pipelex.mdc +++ b/.cursor/rules/write_pipelex.mdc @@ -1,14 +1,16 @@ --- alwaysApply: false +description: Guidelines for writing Pipelex pipelines +globs: +- '**/*.plx' +- '**/pipelines/**/*.py' --- -# Pipeline Guide +# Guide to write or edit pipelines using the Pipelex language in .plx files - Always first write your "plan" in natural langage, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. 
+- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) -# Pipeline Structure Guide - ## Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) - Files must be `.py` for structures @@ -84,9 +86,9 @@ inputs = { - `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: -# Structured Models Rules +## Structuring Models -## Model Location and Registration +### Model Location and Registration - Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` - Models must inherit from `StructuredContent` or appropriate content type @@ -115,7 +117,6 @@ If you simply need to refine another native concept, construct it like this: [concept.Landscape] refines = "Image" ``` - Only create a Python structure class when you need to add specific fields: ```python @@ -138,21 +139,21 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent # Date fields should remove timezone date_field: Optional[datetime] = None ``` -## Usage +### Usage Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. -## Best Practices for structures +### Best Practices for structures - Respect Pydantic v2 standards - Use type hints for all fields - Use `Field` declaration and write the description -## Pipe Controllers and Pipe Operator +## Pipe Controllers and Pipe Operators Look at the Pipes we have in order to adapt it. 
Pipes are organized in two categories: @@ -160,7 +161,6 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ - `PipeSequence` - For creating a sequence of multiple steps - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - `PipeParallel` - For parallelizing pipes - - `PipeBatch` - For running pipes in Batch over a ListContent 2. **Operators** - For specific tasks: - `PipeLLM` - Generate Text and Objects (include Vision LLM) @@ -169,12 +169,11 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ - `PipeImgGen` - Generate Images - `PipeFunc` - For running classic python scripts -# PipeSequence Guide +## PipeSequence controller -## Purpose -PipeSequence executes multiple pipes in a defined order, where each step can use results from previous steps. +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. -## Basic Structure +### Basic Structure ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -188,13 +187,13 @@ steps = [ ] ``` -## Key Components +### Key Components 1. **Steps Array**: List of pipes to execute in sequence - `pipe`: Name of the pipe to execute - `result`: Name to assign to the pipe's output that will be in the working memory -## Using PipeBatch in Steps +### Using PipeBatch in Steps You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: @@ -215,13 +214,11 @@ steps = [ The result of a batched step will be a `ListContent` containing the outputs from processing each item. -# PipeCondition Controller +## PipeCondition controller The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. 
-## Usage in PLX Configuration - -### Basic Usage with Direct Expression +### Basic usage ```plx [pipe.conditional_operation] @@ -230,8 +227,9 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" @@ -244,120 +242,173 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" ``` -## Key Parameters +### Key Parameters - `expression`: Direct boolean or string expression (mutually exclusive with expression_template) - `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `pipe_map`: Dictionary mapping expression results to pipe codes : -1 - The key on the left (`small`, `medium`) is the result of `expression` or `expression_template`. -2 - The value on the right (`process_small`, `process_medium`, ..) is the name of the pipce to trigger - -# PipeBatch Controller - -The PipeBatch controller allows you to apply a pipe operation to each element in a list of inputs in parallele. It is created via a PipeSequence. - -## Usage in PLX Configuration +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) 
is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found +Example with fail as default: ```plx -[pipe.sequence_with_batch] -type = "PipeSequence" -description = "A Sequence of pipes" -inputs = { input_data = "ConceptName" } -output = "OutputConceptName" -steps = [ - { pipe = "pipe_to_apply", batch_over = "input_list", batch_as = "current_item", result = "batch_results" } -] -``` - -## Key Parameters - -- `pipe`: The pipe operation to apply to each element in the batch -- `batch_over`: The name of the list in the context to iterate over -- `batch_as`: The name to use for the current element in the pipe's context -- `result`: Where to store the results of the batch operation +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" -# PipeLLM Guide +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` -## Purpose +## PipeLLM operator PipeLLM is used to: 1. Generate text or objects with LLMs 2. Process images with Vision LLMs -## Basic Usage +### Basic Usage -### Simple Text Generation +Simple Text Generation: ```plx [pipe.write_story] type = "PipeLLM" description = "Write a short story" output = "Text" -prompt_template = """ +prompt = """ Write a short story about a programmer. 
""" ``` -### Structured Data Extraction +Structured Data Extraction: ```plx [pipe.extract_info] type = "PipeLLM" description = "Extract information" inputs = { text = "Text" } output = "PersonInfo" -prompt_template = """ +prompt = """ Extract person information from this text: @text """ ``` -### System Prompts -Add system-level instructions: +Supports system instructions: ```plx [pipe.expert_analysis] type = "PipeLLM" description = "Expert analysis" output = "Analysis" system_prompt = "You are a data analysis expert" -prompt_template = "Analyze this data" +prompt = "Analyze this data" ``` ### Multiple Outputs -Generate multiple results: + +Generate multiple outputs (fixed number): ```plx [pipe.generate_ideas] type = "PipeLLM" description = "Generate ideas" output = "Idea" nb_output = 3 # Generate exactly 3 ideas -# OR +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" multiple_output = true # Let the LLM decide how many to generate ``` -### Vision Tasks -Process images with VLMs: +### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): ```plx [pipe.analyze_image] type = "PipeLLM" description = "Analyze image" -inputs = { image = "Image" } # `image` is the name of the stuff that contains the Image. If its in a stuff, you can add something like `{ "page.image": "Image" } +inputs = { image = "Image" } output = "ImageAnalysis" -prompt_template = "Describe what you see in this image" +prompt = """ +Describe what you see in this image: + +$image +""" ``` -# PipeOCR Guide +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` -## Purpose +### Writing prompts for PipeLLM -The PipeExtract operator is used to extract text and images from an image or a PDF +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** -## Basic Usage +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence.
+ + +## PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF ### Simple Text Extraction ```plx @@ -368,7 +419,17 @@ inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is output = "Page" ``` -Only one input is allowed and it must either be an `Image` or a `PDF`. +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. The output concept `Page` is a native concept, with the structure `PageContent`: It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` @@ -385,42 +446,54 @@ class PageContent(StructuredContent): # CONCEPT IS "Page" - `text_and_images` are the text, and the related images found in the input image or PDF. - `page_view` is the screenshot of the whole pdf page/image. -# PipeCompose Guide - -## Purpose +## PipeCompose operator The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
-## Basic Usage +### Basic Usage -### Simple Template Composition +Simple Template Composition: ```plx [pipe.compose_report] type = "PipeCompose" description = "Compose a report using template" inputs = { data = "ReportData" } output = "Text" -jinja2 = """ +template = """ # Report Summary Based on the analysis: -@data +$data Generated on: {{ current_date }} """ ``` -### Using Named Templates +Using Named Templates: ```plx [pipe.use_template] type = "PipeCompose" description = "Use a predefined template" inputs = { content = "Text" } output = "Text" -jinja2_name = "standard_report_template" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } ``` -### CRM Email Template +CRM Email Template: ```plx [pipe.compose_follow_up_email] type = "PipeCompose" @@ -429,7 +502,7 @@ inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } output = "Text" template_category = "html" templating_style = { tag_style = "square_brackets", text_format = "html" } -jinja2 = """ +template = """ Subject: Following up on our $deal.product_name discussion Hi $customer.first_name, @@ -459,29 +532,32 @@ $sales_rep.phone | $sales_rep.email """ ``` -## Key Parameters +### Key Parameters -- `jinja2`: Inline Jinja2 template (mutually exclusive with jinja2_name) -- `jinja2_name`: Name of a predefined template (mutually exclusive with jinja2) +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) - `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) 
- `templating_style`: Styling options for template rendering - `extra_context`: Additional context variables for template -## Template Variables +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +### Template Variables Use the same variable insertion rules as PipeLLM: - `@variable` for block insertion (multi-line content) - `$variable` for inline insertion (short text) -# PipeImgGen Guide - -## Purpose +## PipeImgGen operator The PipeImgGen operator is used to generate images using AI image generation models. -## Basic Usage +### Basic Usage -### Simple Image Generation +Simple Image Generation: ```plx [pipe.generate_image] type = "PipeImgGen" @@ -490,19 +566,19 @@ inputs = { prompt = "ImgGenPrompt" } output = "Image" ``` -### Using Image Generation Settings +Using Image Generation Settings: ```plx [pipe.generate_photo] type = "PipeImgGen" description = "Generate a high-quality photo" inputs = { prompt = "ImgGenPrompt" } output = "Photo" -model = { model = "flux-pro/v1.1-ultra", quality = "hd" } +model = { model = "fast-img-gen" } aspect_ratio = "16:9" -nb_steps = 8 +quality = "hd" ``` -### Multiple Image Generation +Multiple Image Generation: ```plx [pipe.generate_variations] type = "PipeImgGen" @@ -513,7 +589,7 @@ nb_output = 3 seed = "auto" ``` -### Advanced Configuration +Advanced Configuration: ```plx [pipe.generate_custom] type = "PipeImgGen" @@ -529,28 +605,25 @@ is_raw = false safety_tolerance = 3 ``` -## Key Parameters +### Key Parameters -### Image Generation Settings -- `img_gen`: ImgGenChoice (preset name or inline settings) -- `img_gen_handle`: Direct model handle (legacy) +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) - `quality`: Image quality ("standard", "hd") -- `nb_steps`: Number of generation steps -- 
`guidance_scale`: How closely to follow the prompt -### Output Configuration +**Output Configuration:** - `nb_output`: Number of images to generate - `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) - `output_format`: File format ("png", "jpeg", "webp") - `background`: Background type ("default", "transparent") -### Generation Control +**Generation Control:** - `seed`: Random seed (integer or "auto") - `is_raw`: Whether to apply post-processing - `is_moderated`: Enable content moderation - `safety_tolerance`: Content safety level (1-6) -## Input Requirements +### Input Requirements PipeImgGen requires exactly one input that must be either: - An `ImgGenPrompt` concept @@ -558,15 +631,13 @@ PipeImgGen requires exactly one input that must be either: The input can be named anything but must contain the prompt text for image generation. -# PipeFunc Guide - -## Purpose +## PipeFunc operator The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. 
-## Basic Usage +### Basic Usage -### Simple Function Call +Simple Function Call: ```plx [pipe.process_data] type = "PipeFunc" @@ -576,7 +647,7 @@ output = "ProcessedData" function_name = "process_data_function" ``` -### File Processing Example +File Processing Example: ```plx [pipe.read_file] type = "PipeFunc" @@ -586,11 +657,11 @@ output = "FileContent" function_name = "read_file_content" ``` -## Key Parameters +### Key Parameters - `function_name`: Name of the Python function to call (must be registered in func_registry) -## Function Requirements +### Function Requirements The Python function must: @@ -607,7 +678,7 @@ The Python function must: - `list[StuffContent]`: Multiple content objects (becomes ListContent) - `str`: Simple string (becomes TextContent) -## Function Registration +### Function Registration Functions must be registered in the function registry before use: @@ -626,7 +697,7 @@ async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: return MyResultContent(data=result) ``` -## Working Memory Access +### Working Memory Access Inside the function, access pipeline inputs through working memory: @@ -643,269 +714,6 @@ async def process_function(working_memory: WorkingMemory) -> TextContent: return TextContent(text=processed_text) ``` -This rule explains how to write prompt templates in PipeLLM definitions. - -## Insert stuff inside a tagged block - -If the inserted text is supposedly long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. 
- -Example template: -```plx -prompt_template = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In this example, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt template. - -**DO NOT write things like "Here is the expense: @expense".** -**DO write simply "@expense" alone in an isolated line.** - -## Insert stuff inline - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt_template = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -Here, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -**DO NOT write "$topic" alone in an isolated line.** -**DO write things like "Write an essay about $topic" included in an actual sentence.** - -# Guide to write an example to execute a pipeline - -## Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -# start Pipelex -Pipelex.make() -# run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -## Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -# start Pipelex -Pipelex.make() - -# run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -## Setting up the input memory - -### Explanation of input memory - -The input memory is a dictionary of key-value pairs, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -# Here we have a single input and it's a Text. 
-# If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -# Here we have a single input and it's an Image. -# Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -# Here we have a single input, it's an image but -# its actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -# Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -## Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extarct any variable from the output working memory. - -### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - --- ## Rules to choose LLM models used in PipeLLMs. 
@@ -937,7 +745,7 @@ The system first looks for direct model names, then checks aliases if no direct ### Using an LLM Handle in a PipeLLM -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: +Here is an example of using a model to specify which LLM to use in a PipeLLM: ```plx [pipe.hello_world] @@ -945,7 +753,7 @@ type = "PipeLLM" description = "Write text about Hello World." output = "Text" model = { model = "gpt-5", temperature = 0.9 } -prompt_template = """ +prompt = """ Write a haiku about Hello World. """ ``` @@ -971,7 +779,7 @@ description = "Extract invoice information from an invoice text transcript" inputs = { invoice_text = "InvoiceText" } output = "Invoice" model = "llm_to_extract_invoice" -prompt_template = """ +prompt = """ Extract invoice information from this invoice: The category of this invoice is: $invoice_details.category. diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 000000000..900544b65 --- /dev/null +++ b/PLAN.md @@ -0,0 +1,292 @@ +# Goal + +Add a `kit` command group that: + +1. Exports each `pipelex.kit/agents/*.md` to **Cursor** as `.cursor/rules/*.mdc` (adds YAML front-matter from `index.toml`). +2. Builds a **merged single-file rules block** from ordered fragments (with `demote=1`) and inserts/updates it in targets (`AGENTS.md`, `CLAUDE.md`, etc.) using **marker spans**. + All parsing/merging via **markdown-parser-py**; config via **TOML → dict** (`toml_utils`) → **Pydantic v2** models. + +--- + +# Package layout (as agreed) + +``` +pipelex/kit/ + __init__.py + index.toml + agents/ + pytest_standards.md + python_standards.md + run_pipelines.md + write_pipelex.md + configs/ + ... (as provided) + migrations/ + ... 
(optional) +``` + +--- + +# Pydantic v2 models (dict → object via `model_validate`) + +```python +# pipelex/kit/index_models.py +from pydantic import BaseModel, Field +from typing import Optional, List, Dict, Any + +class AgentsMerge(BaseModel): + order: List[str] + demote: int = 1 + +class CursorFileOverride(BaseModel): + front_matter: Dict[str, Any] = Field(default_factory=dict) + +class CursorSpec(BaseModel): + front_matter: Dict[str, Any] = Field(default_factory=dict) + files: Dict[str, CursorFileOverride] = Field(default_factory=dict) + +class Target(BaseModel): + id: str + path: str + strategy: str # "merge" (for now) + marker_begin: str + marker_end: str + parent: Optional[str] = None + +class KitIndex(BaseModel): + meta: Dict[str, Any] = Field(default_factory=dict) + agents: AgentsMerge + cursor: CursorSpec + targets: List[Target] +``` + +**Load `index.toml` using your utils:** + +```python +# pipelex/kit/index_loader.py +from pipelex.tools.misc.toml_utils import load_toml_from_path, TomlError +from importlib.resources import files +from .index_models import KitIndex + +def load_index() -> KitIndex: + path = files("pipelex.kit") / "index.toml" + data = load_toml_from_path(str(path)) + return KitIndex.model_validate(data) +``` + +--- + +# CLI integration (Typer) + +Add a **`kit`** sub-app under your existing CLI: + +```python +# pipelex/cli/commands/kit_cmd.py +import typer +from pathlib import Path +from typing_extensions import Annotated +from pipelex.kit.index_loader import load_index +from pipelex.kit.paths import get_agents_dir # small helper shown below +from pipelex.kit.cursor_export import export_cursor_rules +from pipelex.kit.targets_update import build_merged_rules, update_targets + +kit_app = typer.Typer(help="Manage kit assets: export Cursor rules and merge agent docs") + +@kit_app.command("sync") +def sync( + repo_root: Annotated[Path, typer.Option("--repo-root", dir_okay=True, writable=True)] = Path("."), + cursor: Annotated[bool, 
typer.Option("--cursor/--no-cursor")] = True, + single_files: Annotated[bool, typer.Option("--single-files/--no-single-files")] = True, + dry_run: Annotated[bool, typer.Option("--dry-run")] = False, + diff: Annotated[bool, typer.Option("--diff")] = False, + backup: Annotated[str | None, typer.Option("--backup")] = None, + force: Annotated[bool, typer.Option("--force")] = False, +): + idx = load_index() + agents_dir = get_agents_dir() + + if cursor: + export_cursor_rules(agents_dir, repo_root / ".cursor" / "rules", idx, dry_run=dry_run) + + if single_files: + merged_md = build_merged_rules(agents_dir, idx) # ordered + demoted + update_targets(repo_root, merged_md, idx.targets, dry_run=dry_run, diff=diff, backup=backup, force=force) +``` + +Wire it into your main CLI: + +```python +# in your main cli module +from pipelex.cli.commands.kit_cmd import kit_app +app.add_typer(kit_app, name="kit", help="Manage kit assets") +``` + +--- + +# Core helpers + +## Paths + +```python +# pipelex/kit/paths.py +from importlib.resources import files +from typing import Any +from importlib.abc import Traversable + +def get_kit_root() -> Traversable: + return files("pipelex.kit") + +def get_agents_dir() -> Traversable: + return get_kit_root() / "agents" +``` + +## Cursor export (.md → .mdc with YAML front matter) + +```python +# pipelex/kit/cursor_export.py +from typing import Iterable +from pathlib import Path +import typer +import yaml # pyyaml (MIT) +from importlib.abc import Traversable +from .index_models import KitIndex +from pipelex.tools.misc.toml_utils import TomlError # (for symmetry if needed) + +def _iter_agent_files(agents_dir: Traversable) -> Iterable[tuple[str, str]]: + for child in agents_dir.iterdir(): + if child.name.endswith(".md") and child.is_file(): + yield child.name, child.read_text(encoding="utf-8") + +def _front_matter_for(name: str, idx: KitIndex) -> dict: + base = dict(idx.cursor.front_matter) + key = name.removesuffix(".md") + if key in idx.cursor.files:
+ base |= idx.cursor.files[key].front_matter + return base + +def export_cursor_rules(agents_dir: Traversable, out_dir: Path, idx: KitIndex, dry_run: bool=False) -> None: + out_dir.mkdir(parents=True, exist_ok=True) + for fname, body in _iter_agent_files(agents_dir): + fm = _front_matter_for(fname, idx) + yaml_block = "---\n" + yaml.safe_dump(fm, sort_keys=False).rstrip() + "\n---\n" + mdc = yaml_block + body + out_path = out_dir / (fname.removesuffix(".md") + ".mdc") + if dry_run: + typer.echo(f"[DRY] write {out_path}") + else: + out_path.write_text(mdc, encoding="utf-8") +``` + +## Merging for single-file targets (markdown-parser-py) + +```python +# pipelex/kit/targets_update.py +from dataclasses import dataclass +from pathlib import Path +from importlib.abc import Traversable +from typing import List +import difflib +import typer + +from markdown_parser_py import Doc # replace with actual import names +from markdown_parser_py import parse as md_parse, render as md_render # adjust to real API + +from .index_models import KitIndex, Target +from .markers import find_span, wrap, replace_span +from pipelex.tools.misc.toml_utils import load_toml_from_path # (not strictly needed here) + +def _read_agent_file(agents_dir: Traversable, name: str) -> str: + return (agents_dir / name).read_text(encoding="utf-8") + +def _demote(doc: Doc, n: int) -> Doc: + # Walk headings and +n to their level (implementation per markdown-parser-py API) + # pseudo: + # for h in doc.headings(): h.level += n + return doc + +def build_merged_rules(agents_dir: Traversable, idx: KitIndex) -> str: + parts: List[str] = [] + for name in idx.agents.order: + md = _read_agent_file(agents_dir, name) + d = md_parse(md) + d = _demote(d, idx.agents.demote) + parts.append(md_render(d).rstrip()) + return ("\n\n".join(parts)).strip() + "\n" + +def _insert_block_with_ast(target_md: str, block_md: str, parent: str | None) -> str: + # Parse both; locate parent heading (if any) or choose heuristic (after first H1 
or end) + # Attach block as a new section. Return rendered string. + tdoc = md_parse(target_md or "") + bdoc = md_parse(block_md) + # ... attach via library’s attach API (level handling already demoted) + return md_render(tdoc) + +def _diff(before: str, after: str, path: str) -> str: + return "".join(difflib.unified_diff(before.splitlines(True), after.splitlines(True), fromfile=path, tofile=path)) + +def update_targets(repo_root: Path, merged_rules: str, targets: List[Target], + dry_run: bool, diff: bool, backup: str | None, force: bool) -> None: + for t in targets: + p = repo_root / t.path + before = p.read_text(encoding="utf-8") if p.exists() else "" + span = find_span(before, t.marker_begin, t.marker_end) + + if span: + after_block = wrap(t.marker_begin, t.marker_end, merged_rules) + after = replace_span(before, span, after_block) + else: + inserted = _insert_block_with_ast(before, merged_rules, t.parent) + after = inserted if inserted.endswith("\n") else inserted + "\n" + # wrap only the inserted region — simplest approach is to wrap merged_rules before insert + # or insert markers during AST attach (implementation detail) + + if dry_run: + typer.echo(f"[DRY] update {p}") + if diff: + typer.echo(_diff(before, after, str(p))) + else: + if backup and p.exists(): + (p.with_suffix(p.suffix + backup)).write_text(before, encoding="utf-8") + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(after, encoding="utf-8") +``` + +## Markers + +```python +# pipelex/kit/markers.py +def find_span(text: str, begin: str, end: str): + s = text.find(begin) + if s == -1: return None + e = text.find(end, s) + if e == -1: return None + e += len(end) + return (s, e) + +def wrap(begin: str, end: str, md: str) -> str: + return f"{begin}\n{md.rstrip()}\n{end}" + +def replace_span(text: str, span: tuple[int,int], replacement: str) -> str: + s, e = span + return text[:s] + replacement + text[e:] +``` + +--- + +# Behavior rules (quick) + +* **Cursor**: overwrite 
`.cursor/rules/*.mdc` each run (we own them). +* **Single-file**: only mutate content within our markers; if absent, **insert via AST** and add markers. +* **Demote before concat** (per `index.toml`). +* Preserve front-matter/encoding/line endings. +* `--dry-run` prints plan; `--diff` shows unified diff; `--backup` writes `*.bak`. + +--- + +# Exit codes + +* `0` success / no changes +* `1` failures (I/O, parse, invalid index, write blocked) +* `2` ambiguous parent resolution (if you choose to enforce) + +This plugs directly into your existing CLI, uses your TOML loader + Pydantic v2, and scopes the implementation clearly for another SWE to take over. diff --git a/pipelex/cli/_cli.py b/pipelex/cli/_cli.py index 85b041cf0..6382ea9e4 100644 --- a/pipelex/cli/_cli.py +++ b/pipelex/cli/_cli.py @@ -5,6 +5,7 @@ from pipelex.cli.commands import init_app, show_app, validate_app from pipelex.cli.commands.build_cmd import build_app +from pipelex.cli.commands.kit_cmd import kit_app class PipelexCLI(TyperGroup): @@ -35,3 +36,4 @@ def main() -> None: app.add_typer(validate_app, name="validate", help="Validation and dry-run commands") app.add_typer(show_app, name="show", help="Show and list commands") app.add_typer(build_app, name="build", help="Build artifacts like pipeline blueprints") +app.add_typer(kit_app, name="kit", help="Manage kit assets") diff --git a/pipelex/cli/commands/init_cmd.py b/pipelex/cli/commands/init_cmd.py index 60861e604..e61a6dcea 100644 --- a/pipelex/cli/commands/init_cmd.py +++ b/pipelex/cli/commands/init_cmd.py @@ -6,6 +6,7 @@ import typer from pipelex.exceptions import PipelexCLIError +from pipelex.kit.paths import get_configs_dir from pipelex.libraries.library_config import LibraryConfig from pipelex.tools.config.manager import config_manager @@ -32,7 +33,7 @@ def do_init_libraries(directory: str = ".", overwrite: bool = False) -> None: def do_init_config(reset: bool = False) -> None: """Initialize pipelex configuration in the current directory.""" - 
config_template_dir = os.path.join(config_manager.pipelex_root_dir, "config_template") + config_template_dir = str(get_configs_dir()) target_config_dir = config_manager.pipelex_config_dir os.makedirs(target_config_dir, exist_ok=True) diff --git a/pipelex/cli/commands/kit_cmd.py b/pipelex/cli/commands/kit_cmd.py new file mode 100644 index 000000000..d7d293128 --- /dev/null +++ b/pipelex/cli/commands/kit_cmd.py @@ -0,0 +1,57 @@ +"""CLI commands for kit asset management.""" + +from pathlib import Path + +import typer +from typing_extensions import Annotated + +from pipelex.exceptions import PipelexCLIError +from pipelex.kit.cursor_export import export_cursor_rules +from pipelex.kit.index_loader import load_index +from pipelex.kit.paths import get_agents_dir +from pipelex.kit.targets_update import build_merged_rules, update_targets + +kit_app = typer.Typer(help="Manage kit assets: export Cursor rules and merge agent docs", no_args_is_help=True) + + +@kit_app.command("sync") +def sync( + repo_root: Annotated[Path | None, typer.Option("--repo-root", dir_okay=True, writable=True, help="Repository root directory")] = None, + cursor: Annotated[bool, typer.Option("--cursor/--no-cursor", help="Export Cursor rules to .cursor/rules")] = True, + single_files: Annotated[bool, typer.Option("--single-files/--no-single-files", help="Update single-file agent documentation targets")] = True, + dry_run: Annotated[bool, typer.Option("--dry-run", help="Show what would be done without making changes")] = False, + diff: Annotated[bool, typer.Option("--diff", help="Show unified diff of changes")] = False, + backup: Annotated[str | None, typer.Option("--backup", help="Backup suffix (e.g., '.bak')")] = None, +) -> None: + """Sync kit assets: export Cursor rules and merge agent documentation. + + This command: + 1. Exports agent markdown files to Cursor .mdc files with YAML front-matter + 2. 
Builds merged agent documentation and updates target files using markers + """ + try: + if repo_root is None: + repo_root = Path() + + idx = load_index() + agents_dir = get_agents_dir() + + if cursor: + typer.echo("📤 Exporting Cursor rules...") + cursor_rules_dir = repo_root / ".cursor" / "rules" + export_cursor_rules(agents_dir, cursor_rules_dir, idx, dry_run=dry_run) + + if single_files: + typer.echo("📝 Building merged agent documentation...") + merged_md = build_merged_rules(agents_dir, idx) + typer.echo("📝 Updating target files...") + update_targets(repo_root, merged_md, idx.targets, dry_run=dry_run, diff=diff, backup=backup) + + if dry_run: + typer.echo("✅ Dry run completed - no changes made") + else: + typer.echo("✅ Kit sync completed successfully") + + except Exception as exc: + msg = f"Failed to sync kit assets: {exc}" + raise PipelexCLIError(msg) from exc diff --git a/pipelex/cogt/models/model_manager.py b/pipelex/cogt/models/model_manager.py index f669131c8..1d14f3dbe 100644 --- a/pipelex/cogt/models/model_manager.py +++ b/pipelex/cogt/models/model_manager.py @@ -14,7 +14,7 @@ from pipelex.cogt.models.model_manager_abstract import ModelManagerAbstract from pipelex.config import get_config from pipelex.tools.misc.json_utils import deep_update -from pipelex.tools.misc.toml_utils import load_toml_from_path, load_toml_from_path_if_exists, load_toml_from_content, TomlError +from pipelex.tools.misc.toml_utils import load_toml_from_path class ModelManager(ModelManagerAbstract): diff --git a/pipelex/kit/agents/run_pipelines.md b/pipelex/kit/agents/run_pipelines.md index 6c6930a3e..7fe22547f 100644 --- a/pipelex/kit/agents/run_pipelines.md +++ b/pipelex/kit/agents/run_pipelines.md @@ -1,928 +1,4 @@ -# Coding Standards & Best Practices - -This document outlines the core coding standards, best practices, and quality control procedures for the codebase. - -## Type Hints - -1. 
**Always Use Type Hints** - - - Every function parameter must be typed - - Every function return must be typed - - Use type hints for all variables where type is not obvious - - Use dict, list, tupele types with lowercase first letter: dict[], list[], tuple[] - - Use type hints for all fields - - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals - - Use Field(default_factory=...) for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` - - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose - -2. **StrEnum** - - Import from `pipelex.types`: - ```python - from pipelex.types import StrEnum - ``` - -3. **Self type** - - Import from `pipelex.types`: - ```python - from pipelex.types import Self - ``` - -## Factory Pattern - - - Use Factory Pattern for object creation when dealing with multiple implementations - - Our factory methods are named `make_from_...` and such - -## Error Handling - - - Always catch exceptions at the place where you can add useful context to it. - - Use try/except blocks with specific exceptions - - Convert third-party exceptions to our custom ones - - Never catch Exception, only catch specific exceptions - - Always add `from exc` to the exception - - ```python - try: - self.models_manager.setup() - except RoutingProfileLibraryNotFoundError as exc: - msg = "The routing library could not be found, please call `pipelex init config` to create it" - raise PipelexSetupError(msg) from exc - ``` - - **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. - -## Documentation - -1. 
**Docstring Format** - ```python - def process_image(image_path: str, size: Tuple[int, int]) -> bytes: - """Process and resize an image. - - Args: - image_path: Path to the source image - size: Tuple of (width, height) for resizing - - Returns: - Processed image as bytes - """ - pass - ``` - -2. **Class Documentation** - ```python - class ImageProcessor: - """Handles image processing operations. - - Provides methods for resizing, converting, and optimizing images. - """ - ``` - -## Code Quality Checks - -### Linting and Type Checking - -Before finalizing a task, run: -```bash -make fix-unused-imports -make check -``` - -This runs multiple code quality tools: -- Pyright: Static type checking -- Ruff: Fast Python linter -- Mypy: Static type checker - -Always fix any issues reported by these tools before proceeding. - -### Running Tests - -1. **Quick Test Run** (no LLM/image generation): - ```bash - make tp - ``` - Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` - -2. **Specific Tests**: - ```bash - make tp TEST=TestClassName - # or - make tp TEST=test_function_name - ``` - Note: Matches names starting with the provided string. - -**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. - -## Pipelines - -- All pipeline definitions go in `pipelex/libraries/pipelines/` -- Always validate pipelines after creation/edit with `make validate`. - Iterate if there are errors. - -## Project Structure - -- **Pipelines**: `pipelex/libraries/pipelines/` -- **Tests**: `tests/` directory -- **Documentation**: `docs/` directory - ---- - -# Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural langage, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. 
It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) - -## Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures -- Use descriptive names in `snake_case` - -## Pipeline File Structure -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -### Pipe Definitions - -## Pipe Base Structure - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). 
-So If you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: - -## Structuring Models - -### Model Location and Registration - -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` -- Models must inherit from `StructuredContent` or appropriate content type - -## Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, "Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -### Usage - -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. - -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -## Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
- - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -## PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -### Basic Structure -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -## PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. - -### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -## PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -## PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -## PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -# Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -## PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -## PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - ---- - -# Guide to write an example to execute a pipeline +# Guide to execute a pipeline and write example code ## Example to execute a pipeline with text output @@ -1146,161 +222,3 @@ result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) --- -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. - -### Test file structure - -- Name test files with `test_` prefix -- Use descriptive names that match the functionality being tested -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put test inside Test classes. 
-- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "extract: uses text/image extraction from documents" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference`and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. - -### Test Class Structure - -Always group the tests of a module into a test class: - -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` - -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. 
- -## Writing integration test to test pipes - -### Required imports for pipe tests - -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol -``` - -### Pipe test implementation steps - -1. Create Stuff from blueprint: - -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` - -2. Create Working Memory: - -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) -``` - -3. Run the pipe: - -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` - -4. 
Basic assertions: - -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None -``` - -### Test Data Organization - -- If it's not already there, create a `test_data.py` file in the test directory -- Define test cases using `StuffBlueprint`: - -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) - - CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] -``` - -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. - -## Best Practices for Testing - -- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose -- Log outputs for debugging -- Generate reports for cost tracking diff --git a/pipelex/kit/cursor_export.py b/pipelex/kit/cursor_export.py new file mode 100644 index 000000000..4c2ed7e12 --- /dev/null +++ b/pipelex/kit/cursor_export.py @@ -0,0 +1,66 @@ +"""Export agent markdown files to Cursor rules with YAML front-matter.""" + +from collections.abc import Iterable +from importlib.abc import Traversable +from pathlib import Path +from typing import Any + +import typer +import yaml + +from pipelex.kit.index_models import KitIndex + + +def _iter_agent_files(agents_dir: Traversable) -> Iterable[tuple[str, str]]: + """Iterate over agent markdown files. 
+ + Args: + agents_dir: Traversable pointing to agents directory + + Yields: + Tuples of (filename, file_content) + """ + for child in agents_dir.iterdir(): + if child.name.endswith(".md") and child.is_file(): + yield child.name, child.read_text(encoding="utf-8") + + +def _front_matter_for(name: str, idx: KitIndex) -> dict[str, Any]: + """Build front-matter for a specific file. + + Args: + name: Filename (e.g., "pytest_standards.md") + idx: Kit index configuration + + Returns: + Merged front-matter dictionary + """ + base = dict(idx.cursor.front_matter) + key = name.removesuffix(".md") + if key in idx.cursor.files: + base |= idx.cursor.files[key].front_matter + return base + + +def export_cursor_rules(agents_dir: Traversable, out_dir: Path, idx: KitIndex, dry_run: bool = False) -> None: + """Export agent markdown files to Cursor .mdc files with YAML front-matter. + + Args: + agents_dir: Traversable pointing to agents directory + out_dir: Output directory for .mdc files + idx: Kit index configuration + dry_run: If True, only print what would be done + """ + out_dir.mkdir(parents=True, exist_ok=True) + + for fname, body in _iter_agent_files(agents_dir): + fm = _front_matter_for(fname, idx) + yaml_block = "---\n" + yaml.safe_dump(fm, sort_keys=False).rstrip() + "\n---\n" + mdc = yaml_block + body + out_path = out_dir / (fname.removesuffix(".md") + ".mdc") + + if dry_run: + typer.echo(f"[DRY] write {out_path}") + else: + out_path.write_text(mdc, encoding="utf-8") + typer.echo(f"✅ Exported {out_path}") diff --git a/pipelex/kit/index.toml b/pipelex/kit/index.toml index e69de29bb..2d19d428f 100644 --- a/pipelex/kit/index.toml +++ b/pipelex/kit/index.toml @@ -0,0 +1,52 @@ +[meta] +version = "1.0.0" +description = "Pipelex kit configuration for agent documentation and cursor rules" + +[agents] +order = [ + "python_standards.md", + "pytest_standards.md", + "write_pipelex.md", + "run_pipelines.md", +] +demote = 1 + +[cursor.front_matter] +alwaysApply = false + 
+[cursor.files.python_standards] +front_matter = { description = "Python coding standards and best practices", globs = [ + "**/*.py", +] } + +[cursor.files.pytest_standards] +front_matter = { description = "Pytest testing standards", globs = [ + "tests/**/*.py", +] } + +[cursor.files.write_pipelex] +front_matter = { description = "Guidelines for writing Pipelex pipelines", globs = [ + "**/*.plx", + "**/pipelines/**/*.py", +] } + +[cursor.files.run_pipelines] +front_matter = { description = "Guidelines for running Pipelex pipelines", globs = [ + "examples/**/*.py", +] } + +[[targets]] +id = "agents_md" +path = "AGENTS.md" +strategy = "merge" +marker_begin = "" +marker_end = "" +parent = "# Coding Standards & Best Practices" + +[[targets]] +id = "claude_md" +path = "CLAUDE.md" +strategy = "merge" +marker_begin = "" +marker_end = "" +parent = "# Coding Standards & Best Practices" diff --git a/pipelex/kit/index_loader.py b/pipelex/kit/index_loader.py new file mode 100644 index 000000000..572e26eb1 --- /dev/null +++ b/pipelex/kit/index_loader.py @@ -0,0 +1,20 @@ +"""Index loader for kit configuration.""" + +from pipelex.kit.index_models import KitIndex +from pipelex.kit.paths import get_kit_root +from pipelex.tools.misc.toml_utils import load_toml_from_path + + +def load_index() -> KitIndex: + """Load and validate the kit index.toml configuration. 
+ + Returns: + Validated KitIndex model + + Raises: + TomlError: If TOML parsing fails + ValidationError: If validation fails + """ + index_path = get_kit_root() / "index.toml" + data = load_toml_from_path(str(index_path)) + return KitIndex.model_validate(data) diff --git a/pipelex/kit/index_models.py b/pipelex/kit/index_models.py new file mode 100644 index 000000000..408108d87 --- /dev/null +++ b/pipelex/kit/index_models.py @@ -0,0 +1,45 @@ +"""Pydantic models for kit index configuration.""" + +from typing import Any + +from pydantic import BaseModel, Field + + +class AgentsMerge(BaseModel): + """Configuration for merging agent documentation files.""" + + order: list[str] = Field(description="Ordered list of agent markdown files to merge") + demote: int = Field(default=1, description="Number of levels to demote headings when merging") + + +class CursorFileOverride(BaseModel): + """Per-file front-matter overrides for Cursor export.""" + + front_matter: dict[str, Any] = Field(default_factory=dict, description="Front-matter to override for this file") + + +class CursorSpec(BaseModel): + """Configuration for Cursor rules export.""" + + front_matter: dict[str, Any] = Field(default_factory=dict, description="Default YAML front-matter for all Cursor files") + files: dict[str, CursorFileOverride] = Field(default_factory=dict, description="Per-file front-matter overrides") + + +class Target(BaseModel): + """Configuration for a single-file merge target.""" + + id: str = Field(description="Unique identifier for this target") + path: str = Field(description="Path to the target file relative to repo root") + strategy: str = Field(description="Merge strategy (currently only 'merge' supported)") + marker_begin: str = Field(description="Beginning marker for content insertion") + marker_end: str = Field(description="Ending marker for content insertion") + parent: str | None = Field(default=None, description="Parent heading to insert under if markers not found") + + +class 
KitIndex(BaseModel): + """Root configuration model for kit index.toml.""" + + meta: dict[str, Any] = Field(default_factory=dict, description="Metadata about the kit configuration") + agents: AgentsMerge = Field(description="Agent documentation merge configuration") + cursor: CursorSpec = Field(description="Cursor rules export configuration") + targets: list[Target] = Field(description="List of single-file merge targets") diff --git a/pipelex/kit/markers.py b/pipelex/kit/markers.py new file mode 100644 index 000000000..c2ad9a056 --- /dev/null +++ b/pipelex/kit/markers.py @@ -0,0 +1,54 @@ +"""Marker utilities for content insertion and replacement.""" + + +def find_span(text: str, begin: str, end: str) -> tuple[int, int] | None: + """Find the span of text between begin and end markers. + + Args: + text: Text to search in + begin: Beginning marker string + end: Ending marker string + + Returns: + Tuple of (start, end) indices if both markers found, None otherwise + The end index includes the end marker itself + """ + start = text.find(begin) + if start == -1: + return None + + end_pos = text.find(end, start) + if end_pos == -1: + return None + + end_pos += len(end) + return (start, end_pos) + + +def wrap(begin: str, end: str, content: str) -> str: + """Wrap content with begin and end markers. + + Args: + begin: Beginning marker + end: Ending marker + content: Content to wrap + + Returns: + Wrapped content with markers and newlines + """ + return f"{begin}\n{content.rstrip()}\n{end}" + + +def replace_span(text: str, span: tuple[int, int], replacement: str) -> str: + """Replace the text at the given span with replacement. 
+ + Args: + text: Original text + span: Tuple of (start, end) indices + replacement: Replacement text + + Returns: + Text with span replaced by replacement + """ + start, end = span + return text[:start] + replacement + text[end:] diff --git a/pipelex/kit/paths.py b/pipelex/kit/paths.py new file mode 100644 index 000000000..ff29a1eb8 --- /dev/null +++ b/pipelex/kit/paths.py @@ -0,0 +1,31 @@ +"""Path utilities for the kit system.""" + +from importlib.abc import Traversable +from importlib.resources import files + + +def get_kit_root() -> Traversable: + """Get the root directory of the kit package. + + Returns: + Traversable object pointing to pipelex.kit package + """ + return files("pipelex.kit") + + +def get_agents_dir() -> Traversable: + """Get the agents directory within the kit package. + + Returns: + Traversable object pointing to pipelex.kit/agents + """ + return get_kit_root() / "agents" + + +def get_configs_dir() -> Traversable: + """Get the configs directory within the kit package. + + Returns: + Traversable object pointing to pipelex.kit/configs + """ + return get_kit_root() / "configs" diff --git a/pipelex/kit/targets_update.py b/pipelex/kit/targets_update.py new file mode 100644 index 000000000..552915864 --- /dev/null +++ b/pipelex/kit/targets_update.py @@ -0,0 +1,190 @@ +"""Build and update merged agent documentation in target files.""" + +import difflib +import re +from importlib.abc import Traversable +from pathlib import Path + +import typer + +from pipelex.kit.index_models import KitIndex, Target +from pipelex.kit.markers import find_span, replace_span, wrap + + +def _read_agent_file(agents_dir: Traversable, name: str) -> str: + """Read an agent markdown file. 
+ + Args: + agents_dir: Traversable pointing to agents directory + name: Filename to read + + Returns: + File content as string + """ + return (agents_dir / name).read_text(encoding="utf-8") + + +def _demote_headings(md_content: str, levels: int) -> str: + """Demote all headings in markdown content by specified levels. + + Args: + md_content: Markdown content + levels: Number of levels to demote + + Returns: + Markdown with demoted headings + """ + if levels == 0: + return md_content + + # Use regex to add extra # to ATX-style headings + def demote_match(match: re.Match[str]) -> str: + hashes = match.group(1) + rest = match.group(2) + return f"{'#' * levels}{hashes}{rest}" + + # Match lines starting with # (ATX-style headings) + pattern = r"^(#{1,6})(.*)$" + return re.sub(pattern, demote_match, md_content, flags=re.MULTILINE) + + +def build_merged_rules(agents_dir: Traversable, idx: KitIndex) -> str: + """Build merged agent documentation from ordered files. + + Args: + agents_dir: Traversable pointing to agents directory + idx: Kit index configuration + + Returns: + Merged markdown content with demoted headings + """ + parts: list[str] = [] + + for name in idx.agents.order: + md = _read_agent_file(agents_dir, name) + demoted = _demote_headings(md, idx.agents.demote) + parts.append(demoted.rstrip()) + + return ("\n\n".join(parts)).strip() + "\n" + + +def _insert_block_with_ast(target_md: str, block_md: str, parent: str | None, markers: tuple[str, str]) -> str: + """Insert block into target markdown with heuristic placement. 
+ + Args: + target_md: Existing target markdown content + block_md: Block to insert + parent: Parent heading to insert under (if specified) + markers: Tuple of (begin_marker, end_marker) + + Returns: + Updated markdown with block inserted and markers added + """ + marker_begin, marker_end = markers + wrapped_block = wrap(marker_begin, marker_end, block_md) + + if not target_md: + # Empty file - just insert the wrapped block + return wrapped_block + "\n" + + # If parent heading is specified, try to find it and insert after + if parent: + # Escape special regex characters in parent + escaped_parent = re.escape(parent.strip()) + # Look for the parent heading line + pattern = rf"^({escaped_parent})\s*$" + match = re.search(pattern, target_md, flags=re.MULTILINE | re.IGNORECASE) + if match: + # Insert after the parent heading line + insert_pos = match.end() + return target_md[:insert_pos] + "\n\n" + wrapped_block + "\n" + target_md[insert_pos:] + + # Fallback: insert after first H1 heading + h1_pattern = r"^#\s+.+$" + match = re.search(h1_pattern, target_md, flags=re.MULTILINE) + if match: + insert_pos = match.end() + return target_md[:insert_pos] + "\n\n" + wrapped_block + "\n" + target_md[insert_pos:] + + # Last resort: append at the end + return target_md.rstrip() + "\n\n" + wrapped_block + "\n" + + +def _diff(before: str, after: str, path: str) -> str: + """Generate unified diff between before and after. + + Args: + before: Original content + after: Modified content + path: File path for diff header + + Returns: + Unified diff string + """ + return "".join( + difflib.unified_diff( + before.splitlines(keepends=True), + after.splitlines(keepends=True), + fromfile=path, + tofile=path, + ) + ) + + +def update_targets( + repo_root: Path, + merged_rules: str, + targets: list[Target], + dry_run: bool, + diff: bool, + backup: str | None, +) -> None: + """Update target files with merged agent documentation. 
+ + Args: + repo_root: Repository root directory + merged_rules: Merged markdown content to insert + targets: List of target file configurations + dry_run: If True, only print what would be done + diff: If True, show unified diff + backup: Backup suffix (e.g., ".bak"), or None for no backup + """ + for target in targets: + target_path = repo_root / target.path + before = target_path.read_text(encoding="utf-8") if target_path.exists() else "" + + span = find_span(before, target.marker_begin, target.marker_end) + + if span: + # Markers exist - replace content between them + wrapped_block = wrap(target.marker_begin, target.marker_end, merged_rules) + after = replace_span(before, span, wrapped_block) + else: + # No markers - insert via AST and add markers + after = _insert_block_with_ast( + before, + merged_rules, + target.parent, + (target.marker_begin, target.marker_end), + ) + + if dry_run: + typer.echo(f"[DRY] update {target_path}") + if diff: + diff_output = _diff(before, after, str(target_path)) + if diff_output: + typer.echo(diff_output) + else: + if backup and target_path.exists(): + backup_path = target_path.with_suffix(target_path.suffix + backup) + backup_path.write_text(before, encoding="utf-8") + typer.echo(f"📦 Backup saved to {backup_path}") + + target_path.parent.mkdir(parents=True, exist_ok=True) + target_path.write_text(after, encoding="utf-8") + typer.echo(f"✅ Updated {target_path}") + + if diff: + diff_output = _diff(before, after, str(target_path)) + if diff_output: + typer.echo(diff_output) From 60c170cca1a425cea87b5944298eedc587a94c46 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 16:48:41 +0200 Subject: [PATCH 033/115] WIP kit --- .cursor/rules/docs.mdc | 6 +- .cursor/rules/llms.mdc | 5 +- .cursor/rules/pytest_standards.mdc | 16 +- .cursor/rules/python_standards.mdc | 1190 ++++++++++++++++- .cursor/rules/run_pipelex.mdc | 230 ++++ .cursor/rules/tdd.mdc | 5 +- pipelex/kit/agents/docs.md | 8 + pipelex/kit/agents/llms.md | 78 ++ 
.../{run_pipelines.md => run_pipelex.md} | 0 pipelex/kit/agents/tdd.md | 24 + pipelex/kit/index.toml | 66 +- pipelex/kit/index_models.py | 3 +- pipelex/kit/targets_update.py | 12 +- .../pipelex/cli/commands/test_init_cmd.py | 64 +- .../tools/config/test_config_manager.py | 2 +- 15 files changed, 1632 insertions(+), 77 deletions(-) create mode 100644 .cursor/rules/run_pipelex.mdc create mode 100644 pipelex/kit/agents/docs.md create mode 100644 pipelex/kit/agents/llms.md rename pipelex/kit/agents/{run_pipelines.md => run_pipelex.md} (100%) create mode 100644 pipelex/kit/agents/tdd.md diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc index ac9c5191e..c4140ae5c 100644 --- a/.cursor/rules/docs.mdc +++ b/.cursor/rules/docs.mdc @@ -1,7 +1,8 @@ --- -description: -globs: docs/**/*.md alwaysApply: false +description: '' +globs: +- docs/**/*.md --- Write docs and answer questions about writing docs. @@ -10,3 +11,4 @@ We use Material for MkDocs. All markdown in our docs must be compatible with Mat ## MkDocs Markdown Requirements - Always add a blank line before any bullet lists or numbered lists in MkDocs markdown. + diff --git a/.cursor/rules/llms.mdc b/.cursor/rules/llms.mdc index d93ee4a08..d9ce2ff1e 100644 --- a/.cursor/rules/llms.mdc +++ b/.cursor/rules/llms.mdc @@ -1,6 +1,8 @@ --- -globs: *.plx,*.toml alwaysApply: false +globs: +- '*.plx' +- '*.toml' --- # Rules to choose LLM models used in PipeLLMs. @@ -79,3 +81,4 @@ You must not use an LLM preset in a PipeLLM that does not exist in the deck. If You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. 
+ diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc index 30ceaca93..857faee25 100644 --- a/.cursor/rules/pytest_standards.mdc +++ b/.cursor/rules/pytest_standards.mdc @@ -1,7 +1,8 @@ --- -description: Guide for writing unit tests -globs: tests/**/*.py alwaysApply: false +description: Pytest testing standards +globs: +- tests/**/*.py --- # Writing unit tests @@ -9,10 +10,7 @@ alwaysApply: false NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. -These rules apply when writing unit tests. -- Always use pytest - -## Test file structure +### Test file structure - Name test files with `test_` prefix - Use descriptive names that match the functionality being tested @@ -26,12 +24,12 @@ These rules apply when writing unit tests. - Always put test inside Test classes. - The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` -## Markers +### Markers Apply the appropriate markers: - "llm: uses an LLM to generate text or objects" - "img_gen: uses an image generation AI" -- "extract: uses ocr models" +- "extract: uses text/image extraction from documents" - "inference: uses either an LLM or an image generation AI" - "gha_disabled: will not be able to run properly on GitHub Actions" @@ -41,7 +39,7 @@ Several markers may be applied. For instance, if the test uses an LLM, then it u - Never use the unittest.mock. Use pytest-mock. 
-## Test Class Structure +### Test Class Structure Always group the tests of a module into a test class: diff --git a/.cursor/rules/python_standards.mdc b/.cursor/rules/python_standards.mdc index f58979554..8fb9315ca 100644 --- a/.cursor/rules/python_standards.mdc +++ b/.cursor/rules/python_standards.mdc @@ -1,7 +1,8 @@ --- -description: Guide for writing Python code -globs: *.py -alwaysApply: true +alwaysApply: false +description: Python coding standards and best practices +globs: +- '**/*.py' --- # Coding Standards & Best Practices @@ -127,4 +128,1185 @@ Always fix any issues reported by these tools before proceeding. - **Pipelines**: `pipelex/libraries/pipelines/` - **Tests**: `tests/` directory -- **Documentation**: `docs/` directory \ No newline at end of file +- **Documentation**: `docs/` directory + +--- + +# Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +## Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for structures +- Use descriptive names in `snake_case` + +## Pipeline File Structure +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
+ +### Concept Definitions +```plx +[concept] +ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output +``` + +Important Rules: +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") +- Avoid adjectives (no "LargeText", use "Text") +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) +yes +### Pipe Definitions + +## Pipe Base Structure + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +DO NOT WRITE: +```plx +[pipe.your_pipe_name] +type = "pipe_sequence" +``` + +But it should be: + +```plx +[pipe.your_pipe_name] +type = "PipeSequence" +description = "....." +``` + +The pipes will all have at least this base structure. +- `inputs`: Dictionary with the key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of the sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: + +## Structuring Models + +### Model Location and Registration + +- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/some_domain.py` +- Models must inherit from `StructuredContent` or appropriate content type + +## Model Structure + +Concepts and their structure classes are meant to indicate an idea. +A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. + +**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** + +DO NOT create structures like: +```python +class Joke(TextContent): + """A humorous text that makes people laugh.""" + pass +``` + +If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: +```plx +[concept] +Joke = "A humorous text that makes people laugh." +``` +If you simply need to refine another native concept, construct it like this: +```plx +[concept.Landscape] +refines = "Image" +``` +Only create a Python structure class when you need to add specific fields: + +```python +from datetime import datetime +from typing import List, Optional +from pydantic import Field + +from pipelex.core.stuffs.structured_content import StructuredContent + +# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +class YourModel(StructuredContent): # Always be a subclass of StructuredContent + # Required fields + field1: str + field2: int + + # Optional fields with defaults + field3: Optional[str] = Field(None, description="Description of field3") + field4: List[str] = Field(default_factory=list) + + # Date fields should remove timezone + date_field: Optional[datetime] = None +``` +### Usage + +Structures are meant to indicate what class to use for a particular Concept. 
In general they use the same name as the concept. + +Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. + + +### Best Practices for structures + +- Respect Pydantic v2 standards +- Use type hints for all fields +- Use `Field` declaration and write the description + + +## Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +## PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. + +### Basic Structure +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +### Key Components + +1. 
**Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +## PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." 
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +## PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. Process images with Vision LLMs + +### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. 
+""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." +``` + +### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. 
+ +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. + + +## PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF + +### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. 
+output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: Optional[TextContent] + images: Optional[List[ImageContent]] + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: Optional[ImageContent] = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +## PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +# Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +## PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +## PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +--- + +# Guide to write an example to execute a pipeline + +## Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +# start Pipelex +Pipelex.make() +# run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +## Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + +from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +# start Pipelex +Pipelex.make() + +# run sample using asyncio +gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +## Setting up the input memory + +### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str +ImplicitMemory = Dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +# Here we have a single input and it's a Text. +# If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +# Here we have a single input and it's a PDF. +# Because PDFContent is a native concept, we can use it directly as a value, +# the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +# Here we have a single input and it's an Image. 
+# Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +# Here we have a single input, it's an image but +# its actually a more specific concept gantt.GanttImage which refines Image, +# so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +# Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +## Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +# Writing unit tests + +## Unit test generalities + +NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
+ +### Test file structure + +- Name test files with `test_` prefix +- Use descriptive names that match the functionality being tested +- Place test files in the appropriate test category directory: + - `tests/unit/` - for unit tests that test individual functions/classes in isolation + - `tests/integration/` - for integration tests that test component interactions + - `tests/e2e/` - for end-to-end tests that test complete workflows + - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) +- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest +- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. +- Always put tests inside Test classes. +- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` + +### Markers + +Apply the appropriate markers: +- "llm: uses an LLM to generate text or objects" +- "img_gen: uses an image generation AI" +- "extract: uses text/image extraction from documents" +- "inference: uses either an LLM or an image generation AI" +- "gha_disabled: will not be able to run properly on GitHub Actions" + +Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference` and `llm`. + +### Important rules + +- Never use unittest.mock. Use pytest-mock. 
+ +### Test Class Structure + +Always group the tests of a module into a test class: + +```python +@pytest.mark.llm +@pytest.mark.inference +@pytest.mark.asyncio(loop_scope="class") +class TestFooBar: + @pytest.mark.parametrize( + "topic test_case_blueprint", + [ + TestCases.CASE_1, + TestCases.CASE_2, + ], + ) + async def test_pipe_processing( + self, + request: FixtureRequest, + topic: str, + test_case_blueprint: StuffBlueprint, + ): + # Test implementation +``` + +Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. + +## Writing integration test to test pipes + +### Required imports for pipe tests + +```python +import pytest +from pytest import FixtureRequest +from pipelex import log, pretty_print +from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory +from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory +from pipelex.hub import get_report_delegate +from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt +from pipelex.config_pipelex import get_config + +from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe +from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +``` + +### Pipe test implementation steps + +1. Create Stuff from blueprint: + +```python +stuff = StuffFactory.make_stuff( + concept_code="RetrievedExcerpt", + domain="retrieve", + content=RetrievedExcerpt(text="", justification="") + name="retrieved_text", +) +``` + +2. 
Create Working Memory: + +```python +working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +``` + +3. Run the pipe: + +```python +pipe_output = await pipe_router.run_pipe( + pipe_code="pipe_name", + pipe_run_params=PipeRunParamsFactory.make_run_params(), + working_memory=working_memory, + job_metadata=JobMetadata(), +) +``` + +4. Basic assertions: + +```python +assert pipe_output is not None +assert pipe_output.working_memory is not None +assert pipe_output.main_stuff is not None +``` + +### Test Data Organization + +- If it's not already there, create a `test_data.py` file in the test directory +- Define test cases using `StuffBlueprint`: + +```python +class TestCases: + CASE_BLUEPRINT_1 = StuffBlueprint( + name="test_case_1", + concept_code="domain.ConceptName1", + value="test_value" + ) + CASE_BLUEPRINT_2 = StuffBlueprint( + name="test_case_2", + concept_code="domain.ConceptName2", + value="test_value" + ) + + CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" + ("topic1", CASE_BLUEPRINT_1), + ("topic2", CASE_BLUEPRINT_2), + ] +``` + +Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. +Also note that we provide a topic for the test case, which is purely for convenience. 
+ +## Best Practices for Testing + +- Use parametrize for multiple test cases +- Test both success and failure cases +- Verify working memory state +- Check output structure and content +- Use meaningful test case names +- Include docstrings explaining test purpose +- Log outputs for debugging +- Generate reports for cost tracking diff --git a/.cursor/rules/run_pipelex.mdc b/.cursor/rules/run_pipelex.mdc new file mode 100644 index 000000000..31d474aea --- /dev/null +++ b/.cursor/rules/run_pipelex.mdc @@ -0,0 +1,230 @@ +--- +alwaysApply: false +description: Guidelines for running Pipelex pipelines +globs: +- examples/**/*.py +--- +# Guide to execute a pipeline and write example code + +## Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
+ """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +# start Pipelex +Pipelex.make() +# run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +## Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + +from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +# start Pipelex +Pipelex.make() + +# run sample using asyncio +gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +## Setting up the input memory + +### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str +ImplicitMemory = Dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +# Here we have a single input and it's a Text. 
+# If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +# Here we have a single input and it's a PDF. +# Because PDFContent is a native concept, we can use it directly as a value, +# the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +# Here we have a single input and it's an Image. +# Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +# Here we have a single input, it's an image but +# its actually a more specific concept gantt.GanttImage which refines Image, +# so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +# Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +## Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
+It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... + + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. 
+ +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + diff --git a/.cursor/rules/tdd.mdc b/.cursor/rules/tdd.mdc index 575544a6f..46e9e34b6 100644 --- a/.cursor/rules/tdd.mdc +++ b/.cursor/rules/tdd.mdc @@ -1,7 +1,7 @@ --- -description: -globs: alwaysApply: false +description: '' +globs: [] --- # Test-Driven Development Guide @@ -26,3 +26,4 @@ If the code needs refactoring, with the best practices [coding_standards.mdc](co 5. **Validate tests** Remember: The key to TDD is writing the test first and letting it drive your implementation. Always run the full test suite and quality checks before considering a feature complete. + diff --git a/pipelex/kit/agents/docs.md b/pipelex/kit/agents/docs.md new file mode 100644 index 000000000..929bce93e --- /dev/null +++ b/pipelex/kit/agents/docs.md @@ -0,0 +1,8 @@ +Write docs and answer questions about writing docs. + +We use Material for MkDocs. All markdown in our docs must be compatible with Material for MkDocs and done using best practices to get the best results with Material for MkDocs. + +## MkDocs Markdown Requirements + +- Always add a blank line before any bullet lists or numbered lists in MkDocs markdown. + diff --git a/pipelex/kit/agents/llms.md b/pipelex/kit/agents/llms.md new file mode 100644 index 000000000..e10ee10f2 --- /dev/null +++ b/pipelex/kit/agents/llms.md @@ -0,0 +1,78 @@ +# Rules to choose LLM models used in PipeLLMs. + +## LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. 
+LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +## LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. **An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +## Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt_template = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +## LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. 
+ +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt_template = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/pipelex/kit/agents/run_pipelines.md b/pipelex/kit/agents/run_pipelex.md similarity index 100% rename from pipelex/kit/agents/run_pipelines.md rename to pipelex/kit/agents/run_pipelex.md diff --git a/pipelex/kit/agents/tdd.md b/pipelex/kit/agents/tdd.md new file mode 100644 index 000000000..0f41c71e8 --- /dev/null +++ b/pipelex/kit/agents/tdd.md @@ -0,0 +1,24 @@ +# Test-Driven Development Guide + +This document outlines our test-driven development (TDD) process and the tools available for testing. + +## TDD Cycle + +1. **Write a Test First** +[pytest.mdc](pytest.mdc) + +2. **Write the Code** + - Implement the minimum amount of code needed to pass the test + - Follow the project's coding standards + - Keep it simple - don't write more than needed + +3. **Run Linting and Type Checking** +[coding_standards.mdc](coding_standards.mdc) + +4. 
**Refactor if needed** +If the code needs refactoring, with the best practices [coding_standards.mdc](coding_standards.mdc) + +5. **Validate tests** + +Remember: The key to TDD is writing the test first and letting it drive your implementation. Always run the full test suite and quality checks before considering a feature complete. + diff --git a/pipelex/kit/index.toml b/pipelex/kit/index.toml index 2d19d428f..585f5f6d2 100644 --- a/pipelex/kit/index.toml +++ b/pipelex/kit/index.toml @@ -3,37 +3,57 @@ version = "1.0.0" description = "Pipelex kit configuration for agent documentation and cursor rules" [agents] -order = [ +demote = 1 +default_set = "pipelex_language" + +[agents.sets] +coding_standards = [ + "python_standards.md", + "pytest_standards.md", + "docs.md", + "tdd.md", +] +pipelex_language = ["write_pipelex.md", "run_pipelex.md", "llms.md"] +all = [ "python_standards.md", "pytest_standards.md", + "docs.md", + "tdd.md", "write_pipelex.md", - "run_pipelines.md", + "run_pipelex.md", + "llms.md", ] -demote = 1 [cursor.front_matter] alwaysApply = false -[cursor.files.python_standards] -front_matter = { description = "Python coding standards and best practices", globs = [ - "**/*.py", -] } - -[cursor.files.pytest_standards] -front_matter = { description = "Pytest testing standards", globs = [ - "tests/**/*.py", -] } - -[cursor.files.write_pipelex] -front_matter = { description = "Guidelines for writing Pipelex pipelines", globs = [ - "**/*.plx", - "**/pipelines/**/*.py", -] } - -[cursor.files.run_pipelines] -front_matter = { description = "Guidelines for running Pipelex pipelines", globs = [ - "examples/**/*.py", -] } +[cursor.files.python_standards.front_matter] +description = "Python coding standards and best practices" +globs = ["**/*.py"] + +[cursor.files.pytest_standards.front_matter] +description = "Guidelines for writing unit tests" +globs = ["tests/**/*.py"] + +[cursor.files.docs.front_matter] +description = "Guidelines for writing documentation" +globs = 
["docs/**/*.md"] + +[cursor.files.tdd.front_matter] +description = "Guidelines for writing test-driven development code" +globs = [] + +[cursor.files.write_pipelex.front_matter] +description = "Guidelines for writing Pipelex pipelines" +globs = ["**/*.plx", "**/pipelines/**/*.py"] + +[cursor.files.run_pipelex.front_matter] +description = "Guidelines for running Pipelex pipelines" +globs = ["examples/**/*.py"] + +[cursor.files.llms.front_matter] +description = "LLM configuration and usage guidelines" +globs = ["*.plx", "*.toml"] [[targets]] id = "agents_md" diff --git a/pipelex/kit/index_models.py b/pipelex/kit/index_models.py index 408108d87..3f297dec9 100644 --- a/pipelex/kit/index_models.py +++ b/pipelex/kit/index_models.py @@ -8,7 +8,8 @@ class AgentsMerge(BaseModel): """Configuration for merging agent documentation files.""" - order: list[str] = Field(description="Ordered list of agent markdown files to merge") + sets: dict[str, list[str]] = Field(description="Named sets of agent_rules files (e.g., coding_standards, pipelex_language, all)") + default_set: str = Field(default="pipelex_language", description="Default set to use when syncing") demote: int = Field(default=1, description="Number of levels to demote headings when merging") diff --git a/pipelex/kit/targets_update.py b/pipelex/kit/targets_update.py index 552915864..18ed0695c 100644 --- a/pipelex/kit/targets_update.py +++ b/pipelex/kit/targets_update.py @@ -48,19 +48,27 @@ def demote_match(match: re.Match[str]) -> str: return re.sub(pattern, demote_match, md_content, flags=re.MULTILINE) -def build_merged_rules(agents_dir: Traversable, idx: KitIndex) -> str: +def build_merged_rules(agents_dir: Traversable, idx: KitIndex, agent_set: str | None = None) -> str: """Build merged agent documentation from ordered files. 
Args: agents_dir: Traversable pointing to agents directory idx: Kit index configuration + agent_set: Name of the agent set to use (defaults to idx.agents.default_set) Returns: Merged markdown content with demoted headings """ + if agent_set is None: + agent_set = idx.agents.default_set + + if agent_set not in idx.agents.sets: + msg = f"Agent set '{agent_set}' not found in index.toml. Available sets: {list(idx.agents.sets.keys())}" + raise ValueError(msg) + parts: list[str] = [] - for name in idx.agents.order: + for name in idx.agents.sets[agent_set]: md = _read_agent_file(agents_dir, name) demoted = _demote_headings(md, idx.agents.demote) parts.append(demoted.rstrip()) diff --git a/tests/unit/pipelex/cli/commands/test_init_cmd.py b/tests/unit/pipelex/cli/commands/test_init_cmd.py index a160e7025..fecd8fade 100644 --- a/tests/unit/pipelex/cli/commands/test_init_cmd.py +++ b/tests/unit/pipelex/cli/commands/test_init_cmd.py @@ -17,16 +17,16 @@ class TestInitCmd: def test_do_init_config_copies_all_files(self, tmp_path: Path, mocker: MockerFixture) -> None: # Setup directories - template_dir = tmp_path / "config_template" - template_dir.mkdir() - (template_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") + kit_configs_dir = tmp_path / "kit" / "configs" + kit_configs_dir.mkdir(parents=True) + (kit_configs_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") target_dir = tmp_path / ".pipelex" target_dir.mkdir() - # Mock config manager + # Mock get_configs_dir and config manager + mocker.patch("pipelex.cli.commands.init_cmd.get_configs_dir", return_value=kit_configs_dir) mock_config_manager = mocker.MagicMock() - mock_config_manager.pipelex_root_dir = str(tmp_path) mock_config_manager.pipelex_config_dir = str(target_dir) mocker.patch("pipelex.cli.commands.init_cmd.config_manager", mock_config_manager) @@ -41,18 +41,18 @@ def test_do_init_config_copies_all_files(self, tmp_path: Path, mocker: MockerFix def 
test_do_init_config_skips_existing_files(self, tmp_path: Path, mocker: MockerFixture) -> None: # Setup directories with existing file - template_dir = tmp_path / "config_template" - template_dir.mkdir() - (template_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") - (template_dir / "new_file.toml").write_text("[new]\nconfig = 'value'") + kit_configs_dir = tmp_path / "kit" / "configs" + kit_configs_dir.mkdir(parents=True) + (kit_configs_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") + (kit_configs_dir / "new_file.toml").write_text("[new]\nconfig = 'value'") target_dir = tmp_path / ".pipelex" target_dir.mkdir() (target_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '0.9'") - # Mock config manager + # Mock get_configs_dir and config manager + mocker.patch("pipelex.cli.commands.init_cmd.get_configs_dir", return_value=kit_configs_dir) mock_config_manager = mocker.MagicMock() - mock_config_manager.pipelex_root_dir = str(tmp_path) mock_config_manager.pipelex_config_dir = str(target_dir) mocker.patch("pipelex.cli.commands.init_cmd.config_manager", mock_config_manager) @@ -75,17 +75,17 @@ def test_do_init_config_skips_existing_files(self, tmp_path: Path, mocker: Mocke def test_do_init_config_reset_overwrites_files(self, tmp_path: Path, mocker: MockerFixture) -> None: # Setup directories with existing file - template_dir = tmp_path / "config_template" - template_dir.mkdir() - (template_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") + kit_configs_dir = tmp_path / "kit" / "configs" + kit_configs_dir.mkdir(parents=True) + (kit_configs_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") target_dir = tmp_path / ".pipelex" target_dir.mkdir() (target_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '0.9'") - # Mock config manager + # Mock get_configs_dir and config manager + mocker.patch("pipelex.cli.commands.init_cmd.get_configs_dir", return_value=kit_configs_dir) 
mock_config_manager = mocker.MagicMock() - mock_config_manager.pipelex_root_dir = str(tmp_path) mock_config_manager.pipelex_config_dir = str(target_dir) mocker.patch("pipelex.cli.commands.init_cmd.config_manager", mock_config_manager) @@ -105,11 +105,11 @@ def test_do_init_config_reset_overwrites_files(self, tmp_path: Path, mocker: Moc def test_do_init_config_nested_directory_structure(self, tmp_path: Path, mocker: MockerFixture) -> None: # Setup complex nested structure - template_dir = tmp_path / "config_template" - template_dir.mkdir() - (template_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") + kit_configs_dir = tmp_path / "kit" / "configs" + kit_configs_dir.mkdir(parents=True) + (kit_configs_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") - inference_dir = template_dir / "inference" + inference_dir = kit_configs_dir / "inference" inference_dir.mkdir() (inference_dir / "backends.toml").write_text("[backends]\nconfig = 'value'") @@ -120,9 +120,9 @@ def test_do_init_config_nested_directory_structure(self, tmp_path: Path, mocker: target_dir = tmp_path / ".pipelex" target_dir.mkdir() - # Mock config manager + # Mock get_configs_dir and config manager + mocker.patch("pipelex.cli.commands.init_cmd.get_configs_dir", return_value=kit_configs_dir) mock_config_manager = mocker.MagicMock() - mock_config_manager.pipelex_root_dir = str(tmp_path) mock_config_manager.pipelex_config_dir = str(target_dir) mocker.patch("pipelex.cli.commands.init_cmd.config_manager", mock_config_manager) @@ -144,16 +144,16 @@ def test_do_init_config_nested_directory_structure(self, tmp_path: Path, mocker: def test_do_init_config_handles_permission_error(self, tmp_path: Path, mocker: MockerFixture) -> None: # Setup directories - template_dir = tmp_path / "config_template" - template_dir.mkdir() - (template_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") + kit_configs_dir = tmp_path / "kit" / "configs" + 
kit_configs_dir.mkdir(parents=True) + (kit_configs_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") target_dir = tmp_path / ".pipelex" target_dir.mkdir() - # Mock config manager + # Mock get_configs_dir and config manager + mocker.patch("pipelex.cli.commands.init_cmd.get_configs_dir", return_value=kit_configs_dir) mock_config_manager = mocker.MagicMock() - mock_config_manager.pipelex_root_dir = str(tmp_path) mock_config_manager.pipelex_config_dir = str(target_dir) mocker.patch("pipelex.cli.commands.init_cmd.config_manager", mock_config_manager) @@ -167,15 +167,15 @@ def test_do_init_config_handles_permission_error(self, tmp_path: Path, mocker: M def test_do_init_config_creates_target_directory(self, tmp_path: Path, mocker: MockerFixture) -> None: # Setup template directory only - template_dir = tmp_path / "config_template" - template_dir.mkdir() - (template_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") + kit_configs_dir = tmp_path / "kit" / "configs" + kit_configs_dir.mkdir(parents=True) + (kit_configs_dir / "pipelex.toml").write_text("[tool.pipelex]\nversion = '1.0'") target_dir = tmp_path / ".pipelex" # Don't create this - # Mock config manager + # Mock get_configs_dir and config manager + mocker.patch("pipelex.cli.commands.init_cmd.get_configs_dir", return_value=kit_configs_dir) mock_config_manager = mocker.MagicMock() - mock_config_manager.pipelex_root_dir = str(tmp_path) mock_config_manager.pipelex_config_dir = str(target_dir) mocker.patch("pipelex.cli.commands.init_cmd.config_manager", mock_config_manager) diff --git a/tests/unit/pipelex/tools/config/test_config_manager.py b/tests/unit/pipelex/tools/config/test_config_manager.py index 78c8c274e..82e70136b 100644 --- a/tests/unit/pipelex/tools/config/test_config_manager.py +++ b/tests/unit/pipelex/tools/config/test_config_manager.py @@ -12,4 +12,4 @@ def test_get_project_name(self): def test_load_pipelex_template_config(self): hub = get_pipelex_hub() - 
hub.setup_config(config_cls=PipelexConfig, specific_config_path="pipelex/config_template/pipelex.toml") + hub.setup_config(config_cls=PipelexConfig, specific_config_path="pipelex/kit/configs/pipelex.toml") From 60ebb5a987b7283aae299bffbc75a3b1cf065180 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 17:00:12 +0200 Subject: [PATCH 034/115] Cleanup before Kit --- .cursor/rules/docs.mdc | 2 +- .cursor/rules/llms.mdc | 5 +- .cursor/rules/pytest_standards.mdc | 2 +- .cursor/rules/tdd.mdc | 2 +- .github/copilot-instructions.md | 575 +++++++---------- AGENTS.md | 459 ++++---------- CLAUDE.md | 577 +++++++----------- README.md | 6 +- .../design_and_run_pipelines.md | 6 +- .../kick-off-a-knowledge-pipeline-project.md | 2 +- .../pipe-controllers/PipeCondition.md | 12 +- .../pipe-operators/PipeLLM.md | 16 +- docs/pages/cookbook-examples/extract-dpe.md | 2 +- docs/pages/cookbook-examples/extract-gantt.md | 2 +- .../extract-proof-of-purchase.md | 2 +- docs/pages/cookbook-examples/extract-table.md | 2 +- .../cookbook-examples/invoice-extractor.md | 2 +- docs/pages/cookbook-examples/write-tweet.md | 2 +- docs/pages/quick-start/index.md | 10 +- pipelex/kit/agents/llms.md | 4 +- pipelex/kit/index.toml | 42 +- tests/data/test_migrate_v0_1_0_to_v0_2_0.toml | 31 - 22 files changed, 623 insertions(+), 1140 deletions(-) delete mode 100644 tests/data/test_migrate_v0_1_0_to_v0_2_0.toml diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc index c4140ae5c..1400c5cd6 100644 --- a/.cursor/rules/docs.mdc +++ b/.cursor/rules/docs.mdc @@ -1,6 +1,6 @@ --- alwaysApply: false -description: '' +description: Guidelines for writing documentation globs: - docs/**/*.md --- diff --git a/.cursor/rules/llms.mdc b/.cursor/rules/llms.mdc index d9ce2ff1e..a21831a30 100644 --- a/.cursor/rules/llms.mdc +++ b/.cursor/rules/llms.mdc @@ -1,5 +1,6 @@ --- alwaysApply: false +description: LLM configuration and usage guidelines globs: - '*.plx' - '*.toml' @@ -41,7 +42,7 @@ type = "PipeLLM" 
description = "Write text about Hello World." output = "Text" model = { model = "gpt-5", temperature = 0.9 } -prompt_template = """ +prompt = """ Write a haiku about Hello World. """ ``` @@ -67,7 +68,7 @@ description = "Extract invoice information from an invoice text transcript" inputs = { invoice_text = "InvoiceText" } output = "Invoice" model = "llm_to_extract_invoice" -prompt_template = """ +prompt = """ Extract invoice information from this invoice: The category of this invoice is: $invoice_details.category. diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc index 857faee25..9b5df04a6 100644 --- a/.cursor/rules/pytest_standards.mdc +++ b/.cursor/rules/pytest_standards.mdc @@ -1,6 +1,6 @@ --- alwaysApply: false -description: Pytest testing standards +description: Guidelines for writing unit tests globs: - tests/**/*.py --- diff --git a/.cursor/rules/tdd.mdc b/.cursor/rules/tdd.mdc index 46e9e34b6..aa317290e 100644 --- a/.cursor/rules/tdd.mdc +++ b/.cursor/rules/tdd.mdc @@ -1,6 +1,6 @@ --- alwaysApply: false -description: '' +description: Guidelines for writing test-driven development code globs: [] --- # Test-Driven Development Guide diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index f54bd0b20..0573dba49 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,156 +1,29 @@ -# Coding Standards & Best Practices - -This document outlines the core coding standards, best practices, and quality control procedures for the codebase. - -## Type Hints - -1. **Always Use Type Hints** - - - Every function parameter must be typed - - Every function return must be typed - - Use type hints for all variables where type is not obvious - - Use dict, list, tupele types with lowercase first letter: dict[], list[], tuple[] - - Use type hints for all fields - - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals - - Use Field(default_factory=...) 
for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` - - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose - -2. **StrEnum** - - Import from `pipelex.types`: - ```python - from pipelex.types import StrEnum - ``` - -3. **Self type** - - Import from `pipelex.types`: - ```python - from pipelex.types import Self - ``` - -## Factory Pattern - - - Use Factory Pattern for object creation when dealing with multiple implementations - - Our factory methods are named `make_from_...` and such - -## Error Handling - - - Always catch exceptions at the place where you can add useful context to it. - - Use try/except blocks with specific exceptions - - Convert third-party exceptions to our custom ones - - Never catch Exception, only catch specific exceptions - - Always add `from exc` to the exception - - ```python - try: - self.models_manager.setup() - except RoutingProfileLibraryNotFoundError as exc: - msg = "The routing library could not be found, please call `pipelex init config` to create it" - raise PipelexSetupError(msg) from exc - ``` - - **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. - -## Documentation - -1. **Docstring Format** - ```python - def process_image(image_path: str, size: Tuple[int, int]) -> bytes: - """Process and resize an image. - - Args: - image_path: Path to the source image - size: Tuple of (width, height) for resizing - - Returns: - Processed image as bytes - """ - pass - ``` - -2. **Class Documentation** - ```python - class ImageProcessor: - """Handles image processing operations. - - Provides methods for resizing, converting, and optimizing images. 
- """ - ``` - -## Code Quality Checks - -### Linting and Type Checking - -Before finalizing a task, run: -```bash -make fix-unused-imports -make check -``` - -This runs multiple code quality tools: -- Pyright: Static type checking -- Ruff: Fast Python linter -- Mypy: Static type checker - -Always fix any issues reported by these tools before proceeding. - -### Running Tests - -1. **Quick Test Run** (no LLM/image generation): - ```bash - make tp - ``` - Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or ocr)) and not (needs_output or pipelex_api)` - -2. **Specific Tests**: - ```bash - make tp TEST=TestClassName - # or - make tp TEST=test_function_name - ``` - Note: Matches names starting with the provided string. - -**Important**: Never run `make ti`, `make test-inference`, `make to`, `make test-ocr`, `make tg`, or `make test-img-gen` - these use costly inference. - -## Pipelines - -- All pipeline definitions go in `pipelex/libraries/pipelines/` -- Always validate pipelines after creation/edit with `make validate`. - Iterate if there are errors. - -## Project Structure - -- **Pipelines**: `pipelex/libraries/pipelines/` -- **Tests**: `tests/` directory -- **Documentation**: `docs/` directory - ---- - -# Guide to write or edit pipelines using the Pipelex language in .plx files + +## Guide to write or edit pipelines using the Pipelex language in .plx files - Always first write your "plan" in natural langage, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) 
-## Pipeline File Naming +### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) - Files must be `.py` for structures - Use descriptive names in `snake_case` -## Pipeline File Structure +### Pipeline File Structure A pipeline file has three main sections: 1. Domain statement 2. Concept definitions 3. Pipe definitions -### Domain Statement +#### Domain Statement ```plx domain = "domain_name" description = "Description of the domain" # Optional ``` Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. -### Concept Definitions +#### Concept Definitions ```plx [concept] ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output @@ -162,9 +35,9 @@ Important Rules: - Avoid adjectives (no "LargeText", use "Text") - Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) yes -### Pipe Definitions +#### Pipe Definitions -## Pipe Base Structure +### Pipe Base Structure ```plx [pipe.your_pipe_name] @@ -207,14 +80,14 @@ inputs = { - `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: -## Structuring Models +### Structuring Models -### Model Location and Registration +#### Model Location and Registration - Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` - Models must inherit from `StructuredContent` or appropriate content type -## Model Structure +### Model Structure Concepts and their structure classes are meant to indicate an idea. A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. 
@@ -247,7 +120,7 @@ from pydantic import Field from pipelex.core.stuffs.structured_content import StructuredContent -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent class YourModel(StructuredContent): # Always be a subclass of StructuredContent # Required fields field1: str @@ -260,21 +133,21 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent # Date fields should remove timezone date_field: Optional[datetime] = None ``` -### Usage +#### Usage Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. -### Best Practices for structures +#### Best Practices for structures - Respect Pydantic v2 standards - Use type hints for all fields - Use `Field` declaration and write the description -## Pipe Controllers and Pipe Operators +### Pipe Controllers and Pipe Operators Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: @@ -290,11 +163,11 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ - `PipeImgGen` - Generate Images - `PipeFunc` - For running classic python scripts -## PipeSequence controller +### PipeSequence controller Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. -### Basic Structure +#### Basic Structure ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -308,13 +181,13 @@ steps = [ ] ``` -### Key Components +#### Key Components 1. 
**Steps Array**: List of pipes to execute in sequence - `pipe`: Name of the pipe to execute - `result`: Name to assign to the pipe's output that will be in the working memory -### Using PipeBatch in Steps +#### Using PipeBatch in Steps You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: @@ -335,11 +208,11 @@ steps = [ The result of a batched step will be a `ListContent` containing the outputs from processing each item. -## PipeCondition controller +### PipeCondition controller The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. -### Basic usage +#### Basic usage ```plx [pipe.conditional_operation] @@ -348,8 +221,9 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" @@ -362,28 +236,45 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" ``` -### Key Parameters +#### Key Parameters - `expression`: Direct boolean or string expression (mutually exclusive with expression_template) - `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `pipe_map`: Dictionary mapping expression results to pipe codes : -1 - The key on the left (`small`, `medium`) is the result of `expression` or `expression_template`. 
-2 - The value on the right (`process_small`, `process_medium`, ..) is the name of the pipce to trigger +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found -## PipeLLM operator +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator PipeLLM is used to: 1. Generate text or objects with LLMs 2. Process images with Vision LLMs -### Basic Usage +#### Basic Usage Simple Text Generation: ```plx @@ -391,7 +282,7 @@ Simple Text Generation: type = "PipeLLM" description = "Write a short story" output = "Text" -prompt_template = """ +prompt = """ Write a short story about a programmer. 
""" ``` @@ -403,7 +294,7 @@ type = "PipeLLM" description = "Extract information" inputs = { text = "Text" } output = "PersonInfo" -prompt_template = """ +prompt = """ Extract person information from this text: @text """ @@ -416,10 +307,10 @@ type = "PipeLLM" description = "Expert analysis" output = "Analysis" system_prompt = "You are a data analysis expert" -prompt_template = "Analyze this data" +prompt = "Analyze this data" ``` -### Multiple Outputs +#### Multiple Outputs Generate multiple outputs (fixed number): ```plx @@ -439,19 +330,33 @@ output = "Idea" multiple_output = true # Let the LLM decide how many to generate ``` -### Vision +#### Vision -Process images with VLMs: +Process images with VLMs (image inputs must be tagged in the prompt): ```plx [pipe.analyze_image] type = "PipeLLM" description = "Analyze image" -inputs = { image = "Image" } # `image` is the name of the stuff that contains the Image. If its in an attribute within a stuff, you can add something like `{ "page.image": "Image" } +inputs = { image = "Image" } output = "ImageAnalysis" -prompt_template = "Describe what you see in this image" +prompt = """ +Describe what you see in this image: + +$image +""" ``` -### Writing prompts for PipeLLM +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM **Insert stuff inside a tagged block** @@ -459,7 +364,7 @@ If the inserted text is supposedly a long text, made of several lines or paragra Example template: ```plx -prompt_template = """ +prompt = """ Match the expense with its corresponding invoice: @expense @@ -467,7 +372,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt template. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. @@ -478,7 +383,7 @@ If the inserted text is short text and it makes sense to have it inserted direct Example template: ```plx -prompt_template = """ +prompt = """ Your goal is to summarize everything related to $topic in the provided text: @text @@ -495,11 +400,11 @@ DO NOT write "$topic" alone in an isolated line. DO write things like "Write an essay about $topic" to include text into an actual sentence. -## PipeExtract operator +### PipeExtract operator The PipeExtract operator is used to extract text and images from an image or a PDF -### Simple Text Extraction +#### Simple Text Extraction ```plx [pipe.extract_info] type = "PipeExtract" @@ -508,7 +413,17 @@ inputs = { document = "PDF" } # or { image = "Image" } if it's an image. 
This is output = "Page" ``` -Only one input is allowed and it must either be an `Image` or a `PDF`. +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. The output concept `Page` is a native concept, with the structure `PageContent`: It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` @@ -525,11 +440,11 @@ class PageContent(StructuredContent): # CONCEPT IS "Page" - `text_and_images` are the text, and the related images found in the input image or PDF. - `page_view` is the screenshot of the whole pdf page/image. -## PipeCompose operator +### PipeCompose operator The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
-### Basic Usage +#### Basic Usage Simple Template Composition: ```plx @@ -538,8 +453,8 @@ type = "PipeCompose" description = "Compose a report using template" inputs = { data = "ReportData" } output = "Text" -jinja2 = """ -# Report Summary +template = """ +## Report Summary Based on the analysis: $data @@ -555,7 +470,21 @@ type = "PipeCompose" description = "Use a predefined template" inputs = { content = "Text" } output = "Text" -jinja2_name = "standard_report_template" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } ``` CRM Email Template: @@ -567,7 +496,7 @@ inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } output = "Text" template_category = "html" templating_style = { tag_style = "square_brackets", text_format = "html" } -jinja2 = """ +template = """ Subject: Following up on our $deal.product_name discussion Hi $customer.first_name, @@ -597,25 +526,30 @@ $sales_rep.phone | $sales_rep.email """ ``` -### Key Parameters +#### Key Parameters -- `jinja2`: Inline Jinja2 template (mutually exclusive with jinja2_name) -- `jinja2_name`: Name of a predefined template (mutually exclusive with jinja2) +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) - `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) 
- `templating_style`: Styling options for template rendering - `extra_context`: Additional context variables for template -### Template Variables +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables Use the same variable insertion rules as PipeLLM: - `@variable` for block insertion (multi-line content) - `$variable` for inline insertion (short text) -## PipeImgGen operator +### PipeImgGen operator The PipeImgGen operator is used to generate images using AI image generation models. -### Basic Usage +#### Basic Usage Simple Image Generation: ```plx @@ -633,9 +567,9 @@ type = "PipeImgGen" description = "Generate a high-quality photo" inputs = { prompt = "ImgGenPrompt" } output = "Photo" -model = { model = "flux-pro/v1.1-ultra", quality = "hd" } +model = { model = "fast-img-gen" } aspect_ratio = "16:9" -nb_steps = 8 +quality = "hd" ``` Multiple Image Generation: @@ -665,14 +599,11 @@ is_raw = false safety_tolerance = 3 ``` -### Key Parameters +#### Key Parameters **Image Generation Settings:** -- `img_gen`: ImgGenChoice (preset name or inline settings) -- `img_gen_handle`: Direct model handle (legacy) +- `model`: Model choice (preset name or inline settings with model name) - `quality`: Image quality ("standard", "hd") -- `nb_steps`: Number of generation steps -- `guidance_scale`: How closely to follow the prompt **Output Configuration:** - `nb_output`: Number of images to generate @@ -686,7 +617,7 @@ safety_tolerance = 3 - `is_moderated`: Enable content moderation - `safety_tolerance`: Content safety level (1-6) -### Input Requirements +#### Input Requirements PipeImgGen requires exactly one input that must be either: - An `ImgGenPrompt` concept @@ -694,11 +625,11 @@ PipeImgGen requires exactly one input that must be either: The input can be named anything but must contain 
the prompt text for image generation. -## PipeFunc operator +### PipeFunc operator The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. -### Basic Usage +#### Basic Usage Simple Function Call: ```plx @@ -720,11 +651,11 @@ output = "FileContent" function_name = "read_file_content" ``` -### Key Parameters +#### Key Parameters - `function_name`: Name of the Python function to call (must be registered in func_registry) -### Function Requirements +#### Function Requirements The Python function must: @@ -741,7 +672,7 @@ The Python function must: - `list[StuffContent]`: Multiple content objects (becomes ListContent) - `str`: Simple string (becomes TextContent) -### Function Registration +#### Function Registration Functions must be registered in the function registry before use: @@ -760,7 +691,7 @@ async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: return MyResultContent(data=result) ``` -### Working Memory Access +#### Working Memory Access Inside the function, access pipeline inputs through working memory: @@ -779,9 +710,9 @@ async def process_function(working_memory: WorkingMemory) -> TextContent: --- -## Rules to choose LLM models used in PipeLLMs. +### Rules to choose LLM models used in PipeLLMs. -### LLM Configuration System +#### LLM Configuration System In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: @@ -790,7 +721,7 @@ LLM configurations are managed through the new inference backend system with fil - **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` - **Routing**: `.pipelex/inference/routing_profiles.toml` -### LLM Handles +#### LLM Handles An llm_handle can be either: 1. 
**A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system @@ -806,9 +737,9 @@ base-mistral = "mistral-medium" The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. -### Using an LLM Handle in a PipeLLM +#### Using an LLM Handle in a PipeLLM -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: +Here is an example of using a model to specify which LLM to use in a PipeLLM: ```plx [pipe.hello_world] @@ -816,14 +747,14 @@ type = "PipeLLM" description = "Write text about Hello World." output = "Text" model = { model = "gpt-5", temperature = 0.9 } -prompt_template = """ +prompt = """ Write a haiku about Hello World. """ ``` As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). -### LLM Presets +#### LLM Presets Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. @@ -842,7 +773,7 @@ description = "Extract invoice information from an invoice text transcript" inputs = { invoice_text = "InvoiceText" } output = "Invoice" model = "llm_to_extract_invoice" -prompt_template = """ +prompt = """ Extract invoice information from this invoice: The category of this invoice is: $invoice_details.category. @@ -862,11 +793,9 @@ ALWAYS RUN `make validate` when you are finished writing pipelines: This checks Then, create an example file to run the pipeline in the `examples` folder. But don't write documentation unless asked explicitly to. 
---- - -# Guide to write an example to execute a pipeline +## Guide to execute a pipeline and write example code -## Example to execute a pipeline with text output +### Example to execute a pipeline with text output ```python import asyncio @@ -888,14 +817,14 @@ async def hello_world() -> str: return pipe_output.main_stuff_as_str -# start Pipelex +## start Pipelex Pipelex.make() -# run sample using asyncio +## run sample using asyncio output_text = asyncio.run(hello_world()) pretty_print(output_text, title="Your first Pipelex output") ``` -## Example to execute a pipeline with structured output +### Example to execute a pipeline with structured output ```python import asyncio @@ -925,17 +854,17 @@ async def extract_gantt(image_url: str) -> GanttChart: return pipe_output.main_stuff_as(content_type=GanttChart) -# start Pipelex +## start Pipelex Pipelex.make() -# run sample using asyncio +## run sample using asyncio gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) pretty_print(gantt_chart, title="Gantt Chart") ``` -## Setting up the input memory +### Setting up the input memory -### Explanation of input memory +#### Explanation of input memory The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: ```python @@ -944,13 +873,13 @@ ImplicitMemory = Dict[str, StuffContentOrData] ``` As you can seen, we made it so different ways can be used to define that stuff using structured content or data. -### Different ways to set up the input memory +#### Different ways to set up the input memory So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: ```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. +## Here we have a single input and it's a Text. 
+## If you assign a string, by default it will be considered as a TextContent. pipe_output = await execute_pipeline( pipe_code="master_advisory_orchestrator", input_memory={ @@ -958,9 +887,9 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: pipe_output = await execute_pipeline( pipe_code="power_extractor_dpe", input_memory={ @@ -968,8 +897,8 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input and it's an Image. -# Because ImageContent is a native concept, we can use it directly as a value: +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: pipe_output = await execute_pipeline( pipe_code="fashion_variation_pipeline", input_memory={ @@ -977,9 +906,9 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input, it's an image but -# its actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: pipe_output = await execute_pipeline( pipe_code="extract_gantt_by_steps", input_memory={ @@ -990,7 +919,7 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here is a more complex example with multiple inputs assigned using different ways: +## Here is a more complex example with multiple inputs assigned using different ways: pipe_output = await execute_pipeline( pipe_code="retrieve_then_answer", dynamic_output_concept_code="contracts.Fees", @@ -1005,7 +934,7 @@ So here are a few concrete examples of calls to execute_pipeline with various wa ) ``` -## Using the outputs of a pipeline +### Using the outputs of a pipeline All pipe executions return a `PipeOutput` object. It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. @@ -1061,7 +990,7 @@ class PipeOutput(BaseModel): As you can see, you can extarct any variable from the output working memory. -### Getting the main stuff as a specific type +#### Getting the main stuff as a specific type Simple text as a string: @@ -1088,161 +1017,81 @@ result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) --- -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead.
- -### Test file structure - -- Name test files with `test_` prefix -- Use descriptive names that match the functionality being tested -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put test inside Test classes. -- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "extract: uses ocr models" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference`and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. - -### Test Class Structure +## Rules to choose LLM models used in PipeLLMs. 
-Always group the tests of a module into a test class: +### LLM Configuration System -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` -## Writing integration test to test pipes +### LLM Handles -### Required imports for pipe tests +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" ``` -### Pipe test implementation steps - -1. Create Stuff from blueprint: +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` +### Using an LLM Handle in a PipeLLM -2. Create Working Memory: +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" ``` -3. 
Run the pipe: +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` +### LLM Presets -4. Basic assertions: +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } ``` -### Test Data Organization +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: -- If it's not already there, create a `test_data.py` file in the test directory -- Define test cases using `StuffBlueprint`: +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) +The category of this invoice is: $invoice_details.category. 
- CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] +@invoice_text +""" ``` -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. If needed, you can add llm presets. -## Best Practices for Testing -- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose -- Log outputs for debugging -- Generate reports for cost tracking +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/AGENTS.md b/AGENTS.md index 6c6930a3e..0573dba49 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,156 +1,29 @@ -# Coding Standards & Best Practices - -This document outlines the core coding standards, best practices, and quality control procedures for the codebase. - -## Type Hints - -1. **Always Use Type Hints** - - - Every function parameter must be typed - - Every function return must be typed - - Use type hints for all variables where type is not obvious - - Use dict, list, tupele types with lowercase first letter: dict[], list[], tuple[] - - Use type hints for all fields - - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals - - Use Field(default_factory=...) 
for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` - - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose - -2. **StrEnum** - - Import from `pipelex.types`: - ```python - from pipelex.types import StrEnum - ``` - -3. **Self type** - - Import from `pipelex.types`: - ```python - from pipelex.types import Self - ``` - -## Factory Pattern - - - Use Factory Pattern for object creation when dealing with multiple implementations - - Our factory methods are named `make_from_...` and such - -## Error Handling - - - Always catch exceptions at the place where you can add useful context to it. - - Use try/except blocks with specific exceptions - - Convert third-party exceptions to our custom ones - - Never catch Exception, only catch specific exceptions - - Always add `from exc` to the exception - - ```python - try: - self.models_manager.setup() - except RoutingProfileLibraryNotFoundError as exc: - msg = "The routing library could not be found, please call `pipelex init config` to create it" - raise PipelexSetupError(msg) from exc - ``` - - **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. - -## Documentation - -1. **Docstring Format** - ```python - def process_image(image_path: str, size: Tuple[int, int]) -> bytes: - """Process and resize an image. - - Args: - image_path: Path to the source image - size: Tuple of (width, height) for resizing - - Returns: - Processed image as bytes - """ - pass - ``` - -2. **Class Documentation** - ```python - class ImageProcessor: - """Handles image processing operations. - - Provides methods for resizing, converting, and optimizing images. 
- """ - ``` - -## Code Quality Checks - -### Linting and Type Checking - -Before finalizing a task, run: -```bash -make fix-unused-imports -make check -``` - -This runs multiple code quality tools: -- Pyright: Static type checking -- Ruff: Fast Python linter -- Mypy: Static type checker - -Always fix any issues reported by these tools before proceeding. - -### Running Tests - -1. **Quick Test Run** (no LLM/image generation): - ```bash - make tp - ``` - Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` - -2. **Specific Tests**: - ```bash - make tp TEST=TestClassName - # or - make tp TEST=test_function_name - ``` - Note: Matches names starting with the provided string. - -**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. - -## Pipelines - -- All pipeline definitions go in `pipelex/libraries/pipelines/` -- Always validate pipelines after creation/edit with `make validate`. - Iterate if there are errors. - -## Project Structure - -- **Pipelines**: `pipelex/libraries/pipelines/` -- **Tests**: `tests/` directory -- **Documentation**: `docs/` directory - ---- - -# Guide to write or edit pipelines using the Pipelex language in .plx files + +## Guide to write or edit pipelines using the Pipelex language in .plx files - Always first write your "plan" in natural langage, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) 
-## Pipeline File Naming +### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) - Files must be `.py` for structures - Use descriptive names in `snake_case` -## Pipeline File Structure +### Pipeline File Structure A pipeline file has three main sections: 1. Domain statement 2. Concept definitions 3. Pipe definitions -### Domain Statement +#### Domain Statement ```plx domain = "domain_name" description = "Description of the domain" # Optional ``` Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. -### Concept Definitions +#### Concept Definitions ```plx [concept] ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output @@ -162,9 +35,9 @@ Important Rules: - Avoid adjectives (no "LargeText", use "Text") - Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) yes -### Pipe Definitions +#### Pipe Definitions -## Pipe Base Structure +### Pipe Base Structure ```plx [pipe.your_pipe_name] @@ -207,14 +80,14 @@ inputs = { - `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: -## Structuring Models +### Structuring Models -### Model Location and Registration +#### Model Location and Registration - Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` - Models must inherit from `StructuredContent` or appropriate content type -## Model Structure +### Model Structure Concepts and their structure classes are meant to indicate an idea. A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. 
@@ -247,7 +120,7 @@ from pydantic import Field from pipelex.core.stuffs.structured_content import StructuredContent -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent class YourModel(StructuredContent): # Always be a subclass of StructuredContent # Required fields field1: str @@ -260,21 +133,21 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent # Date fields should remove timezone date_field: Optional[datetime] = None ``` -### Usage +#### Usage Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. -### Best Practices for structures +#### Best Practices for structures - Respect Pydantic v2 standards - Use type hints for all fields - Use `Field` declaration and write the description -## Pipe Controllers and Pipe Operators +### Pipe Controllers and Pipe Operators Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: @@ -290,11 +163,11 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ - `PipeImgGen` - Generate Images - `PipeFunc` - For running classic python scripts -## PipeSequence controller +### PipeSequence controller Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. -### Basic Structure +#### Basic Structure ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -308,13 +181,13 @@ steps = [ ] ``` -### Key Components +#### Key Components 1. 
**Steps Array**: List of pipes to execute in sequence - `pipe`: Name of the pipe to execute - `result`: Name to assign to the pipe's output that will be in the working memory -### Using PipeBatch in Steps +#### Using PipeBatch in Steps You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: @@ -335,11 +208,11 @@ steps = [ The result of a batched step will be a `ListContent` containing the outputs from processing each item. -## PipeCondition controller +### PipeCondition controller The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. -### Basic usage +#### Basic usage ```plx [pipe.conditional_operation] @@ -371,7 +244,7 @@ medium = "process_medium" large = "process_large" ``` -### Key Parameters +#### Key Parameters - `expression`: Direct boolean or string expression (mutually exclusive with expression_template) - `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) @@ -395,13 +268,13 @@ approved = "process_approved" rejected = "process_rejected" ``` -## PipeLLM operator +### PipeLLM operator PipeLLM is used to: 1. Generate text or objects with LLMs 2. Process images with Vision LLMs -### Basic Usage +#### Basic Usage Simple Text Generation: ```plx @@ -437,7 +310,7 @@ system_prompt = "You are a data analysis expert" prompt = "Analyze this data" ``` -### Multiple Outputs +#### Multiple Outputs Generate multiple outputs (fixed number): ```plx @@ -457,7 +330,7 @@ output = "Idea" multiple_output = true # Let the LLM decide how many to generate ``` -### Vision +#### Vision Process images with VLMs (image inputs must be tagged in the prompt): ```plx @@ -483,7 +356,7 @@ output = "Analysis" prompt = "Analyze the colors in $photo and the shapes in $painting." 
``` -### Writing prompts for PipeLLM +#### Writing prompts for PipeLLM **Insert stuff inside a tagged block** @@ -527,11 +400,11 @@ DO NOT write "$topic" alone in an isolated line. DO write things like "Write an essay about $topic" to include text into an actual sentence. -## PipeExtract operator +### PipeExtract operator The PipeExtract operator is used to extract text and images from an image or a PDF -### Simple Text Extraction +#### Simple Text Extraction ```plx [pipe.extract_info] type = "PipeExtract" @@ -567,11 +440,11 @@ class PageContent(StructuredContent): # CONCEPT IS "Page" - `text_and_images` are the text, and the related images found in the input image or PDF. - `page_view` is the screenshot of the whole pdf page/image. -## PipeCompose operator +### PipeCompose operator The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. -### Basic Usage +#### Basic Usage Simple Template Composition: ```plx @@ -581,7 +454,7 @@ description = "Compose a report using template" inputs = { data = "ReportData" } output = "Text" template = """ -# Report Summary +## Report Summary Based on the analysis: $data @@ -653,7 +526,7 @@ $sales_rep.phone | $sales_rep.email """ ``` -### Key Parameters +#### Key Parameters - `template`: Inline template string (mutually exclusive with template_name) - `template_name`: Name of a predefined template (mutually exclusive with template) @@ -666,17 +539,17 @@ For more control, you can use a nested `template` section instead of the `templa - `template.category`: Template type - `template.templating_style`: Styling options -### Template Variables +#### Template Variables Use the same variable insertion rules as PipeLLM: - `@variable` for block insertion (multi-line content) - `$variable` for inline insertion (short text) -## PipeImgGen operator +### PipeImgGen operator The PipeImgGen operator is used to generate images using AI image 
generation models. -### Basic Usage +#### Basic Usage Simple Image Generation: ```plx @@ -726,7 +599,7 @@ is_raw = false safety_tolerance = 3 ``` -### Key Parameters +#### Key Parameters **Image Generation Settings:** - `model`: Model choice (preset name or inline settings with model name) @@ -744,7 +617,7 @@ safety_tolerance = 3 - `is_moderated`: Enable content moderation - `safety_tolerance`: Content safety level (1-6) -### Input Requirements +#### Input Requirements PipeImgGen requires exactly one input that must be either: - An `ImgGenPrompt` concept @@ -752,11 +625,11 @@ PipeImgGen requires exactly one input that must be either: The input can be named anything but must contain the prompt text for image generation. -## PipeFunc operator +### PipeFunc operator The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. -### Basic Usage +#### Basic Usage Simple Function Call: ```plx @@ -778,11 +651,11 @@ output = "FileContent" function_name = "read_file_content" ``` -### Key Parameters +#### Key Parameters - `function_name`: Name of the Python function to call (must be registered in func_registry) -### Function Requirements +#### Function Requirements The Python function must: @@ -799,7 +672,7 @@ The Python function must: - `list[StuffContent]`: Multiple content objects (becomes ListContent) - `str`: Simple string (becomes TextContent) -### Function Registration +#### Function Registration Functions must be registered in the function registry before use: @@ -818,7 +691,7 @@ async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: return MyResultContent(data=result) ``` -### Working Memory Access +#### Working Memory Access Inside the function, access pipeline inputs through working memory: @@ -837,9 +710,9 @@ async def process_function(working_memory: WorkingMemory) -> TextContent: --- -## Rules to choose LLM models used in PipeLLMs. 
+### Rules to choose LLM models used in PipeLLMs. -### LLM Configuration System +#### LLM Configuration System In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: @@ -848,7 +721,7 @@ LLM configurations are managed through the new inference backend system with fil - **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` - **Routing**: `.pipelex/inference/routing_profiles.toml` -### LLM Handles +#### LLM Handles An llm_handle can be either: 1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system @@ -864,7 +737,7 @@ base-mistral = "mistral-medium" The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. -### Using an LLM Handle in a PipeLLM +#### Using an LLM Handle in a PipeLLM Here is an example of using a model to specify which LLM to use in a PipeLLM: @@ -881,7 +754,7 @@ Write a haiku about Hello World. As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). -### LLM Presets +#### LLM Presets Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. @@ -920,11 +793,9 @@ ALWAYS RUN `make validate` when you are finished writing pipelines: This checks Then, create an example file to run the pipeline in the `examples` folder. But don't write documentation unless asked explicitly to. 
---- - -# Guide to write an example to execute a pipeline +## Guide to execute a pipeline and write example code -## Example to execute a pipeline with text output +### Example to execute a pipeline with text output ```python import asyncio @@ -946,14 +817,14 @@ async def hello_world() -> str: return pipe_output.main_stuff_as_str -# start Pipelex +## start Pipelex Pipelex.make() -# run sample using asyncio +## run sample using asyncio output_text = asyncio.run(hello_world()) pretty_print(output_text, title="Your first Pipelex output") ``` -## Example to execute a pipeline with structured output +### Example to execute a pipeline with structured output ```python import asyncio @@ -983,17 +854,17 @@ async def extract_gantt(image_url: str) -> GanttChart: return pipe_output.main_stuff_as(content_type=GanttChart) -# start Pipelex +## start Pipelex Pipelex.make() -# run sample using asyncio +## run sample using asyncio gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) pretty_print(gantt_chart, title="Gantt Chart") ``` -## Setting up the input memory +### Setting up the input memory -### Explanation of input memory +#### Explanation of input memory The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: ```python @@ -1002,13 +873,13 @@ ImplicitMemory = Dict[str, StuffContentOrData] ``` As you can seen, we made it so different ways can be used to define that stuff using structured content or data. -### Different ways to set up the input memory +#### Different ways to set up the input memory So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: ```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. +## Here we have a single input and it's a Text. 
+## If you assign a string, by default it will be considered as a TextContent. pipe_output = await execute_pipeline( pipe_code="master_advisory_orchestrator", input_memory={ @@ -1016,9 +887,9 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: pipe_output = await execute_pipeline( pipe_code="power_extractor_dpe", input_memory={ @@ -1026,8 +897,8 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input and it's an Image. -# Because ImageContent is a native concept, we can use it directly as a value: +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: pipe_output = await execute_pipeline( pipe_code="fashion_variation_pipeline", input_memory={ @@ -1035,9 +906,9 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input, it's an image but -# its actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: +## Here we have a single input, it's an image but +## its actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: pipe_output = await execute_pipeline( pipe_code="extract_gantt_by_steps", input_memory={ @@ -1048,7 +919,7 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here is a more complex example with multiple inputs assigned using different ways: +## Here is a more complex example with multiple inputs assigned using different ways: pipe_output = await execute_pipeline( pipe_code="retrieve_then_answer", dynamic_output_concept_code="contracts.Fees", @@ -1063,7 +934,7 @@ So here are a few concrete examples of calls to execute_pipeline with various wa ) ``` -## Using the outputs of a pipeline +### Using the outputs of a pipeline All pipe executions return a `PipeOutput` object. It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. @@ -1119,7 +990,7 @@ class PipeOutput(BaseModel): As you can see, you can extarct any variable from the output working memory. -### Getting the main stuff as a specific type +#### Getting the main stuff as a specific type Simple text as a string: @@ -1146,161 +1017,81 @@ result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) --- -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
- -### Test file structure - -- Name test files with `test_` prefix -- Use descriptive names that match the functionality being tested -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put test inside Test classes. -- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "extract: uses text/image extraction from documents" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference`and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. - -### Test Class Structure +## Rules to choose LLM models used in PipeLLMs. 
-Always group the tests of a module into a test class: +### LLM Configuration System -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` -## Writing integration test to test pipes +### LLM Handles -### Required imports for pipe tests +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" ``` -### Pipe test implementation steps - -1. Create Stuff from blueprint: +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` +### Using an LLM Handle in a PipeLLM -2. Create Working Memory: +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" ``` -3. 
Run the pipe: +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1); you can optionally set max_tokens (either an int or the string "auto"). -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` +### LLM Presets -4. Basic assertions: +Presets are meant to record the choice of an llm with its hyperparameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } ``` -### Test Data Organization +The benefit is that these presets can be used to set the LLM choice in a PipeLLM, like this: -- If it's not already there, create a `test_data.py` file in the test directory -- Define test cases using `StuffBlueprint`: +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) +The category of this invoice is: $invoice_details.category.
- CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] +@invoice_text +""" ``` -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +In a PipeLLM, you must not use an LLM preset that does not exist in the deck. If needed, you can add llm presets. -## Best Practices for Testing -- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose -- Log outputs for debugging -- Generate reports for cost tracking +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/CLAUDE.md b/CLAUDE.md index 7339ec32a..0573dba49 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,156 +1,29 @@ -# Coding Standards & Best Practices - -This document outlines the core coding standards, best practices, and quality control procedures for the codebase. - -## Type Hints - -1. **Always Use Type Hints** - - - Every function parameter must be typed - - Every function return must be typed - - Use type hints for all variables where type is not obvious - - Use dict, list, tupele types with lowercase first letter: dict[], list[], tuple[] - - Use type hints for all fields - - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals - - Use Field(default_factory=...)
for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` - - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose - -2. **StrEnum** - - Import from `pipelex.types`: - ```python - from pipelex.types import StrEnum - ``` - -3. **Self type** - - Import from `pipelex.types`: - ```python - from pipelex.types import Self - ``` - -## Factory Pattern - - - Use Factory Pattern for object creation when dealing with multiple implementations - - Our factory methods are named `make_from_...` and such - -## Error Handling - - - Always catch exceptions at the place where you can add useful context to it. - - Use try/except blocks with specific exceptions - - Convert third-party exceptions to our custom ones - - Never catch Exception, only catch specific exceptions - - Always add `from exc` to the exception - - ```python - try: - self.models_manager.setup() - except RoutingProfileLibraryNotFoundError as exc: - msg = "The routing library could not be found, please call `pipelex init config` to create it" - raise PipelexSetupError(msg) from exc - ``` - - **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. - -## Documentation - -1. **Docstring Format** - ```python - def process_image(image_path: str, size: Tuple[int, int]) -> bytes: - """Process and resize an image. - - Args: - image_path: Path to the source image - size: Tuple of (width, height) for resizing - - Returns: - Processed image as bytes - """ - pass - ``` - -2. **Class Documentation** - ```python - class ImageProcessor: - """Handles image processing operations. - - Provides methods for resizing, converting, and optimizing images. 
- """ - ``` - -## Code Quality Checks - -### Linting and Type Checking - -Before finalizing a task, run: -```bash -make fix-unused-imports -make check -``` - -This runs multiple code quality tools: -- Pyright: Static type checking -- Ruff: Fast Python linter -- Mypy: Static type checker - -Always fix any issues reported by these tools before proceeding. - -### Running Tests - -1. **Quick Test Run** (no LLM/image generation): - ```bash - make tp - ``` - Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or ocr)) and not (needs_output or pipelex_api)` - -2. **Specific Tests**: - ```bash - make tp TEST=TestClassName - # or - make tp TEST=test_function_name - ``` - Note: Matches names starting with the provided string. - -**Important**: Never run `make ti`, `make test-inference`, `make to`, `make test-ocr`, `make tg`, or `make test-img-gen` - these use costly inference. - -## Pipelines - -- All pipeline definitions go in `pipelex/libraries/pipelines/` -- Always validate pipelines after creation/edit with `make validate`. - Iterate if there are errors. - -## Project Structure - -- **Pipelines**: `pipelex/libraries/pipelines/` -- **Tests**: `tests/` directory -- **Documentation**: `docs/` directory - ---- - -# Guide to write or edit pipelines using the Pipelex language in .plx files + +## Guide to write or edit pipelines using the Pipelex language in .plx files - Always first write your "plan" in natural langage, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) 
-## Pipeline File Naming +### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) - Files must be `.py` for structures - Use descriptive names in `snake_case` -## Pipeline File Structure +### Pipeline File Structure A pipeline file has three main sections: 1. Domain statement 2. Concept definitions 3. Pipe definitions -### Domain Statement +#### Domain Statement ```plx domain = "domain_name" description = "Description of the domain" # Optional ``` Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. -### Concept Definitions +#### Concept Definitions ```plx [concept] ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output @@ -162,9 +35,9 @@ Important Rules: - Avoid adjectives (no "LargeText", use "Text") - Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) yes -### Pipe Definitions +#### Pipe Definitions -## Pipe Base Structure +### Pipe Base Structure ```plx [pipe.your_pipe_name] @@ -185,7 +58,7 @@ But it should be: ```plx [pipe.your_pipe_name] type = "PipeSequence" -description = "....." +description = "....." ``` The pipes will all have at least this base structure. @@ -207,14 +80,14 @@ inputs = { - `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: -## Structuring Models +### Structuring Models -### Model Location and Registration +#### Model Location and Registration - Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` - Models must inherit from `StructuredContent` or appropriate content type -## Model Structure +### Model Structure Concepts and their structure classes are meant to indicate an idea. 
A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. @@ -247,7 +120,7 @@ from pydantic import Field from pipelex.core.stuffs.structured_content import StructuredContent -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent class YourModel(StructuredContent): # Always be a subclass of StructuredContent # Required fields field1: str @@ -260,21 +133,21 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent # Date fields should remove timezone date_field: Optional[datetime] = None ``` -### Usage +#### Usage Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. -### Best Practices for structures +#### Best Practices for structures - Respect Pydantic v2 standards - Use type hints for all fields - Use `Field` declaration and write the description -## Pipe Controllers and Pipe Operators +### Pipe Controllers and Pipe Operators Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: @@ -290,11 +163,11 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ - `PipeImgGen` - Generate Images - `PipeFunc` - For running classic python scripts -## PipeSequence controller +### PipeSequence controller Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. -### Basic Structure +#### Basic Structure ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -308,13 +181,13 @@ steps = [ ] ``` -### Key Components +#### Key Components 1. 
**Steps Array**: List of pipes to execute in sequence - `pipe`: Name of the pipe to execute - `result`: Name to assign to the pipe's output that will be in the working memory -### Using PipeBatch in Steps +#### Using PipeBatch in Steps You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: @@ -335,11 +208,11 @@ steps = [ The result of a batched step will be a `ListContent` containing the outputs from processing each item. -## PipeCondition controller +### PipeCondition controller The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. -### Basic usage +#### Basic usage ```plx [pipe.conditional_operation] @@ -348,8 +221,9 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" @@ -362,28 +236,45 @@ description = "A conditonal pipe to decide wheter..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" -[pipe.conditional_operation.pipe_map] +[pipe.conditional_operation.outcomes] small = "process_small" medium = "process_medium" large = "process_large" ``` -### Key Parameters +#### Key Parameters - `expression`: Direct boolean or string expression (mutually exclusive with expression_template) - `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `pipe_map`: Dictionary mapping expression results to pipe codes : -1 - The key on the left (`small`, `medium`) is the result of `expression` or `expression_template`. 
-2 - The value on the right (`process_small`, `process_medium`, ..) is the name of the pipce to trigger +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found -## PipeLLM operator +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator PipeLLM is used to: 1. Generate text or objects with LLMs 2. Process images with Vision LLMs -### Basic Usage +#### Basic Usage Simple Text Generation: ```plx @@ -391,7 +282,7 @@ Simple Text Generation: type = "PipeLLM" description = "Write a short story" output = "Text" -prompt_template = """ +prompt = """ Write a short story about a programmer. 
""" ``` @@ -403,7 +294,7 @@ type = "PipeLLM" description = "Extract information" inputs = { text = "Text" } output = "PersonInfo" -prompt_template = """ +prompt = """ Extract person information from this text: @text """ @@ -416,10 +307,10 @@ type = "PipeLLM" description = "Expert analysis" output = "Analysis" system_prompt = "You are a data analysis expert" -prompt_template = "Analyze this data" +prompt = "Analyze this data" ``` -### Multiple Outputs +#### Multiple Outputs Generate multiple outputs (fixed number): ```plx @@ -439,19 +330,33 @@ output = "Idea" multiple_output = true # Let the LLM decide how many to generate ``` -### Vision +#### Vision -Process images with VLMs: +Process images with VLMs (image inputs must be tagged in the prompt): ```plx [pipe.analyze_image] type = "PipeLLM" description = "Analyze image" -inputs = { image = "Image" } # `image` is the name of the stuff that contains the Image. If its in an attribute within a stuff, you can add something like `{ "page.image": "Image" } +inputs = { image = "Image" } output = "ImageAnalysis" -prompt_template = "Describe what you see in this image" +prompt = """ +Describe what you see in this image: + +$image +""" ``` -### Writing prompts for PipeLLM +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM **Insert stuff inside a tagged block** @@ -459,7 +364,7 @@ If the inserted text is supposedly a long text, made of several lines or paragra Example template: ```plx -prompt_template = """ +prompt = """ Match the expense with its corresponding invoice: @expense @@ -467,7 +372,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt template. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. @@ -478,7 +383,7 @@ If the inserted text is short text and it makes sense to have it inserted direct Example template: ```plx -prompt_template = """ +prompt = """ Your goal is to summarize everything related to $topic in the provided text: @text @@ -495,11 +400,11 @@ DO NOT write "$topic" alone in an isolated line. DO write things like "Write an essay about $topic" to include text into an actual sentence. -## PipeExtract operator +### PipeExtract operator The PipeExtract operator is used to extract text and images from an image or a PDF -### Simple Text Extraction +#### Simple Text Extraction ```plx [pipe.extract_info] type = "PipeExtract" @@ -508,7 +413,17 @@ inputs = { document = "PDF" } # or { image = "Image" } if it's an image.
This is output = "Page" ``` -Only one input is allowed and it must either be an `Image` or a `PDF`. +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. The output concept `Page` is a native concept, with the structure `PageContent`: It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` @@ -525,11 +440,11 @@ class PageContent(StructuredContent): # CONCEPT IS "Page" - `text_and_images` are the text, and the related images found in the input image or PDF. - `page_view` is the screenshot of the whole pdf page/image. -## PipeCompose operator +### PipeCompose operator The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
-### Basic Usage +#### Basic Usage Simple Template Composition: ```plx @@ -538,8 +453,8 @@ type = "PipeCompose" description = "Compose a report using template" inputs = { data = "ReportData" } output = "Text" -jinja2 = """ -# Report Summary +template = """ +## Report Summary Based on the analysis: $data @@ -555,7 +470,21 @@ type = "PipeCompose" description = "Use a predefined template" inputs = { content = "Text" } output = "Text" -jinja2_name = "standard_report_template" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } ``` CRM Email Template: @@ -567,7 +496,7 @@ inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } output = "Text" template_category = "html" templating_style = { tag_style = "square_brackets", text_format = "html" } -jinja2 = """ +template = """ Subject: Following up on our $deal.product_name discussion Hi $customer.first_name, @@ -597,25 +526,30 @@ $sales_rep.phone | $sales_rep.email """ ``` -### Key Parameters +#### Key Parameters -- `jinja2`: Inline Jinja2 template (mutually exclusive with jinja2_name) -- `jinja2_name`: Name of a predefined template (mutually exclusive with jinja2) +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) - `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) 
- `templating_style`: Styling options for template rendering - `extra_context`: Additional context variables for template -### Template Variables +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables Use the same variable insertion rules as PipeLLM: - `@variable` for block insertion (multi-line content) - `$variable` for inline insertion (short text) -## PipeImgGen operator +### PipeImgGen operator The PipeImgGen operator is used to generate images using AI image generation models. -### Basic Usage +#### Basic Usage Simple Image Generation: ```plx @@ -624,7 +558,6 @@ type = "PipeImgGen" description = "Generate an image from prompt" inputs = { prompt = "ImgGenPrompt" } output = "Image" -model = "flux-pro/v1.1-ultra" ``` Using Image Generation Settings: @@ -634,9 +567,9 @@ type = "PipeImgGen" description = "Generate a high-quality photo" inputs = { prompt = "ImgGenPrompt" } output = "Photo" -model = { model = "flux-pro/v1.1-ultra", quality = "hd" } +model = { model = "fast-img-gen" } aspect_ratio = "16:9" -nb_steps = 8 +quality = "hd" ``` Multiple Image Generation: @@ -666,14 +599,11 @@ is_raw = false safety_tolerance = 3 ``` -### Key Parameters +#### Key Parameters **Image Generation Settings:** -- `img_gen`: ImgGenChoice (preset name or inline settings) -- `img_gen_handle`: Direct model handle (legacy) +- `model`: Model choice (preset name or inline settings with model name) - `quality`: Image quality ("standard", "hd") -- `nb_steps`: Number of generation steps -- `guidance_scale`: How closely to follow the prompt **Output Configuration:** - `nb_output`: Number of images to generate @@ -687,7 +617,7 @@ safety_tolerance = 3 - `is_moderated`: Enable content moderation - `safety_tolerance`: Content safety level (1-6) -### Input Requirements +#### Input Requirements PipeImgGen 
requires exactly one input that must be either: - An `ImgGenPrompt` concept @@ -695,11 +625,11 @@ PipeImgGen requires exactly one input that must be either: The input can be named anything but must contain the prompt text for image generation. -## PipeFunc operator +### PipeFunc operator The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. -### Basic Usage +#### Basic Usage Simple Function Call: ```plx @@ -721,11 +651,11 @@ output = "FileContent" function_name = "read_file_content" ``` -### Key Parameters +#### Key Parameters - `function_name`: Name of the Python function to call (must be registered in func_registry) -### Function Requirements +#### Function Requirements The Python function must: @@ -742,7 +672,7 @@ The Python function must: - `list[StuffContent]`: Multiple content objects (becomes ListContent) - `str`: Simple string (becomes TextContent) -### Function Registration +#### Function Registration Functions must be registered in the function registry before use: @@ -761,7 +691,7 @@ async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: return MyResultContent(data=result) ``` -### Working Memory Access +#### Working Memory Access Inside the function, access pipeline inputs through working memory: @@ -780,9 +710,9 @@ async def process_function(working_memory: WorkingMemory) -> TextContent: --- -## Rules to choose LLM models used in PipeLLMs. +### Rules to choose LLM models used in PipeLLMs. -### LLM Configuration System +#### LLM Configuration System In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. 
LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: @@ -791,7 +721,7 @@ LLM configurations are managed through the new inference backend system with fil - **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` - **Routing**: `.pipelex/inference/routing_profiles.toml` -### LLM Handles +#### LLM Handles An llm_handle can be either: 1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system @@ -807,9 +737,9 @@ base-mistral = "mistral-medium" The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. -### Using an LLM Handle in a PipeLLM +#### Using an LLM Handle in a PipeLLM -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: +Here is an example of using a model to specify which LLM to use in a PipeLLM: ```plx [pipe.hello_world] @@ -817,14 +747,14 @@ type = "PipeLLM" description = "Write text about Hello World." output = "Text" model = { model = "gpt-5", temperature = 0.9 } -prompt_template = """ +prompt = """ Write a haiku about Hello World. """ ``` As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). -### LLM Presets +#### LLM Presets Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. @@ -843,7 +773,7 @@ description = "Extract invoice information from an invoice text transcript" inputs = { invoice_text = "InvoiceText" } output = "Invoice" model = "llm_to_extract_invoice" -prompt_template = """ +prompt = """ Extract invoice information from this invoice: The category of this invoice is: $invoice_details.category. 
@@ -863,11 +793,9 @@ ALWAYS RUN `make validate` when you are finished writing pipelines: This checks Then, create an example file to run the pipeline in the `examples` folder. But don't write documentation unless asked explicitly to. ---- - -# Guide to write an example to execute a pipeline +## Guide to execute a pipeline and write example code -## Example to execute a pipeline with text output +### Example to execute a pipeline with text output ```python import asyncio @@ -889,14 +817,14 @@ async def hello_world() -> str: return pipe_output.main_stuff_as_str -# start Pipelex +## start Pipelex Pipelex.make() -# run sample using asyncio +## run sample using asyncio output_text = asyncio.run(hello_world()) pretty_print(output_text, title="Your first Pipelex output") ``` -## Example to execute a pipeline with structured output +### Example to execute a pipeline with structured output ```python import asyncio @@ -926,17 +854,17 @@ async def extract_gantt(image_url: str) -> GanttChart: return pipe_output.main_stuff_as(content_type=GanttChart) -# start Pipelex +## start Pipelex Pipelex.make() -# run sample using asyncio +## run sample using asyncio gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) pretty_print(gantt_chart, title="Gantt Chart") ``` -## Setting up the input memory +### Setting up the input memory -### Explanation of input memory +#### Explanation of input memory The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: ```python @@ -945,13 +873,13 @@ ImplicitMemory = Dict[str, StuffContentOrData] ``` As you can seen, we made it so different ways can be used to define that stuff using structured content or data. 
-### Different ways to set up the input memory +#### Different ways to set up the input memory So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: ```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. pipe_output = await execute_pipeline( pipe_code="master_advisory_orchestrator", input_memory={ @@ -959,9 +887,9 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: pipe_output = await execute_pipeline( pipe_code="power_extractor_dpe", input_memory={ @@ -969,8 +897,8 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input and it's an Image. -# Because ImageContent is a native concept, we can use it directly as a value: +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: pipe_output = await execute_pipeline( pipe_code="fashion_variation_pipeline", input_memory={ @@ -978,9 +906,9 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here we have a single input, it's an image but -# its actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: pipe_output = await execute_pipeline( pipe_code="extract_gantt_by_steps", input_memory={ @@ -991,7 +919,7 @@ So here are a few concrete examples of calls to execute_pipeline with various wa }, ) -# Here is a more complex example with multiple inputs assigned using different ways: +## Here is a more complex example with multiple inputs assigned using different ways: pipe_output = await execute_pipeline( pipe_code="retrieve_then_answer", dynamic_output_concept_code="contracts.Fees", @@ -1006,7 +934,7 @@ So here are a few concrete examples of calls to execute_pipeline with various wa ) ``` -## Using the outputs of a pipeline +### Using the outputs of a pipeline All pipe executions return a `PipeOutput` object. It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. @@ -1062,7 +990,7 @@ class PipeOutput(BaseModel): As you can see, you can extarct any variable from the output working memory. -### Getting the main stuff as a specific type +#### Getting the main stuff as a specific type Simple text as a string: @@ -1089,160 +1017,81 @@ result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) --- -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead.
- -### Test file structure - -- Name test files with `test_` prefix -- Use descriptive names that match the functionality being tested -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put test inside Test classes. -- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference`and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. - -### Test Class Structure +## Rules to choose LLM models used in PipeLLMs. 
-Always group the tests of a module into a test class: +### LLM Configuration System -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` -## Writing integration test to test pipes +### LLM Handles -### Required imports for pipe tests +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" ``` -### Pipe test implementation steps - -1. Create Stuff from blueprint: +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` +### Using an LLM Handle in a PipeLLM -2. Create Working Memory: +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" ``` -3. 
Run the pipe: +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` +### LLM Presets -4. Basic assertions: +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } ``` -### Test Data Organization +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: -- If it's not already there, create a `test_data.py` file in the test directory -- Define test cases using `StuffBlueprint`: +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) +The category of this invoice is: $invoice_details.category. 
- CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] +@invoice_text +""" ``` -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. If needed, you can add llm presets. -## Best Practices for Testing -- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose -- Log outputs for debugging -- Generate reports for cost tracking +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/README.md b/README.md index 5d57c07b6..fabf80da6 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ description = "Extract buyer from purchase document" inputs = { purchase_document_text = "PurchaseDocumentText" } output = "Buyer" model = "llm_to_extract_info" -prompt_template = """ +prompt = """ Extract the first and last name of the buyer from this purchase document: @purchase_document_text """ @@ -257,7 +257,7 @@ system_prompt = """ You are an expert in social media optimization, particularly for tech content on Twitter/X. Your role is to analyze tech tweets and check if they display typical startup communication pitfalls. 
""" -prompt_template = """ +prompt = """ Evaluate the tweet for these key issues: **Fluffiness** - Overuse of buzzwords without concrete meaning (e.g., "synergizing disruptive paradigms") @@ -286,7 +286,7 @@ system_prompt = """ You are an expert in writing engaging tech tweets that drive meaningful discussions and engagement. Your goal is to rewrite tweets to be impactful and avoid the pitfalls identified in the analysis. """ -prompt_template = """ +prompt = """ Rewrite this tech tweet to be more engaging and effective, based on the analysis: Original tweet: diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/design_and_run_pipelines.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/design_and_run_pipelines.md index 265e5b4df..49f14c4fa 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/design_and_run_pipelines.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/design_and_run_pipelines.md @@ -40,7 +40,7 @@ type = "PipeLLM" description = "Generate a catchy tagline for a product" inputs = { description = "ProductDescription" } output = "Tagline" -prompt_template = """ +prompt = """ Product Description: @description @@ -76,7 +76,7 @@ description = "Extract keywords from a product description" inputs = { description = "ProductDescription" } output = "Keyword" multiple_output = true -prompt_template = """ +prompt = """ Please extract the most relevant keywords from the following product description: @description @@ -89,7 +89,7 @@ type = "PipeLLM" description = "Generate a tagline from keywords" inputs = { keywords = "Keyword" } output = "Tagline" -prompt_template = """ +prompt = """ Here are the key product keywords: @keywords diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md index 26a39347f..aad64e211 100644 --- 
a/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md @@ -21,7 +21,7 @@ type = "PipeLLM" description = "Answer a question" inputs = { question = "tutorial.Question" } output = "tutorial.Answer" -prompt_template = """ +prompt = """ Please answer the following question: @question diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-controllers/PipeCondition.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-controllers/PipeCondition.md index e71addcf0..ece06b821 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-controllers/PipeCondition.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-controllers/PipeCondition.md @@ -61,7 +61,7 @@ large = "process_large" type = "PipeLLM" description = "Handle small category" output = "native.Text" -prompt_template = """ +prompt = """ Output this only: "small" """ @@ -69,7 +69,7 @@ Output this only: "small" type = "PipeLLM" description = "Handle medium category" output = "native.Text" -prompt_template = """ +prompt = """ Output this only: "medium" """ @@ -77,7 +77,7 @@ Output this only: "medium" type = "PipeLLM" description = "Handle large category" output = "native.Text" -prompt_template = """ +prompt = """ Output this only: "large" """ ``` @@ -109,7 +109,7 @@ type = "PipeLLM" description = "Process invoice documents" inputs = { classification = "DocumentType" } output = "ProcessedDocument" -prompt_template = """ +prompt = """ Process this invoice document... """ @@ -118,7 +118,7 @@ type = "PipeLLM" description = "Process receipt documents" inputs = { classification = "DocumentType" } output = "ProcessedDocument" -prompt_template = """ +prompt = """ Process this receipt document... 
""" @@ -127,7 +127,7 @@ type = "PipeLLM" description = "Handle unknown document types" inputs = { classification = "DocumentType" } output = "ProcessedDocument" -prompt_template = """ +prompt = """ Process this unknown document type... """ ``` diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md index e02a41e93..08f0e50dd 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md @@ -27,7 +27,7 @@ type = "PipeLLM" description = "Describe an image" inputs = { image = "Image" } output = "VisualDescription" -prompt_template = """ +prompt = """ Describe the provided image in great detail. """ ``` @@ -44,7 +44,7 @@ type = "PipeLLM" description = "Analyze wedding photo" inputs = { wedding_photo = "images.Photo" } output = "PhotoAnalysis" -prompt_template = """ +prompt = """ Analyze this wedding photo and describe the key moments captured. """ ``` @@ -59,7 +59,7 @@ type = "PipeLLM" description = "Analyze the visual layout of a page" inputs = { "page_content.page_view" = "Image" } output = "LayoutAnalysis" -prompt_template = """ +prompt = """ Analyze the visual layout and design elements of this page. Focus on typography, spacing, and overall composition. """ @@ -83,7 +83,7 @@ inputs = { second_image = "Image" } output = "ImageComparison" -prompt_template = """ +prompt = """ Compare these two images and describe their similarities and differences. """ ``` @@ -101,7 +101,7 @@ inputs = { document.page_view = "Image" } output = "DocumentAnalysis" -prompt_template = """ +prompt = """ Given this context: $context Analyze the document page shown in the image and explain how it relates to the provided context. 
@@ -167,7 +167,7 @@ type = "PipeLLM" description = "Summarize a text" inputs = { text = "TextToSummarize" } output = "TextSummary" -prompt_template = """ +prompt = """ Please provide a concise summary of the following text: @text @@ -186,7 +186,7 @@ type = "PipeLLM" description = "Extract table data from an image" inputs = { image = "TableScreenshot" } output = "TableData" -prompt_template = """ +prompt = """ Extract the table data from this image and format it as a structured table. """ ``` @@ -204,7 +204,7 @@ type = "PipeLLM" description = "Process an expense report" inputs = { report = "ExpenseReport" } output = "ProcessedExpenseReport" -prompt_template = """ +prompt = """ Analyze this expense report and extract the following information: - Total amount - Date diff --git a/docs/pages/cookbook-examples/extract-dpe.md b/docs/pages/cookbook-examples/extract-dpe.md index b185974c5..297670c5f 100644 --- a/docs/pages/cookbook-examples/extract-dpe.md +++ b/docs/pages/cookbook-examples/extract-dpe.md @@ -67,7 +67,7 @@ output = "Dpe" # The output is structured as a Dpe object model = "llm_for_img_to_text" structuring_method = "preliminary_text" system_prompt = """You are a multimodal LLM, expert in converting images into perfect markdown.""" -prompt_template = """ +prompt = """ You are given an image of a French 'Diagnostic de Performance Energetique'. Your role is to convert the image into perfect markdown. 
diff --git a/docs/pages/cookbook-examples/extract-gantt.md b/docs/pages/cookbook-examples/extract-gantt.md index bb29bdbc1..b18b03856 100644 --- a/docs/pages/cookbook-examples/extract-gantt.md +++ b/docs/pages/cookbook-examples/extract-gantt.md @@ -82,7 +82,7 @@ inputs = { gantt_chart_image = "GanttChartImage", gantt_timescale = "GanttTimesc output = "GanttTaskDetails" # The output is structured as a GanttTaskDetails object structuring_method = "preliminary_text" model = "llm_to_extract_diagram" -prompt_template = """ +prompt = """ I am sharing an image of a Gantt chart. Please analyse the image and for a given task name (and only this task), extract the information of the task, if relevant. diff --git a/docs/pages/cookbook-examples/extract-proof-of-purchase.md b/docs/pages/cookbook-examples/extract-proof-of-purchase.md index f17a75672..9cf496d27 100644 --- a/docs/pages/cookbook-examples/extract-proof-of-purchase.md +++ b/docs/pages/cookbook-examples/extract-proof-of-purchase.md @@ -63,7 +63,7 @@ output = "ProofOfPurchase" # The LLM is forced to output a ProofOfPurchase objec model = "llm_for_img_to_text" structuring_method = "preliminary_text" system_prompt = """You are a multimodal LLM, expert at converting images into perfect markdown.""" -prompt_template = """ +prompt = """ You are given an image of a proof of purchase. Your role is to convert the image into perfect markdown. diff --git a/docs/pages/cookbook-examples/extract-table.md b/docs/pages/cookbook-examples/extract-table.md index 4243ab8f1..f490619ab 100644 --- a/docs/pages/cookbook-examples/extract-table.md +++ b/docs/pages/cookbook-examples/extract-table.md @@ -77,7 +77,7 @@ type = "PipeLLM" description = "Review an HTML table" inputs = { table_screenshot = "TableScreenshot", html_table = "HtmlTable" } output = "HtmlTable" -prompt_template = """ +prompt = """ Your role is to correct an html_table to make sure that it matches the one in the provided image. 
@html_table diff --git a/docs/pages/cookbook-examples/invoice-extractor.md b/docs/pages/cookbook-examples/invoice-extractor.md index 0aacf4bbb..5f3c8d8f7 100644 --- a/docs/pages/cookbook-examples/invoice-extractor.md +++ b/docs/pages/cookbook-examples/invoice-extractor.md @@ -83,7 +83,7 @@ inputs = { "invoice_page.page_view" = "Page", invoice_details = "InvoiceDetails" output = "Invoice" # The output is constrained to the "Invoice" model model = "llm_to_extract_invoice" -prompt_template = """ +prompt = """ Extract invoice information from this invoice: The category of this invoice is: $invoice_details.category. diff --git a/docs/pages/cookbook-examples/write-tweet.md b/docs/pages/cookbook-examples/write-tweet.md index 067c3449a..4e8bdd6d5 100644 --- a/docs/pages/cookbook-examples/write-tweet.md +++ b/docs/pages/cookbook-examples/write-tweet.md @@ -82,7 +82,7 @@ system_prompt = """ You are an expert in social media optimization, particularly for tech content on Twitter/X. Your role is to analyze tech tweets and check if they display typical startup communication pitfalls. """ -prompt_template = """ +prompt = """ Evaluate the tweet for these key issues: **Fluffiness** - Overuse of buzzwords without concrete meaning... diff --git a/docs/pages/quick-start/index.md b/docs/pages/quick-start/index.md index f913699ea..062189cec 100644 --- a/docs/pages/quick-start/index.md +++ b/docs/pages/quick-start/index.md @@ -84,7 +84,7 @@ domain = "characters" type = "PipeLLM" description = "Creates a character." output = "Text" -prompt_template = """You are a book writer. Your task is to create a character. +prompt = """You are a book writer. Your task is to create a character. Think of it and then output the character description.""" ``` @@ -131,7 +131,7 @@ type = "PipeLLM" description = "Create a character." output = "Text" model = { model = "gpt-4o-mini", temperature = 0.9, max_tokens = "auto" } -prompt_template = """You are a book writer. Your task is to create a character. 
+prompt = """You are a book writer. Your task is to create a character. Think of it and then output the character description.""" ``` @@ -143,7 +143,7 @@ type = "PipeLLM" description = "Create a character." output = "Text" model = "llm_for_creative_writing" -prompt_template = """You are a book writer. Your task is to create a character. +prompt = """You are a book writer. Your task is to create a character. Think of it and then output the character description.""" # The llm deck above is defined in `.pipelex/inference/deck/base_deck.toml` as: @@ -195,7 +195,7 @@ Character = "A character is a fiction story" # <- Define here your output concep type = "PipeLLM" description = "Create a character. Get a structured result." output = "Character" # <- This is the output concept for your pipe -prompt_template = """You are a book writer. Your task is to create a character. +prompt = """You are a book writer. Your task is to create a character. Think of it and then output the character description.""" ``` @@ -253,7 +253,7 @@ type = "PipeLLM" description = "Get character information from a description." inputs = { character = "Character" } # <- These are the inputs of your pipe, usable in the prompt_template output = "CharacterMetadata" -prompt_template = """ +prompt = """ You are given a text description of a character. Your task is to extract specific data from the following description. diff --git a/pipelex/kit/agents/llms.md b/pipelex/kit/agents/llms.md index e10ee10f2..f79c234bf 100644 --- a/pipelex/kit/agents/llms.md +++ b/pipelex/kit/agents/llms.md @@ -35,7 +35,7 @@ type = "PipeLLM" description = "Write text about Hello World." output = "Text" model = { model = "gpt-5", temperature = 0.9 } -prompt_template = """ +prompt = """ Write a haiku about Hello World. 
""" ``` @@ -61,7 +61,7 @@ description = "Extract invoice information from an invoice text transcript" inputs = { invoice_text = "InvoiceText" } output = "Invoice" model = "llm_to_extract_invoice" -prompt_template = """ +prompt = """ Extract invoice information from this invoice: The category of this invoice is: $invoice_details.category. diff --git a/pipelex/kit/index.toml b/pipelex/kit/index.toml index 585f5f6d2..cd75bd3d7 100644 --- a/pipelex/kit/index.toml +++ b/pipelex/kit/index.toml @@ -16,12 +16,12 @@ coding_standards = [ pipelex_language = ["write_pipelex.md", "run_pipelex.md", "llms.md"] all = [ "python_standards.md", - "pytest_standards.md", - "docs.md", - "tdd.md", "write_pipelex.md", "run_pipelex.md", + "docs.md", "llms.md", + "pytest_standards.md", + "tdd.md", ] [cursor.front_matter] @@ -56,17 +56,41 @@ description = "LLM configuration and usage guidelines" globs = ["*.plx", "*.toml"] [[targets]] -id = "agents_md" +id = "agents" path = "AGENTS.md" strategy = "merge" -marker_begin = "" -marker_end = "" +marker_begin = "" +marker_end = "" parent = "# Coding Standards & Best Practices" [[targets]] -id = "claude_md" +id = "claude" path = "CLAUDE.md" strategy = "merge" -marker_begin = "" -marker_end = "" +marker_begin = "" +marker_end = "" +parent = "# Coding Standards & Best Practices" + +[[targets]] +id = "github_copilot" +path = ".github/copilot-instructions.md" +strategy = "merge" +marker_begin = "" +marker_end = "" +parent = "# Coding Standards & Best Practices" + +[[targets]] +id = "windsurf" +path = ".windsurfrules.md" +strategy = "merge" +marker_begin = "" +marker_end = "" +parent = "# Coding Standards & Best Practices" + +[[targets]] +id = "blackbox" +path = "BLACKBOX_RULES.md" +strategy = "merge" +marker_begin = "" +marker_end = "" parent = "# Coding Standards & Best Practices" diff --git a/tests/data/test_migrate_v0_1_0_to_v0_2_0.toml b/tests/data/test_migrate_v0_1_0_to_v0_2_0.toml deleted file mode 100644 index 29640d8bc..000000000 --- 
a/tests/data/test_migrate_v0_1_0_to_v0_2_0.toml +++ /dev/null @@ -1,31 +0,0 @@ -domain = "test" -description = "Test domain" - -[concept] -SimpleText = "A simple text concept" -SimpleDoc = "A simple document concept" - -# Complex concepts with old syntax -[concept.ComplexConcept] -Concept = "A complex concept with old syntax" -refines = "Text" - -[concept.AnotherConcept] -Concept = "Another concept to test migration" -structure = "CustomStructure" -refines = "Text" - -[concept.AlreadyMigrated] -description = "This one is already migrated" -refines = "Text" - -[pipe.test_pipe] -PipeLLM = "A test pipe that generates simple text" -inputs = { input_text = "SimpleText" } -output = "SimpleDoc" -prompt_template = """ -Transform this text: -@input_text - -Concept = "This is not a definition of concept, it's just text in the prompt template, so it should not be migrated" -""" From 7d67a819ee2bac1055a5fd2fc3f2ddcb7b6f6ff8 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 17:48:41 +0200 Subject: [PATCH 035/115] Simpler heading 1 --- .windsurfrules.md | 1097 +++++++++++++++++++++++++++++++++ BLACKBOX_RULES.md | 1097 +++++++++++++++++++++++++++++++++ pipelex/kit/index.toml | 27 +- pipelex/kit/index_models.py | 5 +- pipelex/kit/targets_update.py | 54 +- 5 files changed, 2230 insertions(+), 50 deletions(-) create mode 100644 .windsurfrules.md create mode 100644 BLACKBOX_RULES.md diff --git a/.windsurfrules.md b/.windsurfrules.md new file mode 100644 index 000000000..0573dba49 --- /dev/null +++ b/.windsurfrules.md @@ -0,0 +1,1097 @@ + +## Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.)
+ +### Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for structures +- Use descriptive names in `snake_case` + +### Pipeline File Structure +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +#### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +#### Concept Definitions +```plx +[concept] +ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output +``` + +Important Rules: +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") +- Avoid adjectives (no "LargeText", use "Text") +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) + +#### Pipe Definitions + +### Pipe Base Structure + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +DO NOT WRITE: +```plx +[pipe.your_pipe_name] +type = "pipe_sequence" +``` + +But it should be: + +```plx +[pipe.your_pipe_name] +type = "PipeSequence" +description = "....." +``` + +The pipes will all have at least this base structure. +- `inputs`: Dictionary whose keys are the variable names used in the prompts and whose values are the ConceptNames. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition).
+So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: + +### Structuring Models + +#### Model Location and Registration + +- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/<some_domain>.py` +- Models must inherit from `StructuredContent` or appropriate content type + +### Model Structure + +Concepts and their structure classes are meant to indicate an idea. +A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. + +**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** + +DO NOT create structures like: +```python +class Joke(TextContent): + """A humorous text that makes people laugh.""" + pass +``` + +If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: +```plx +[concept] +Joke = "A humorous text that makes people laugh."
+``` +If you simply need to refine another native concept, construct it like this: +```plx +[concept.Landscape] +refines = "Image" +``` +Only create a Python structure class when you need to add specific fields: + +```python +from datetime import datetime +from typing import List, Optional +from pydantic import Field + +from pipelex.core.stuffs.structured_content import StructuredContent + +## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +class YourModel(StructuredContent): # Always be a subclass of StructuredContent + # Required fields + field1: str + field2: int + + # Optional fields with defaults + field3: Optional[str] = Field(None, description="Description of field3") + field4: List[str] = Field(default_factory=list) + + # Date fields should remove timezone + date_field: Optional[datetime] = None +``` +#### Usage + +Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. + +Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. + + +#### Best Practices for structures + +- Respect Pydantic v2 standards +- Use type hints for all fields +- Use `Field` declaration and write the description + + +### Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt them. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc.
+ - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +### PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. + +#### Basic Structure +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +#### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +#### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +### PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. + +#### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +#### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator + +PipeLLM is used to: +1.
Generate text or objects with LLMs +2. Process images with Vision LLMs + +#### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +#### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +#### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence.
+ + +### PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF + +#### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page` + +```python +class TextAndImagesContent(StuffContent): + text: Optional[TextContent] + images: Optional[List[ImageContent]] + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: Optional[ImageContent] = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +### PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more.
+ +#### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +## Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +#### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +### PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +#### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +#### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +#### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +### PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +#### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +#### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +#### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +#### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +#### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +### Rules to choose LLM models used in PipeLLMs. + +#### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +#### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +#### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +#### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +## Guide to execute a pipeline and write example code + +### Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +## start Pipelex +Pipelex.make() +## run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +### Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + +from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +## start Pipelex +Pipelex.make() + +## run sample using asyncio +gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +### Setting up the input memory + +#### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str +ImplicitMemory = Dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +#### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +## Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +### Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +#### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/BLACKBOX_RULES.md b/BLACKBOX_RULES.md new file mode 100644 index 000000000..0573dba49 --- /dev/null +++ b/BLACKBOX_RULES.md @@ -0,0 +1,1097 @@ + +## Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +### Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for structures +- Use descriptive names in `snake_case` + +### Pipeline File Structure +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +#### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
+ +#### Concept Definitions +```plx +[concept] +ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output +``` + +Important Rules: +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") +- Avoid adjectives (no "LargeText", use "Text") +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) + +#### Pipe Definitions + +### Pipe Base Structure + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +DO NOT WRITE: +```plx +[pipe.your_pipe_name] +type = "pipe_sequence" +``` + +But it should be: + +```plx +[pipe.your_pipe_name] +type = "PipeSequence" +description = "....." +``` + +The pipes will all have at least this base structure. +- `inputs`: Dictionary where each key is the name of a variable used in the prompts, and each value is its ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: + +### Structuring Models + +#### Model Location and Registration + +- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/some_domain.py` +- Models must inherit from `StructuredContent` or appropriate content type + +### Model Structure + +Concepts and their structure classes are meant to indicate an idea. +A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. + +**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** + +DO NOT create structures like: +```python +class Joke(TextContent): + """A humorous text that makes people laugh.""" + pass +``` + +If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: +```plx +[concept] +Joke = "A humorous text that makes people laugh." +``` +If you simply need to refine another native concept, construct it like this: +```plx +[concept.Landscape] +refines = "Image" +``` +Only create a Python structure class when you need to add specific fields: + +```python +from datetime import datetime +from typing import List, Optional +from pydantic import Field + +from pipelex.core.stuffs.structured_content import StructuredContent + +## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent +class YourModel(StructuredContent): # Always be a subclass of StructuredContent + # Required fields + field1: str + field2: int + + # Optional fields with defaults + field3: Optional[str] = Field(None, description="Description of field3") + field4: List[str] = Field(default_factory=list) + + # Date fields should remove timezone + date_field: Optional[datetime] = None +``` +#### Usage + +Structures are meant to indicate what class to use for a particular Concept. 
In general they use the same name as the concept. + +Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. + + +#### Best Practices for structures + +- Respect Pydantic v2 standards +- Use type hints for all fields +- Use `Field` declaration and write the description + + +### Pipe Controllers and Pipe Operators + +Look at the available pipes and adapt them to your needs. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - For choosing the next pipe depending on an expression evaluated on a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +### PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. + +#### Basic Structure +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +#### Key Components + +1. 
**Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +#### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +### PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +#### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." 
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +#### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. Process images with Vision LLMs + +#### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. 
+""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +#### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +#### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." +``` + +#### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown document. To include stuff as a block, use the "@" prefix. 
+ +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each; that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. + + +### PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +#### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. 
+output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: Optional[TextContent] + images: Optional[List[ImageContent]] + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: Optional[ImageContent] = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +### PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +#### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +## Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +#### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +### PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +#### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +#### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +#### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +### PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +#### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +#### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +#### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +#### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +#### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +### Rules to choose LLM models used in PipeLLMs. + +#### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +#### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +#### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +#### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +## Guide to execute a pipeline and write example code + +### Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +## start Pipelex +Pipelex.make() +## run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +### Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + +from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +## start Pipelex +Pipelex.make() + +## run sample using asyncio +gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +### Setting up the input memory + +#### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str +ImplicitMemory = Dict[str, StuffContentOrData] +``` +As you can see, we made it so that the stuff can be defined in different ways, using either structured content or data. + +#### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +## Here is a more complex example with multiple inputs assigned in different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +### Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +#### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/pipelex/kit/index.toml b/pipelex/kit/index.toml index cd75bd3d7..ed7b9a2ca 100644 --- a/pipelex/kit/index.toml +++ b/pipelex/kit/index.toml @@ -1,6 +1,6 @@ [meta] version = "1.0.0" -description = "Pipelex kit configuration for agent documentation and cursor rules" +description = "Pipelex kit configuration for agent rules" [agents] demote = 1 @@ -55,42 +55,37 @@ globs = ["examples/**/*.py"] description = "LLM configuration and usage guidelines" globs = ["*.plx", "*.toml"] -[[targets]] -id = "agents" +[targets.agents] path = "AGENTS.md" strategy = "merge" marker_begin = "" marker_end = "" -parent = "# Coding Standards & Best Practices" +heading_1 = "# Pipelex Rules" -[[targets]] -id = "claude" +[targets.claude] path = "CLAUDE.md" strategy = "merge" marker_begin = "" marker_end = "" -parent = "# Coding Standards & Best Practices" +heading_1 = "# Pipelex Rules" -[[targets]] -id = "github_copilot" +[targets.github_copilot] path = ".github/copilot-instructions.md" strategy = "merge" marker_begin = "" marker_end = "" -parent = "# Coding Standards & Best Practices" +heading_1 = "# Pipelex Rules" -[[targets]] -id = "windsurf" +[targets.windsurf] path = ".windsurfrules.md" strategy = "merge" marker_begin = "" marker_end = "" -parent = "# Coding Standards & Best Practices" +heading_1 = "# Pipelex Rules" -[[targets]] -id = "blackbox" +[targets.blackbox] path = "BLACKBOX_RULES.md" strategy = "merge" marker_begin = "" marker_end = "" -parent = "# Coding Standards & Best Practices" +heading_1 = "# Pipelex Rules" diff --git a/pipelex/kit/index_models.py b/pipelex/kit/index_models.py index 3f297dec9..be1730e4b 100644 --- a/pipelex/kit/index_models.py +++ b/pipelex/kit/index_models.py @@ -29,12 +29,11 @@ class CursorSpec(BaseModel): class Target(BaseModel): """Configuration for a single-file merge target.""" - id: str = 
Field(description="Unique identifier for this target") path: str = Field(description="Path to the target file relative to repo root") strategy: str = Field(description="Merge strategy (currently only 'merge' supported)") marker_begin: str = Field(description="Beginning marker for content insertion") marker_end: str = Field(description="Ending marker for content insertion") - parent: str | None = Field(default=None, description="Parent heading to insert under if markers not found") + heading_1: str | None = Field(default=None, description="Main title (H1) to add when inserting into empty file or file with no H1 headings") class KitIndex(BaseModel): @@ -43,4 +42,4 @@ class KitIndex(BaseModel): meta: dict[str, Any] = Field(default_factory=dict, description="Metadata about the kit configuration") agents: AgentsMerge = Field(description="Agent documentation merge configuration") cursor: CursorSpec = Field(description="Cursor rules export configuration") - targets: list[Target] = Field(description="List of single-file merge targets") + targets: dict[str, Target] = Field(description="Dictionary of single-file merge targets keyed by ID") diff --git a/pipelex/kit/targets_update.py b/pipelex/kit/targets_update.py index 18ed0695c..f02f33452 100644 --- a/pipelex/kit/targets_update.py +++ b/pipelex/kit/targets_update.py @@ -76,13 +76,13 @@ def build_merged_rules(agents_dir: Traversable, idx: KitIndex, agent_set: str | return ("\n\n".join(parts)).strip() + "\n" -def _insert_block_with_ast(target_md: str, block_md: str, parent: str | None, markers: tuple[str, str]) -> str: - """Insert block into target markdown with heuristic placement. +def _insert_block_with_markers(target_md: str, block_md: str, main_title: str | None, markers: tuple[str, str]) -> str: + """Insert block into target markdown using marker-based logic. 
Args: target_md: Existing target markdown content block_md: Block to insert - parent: Parent heading to insert under (if specified) + main_title: Main title (H1) to add when inserting into empty file or file with no H1 headings markers: Tuple of (begin_marker, end_marker) Returns: @@ -91,30 +91,22 @@ def _insert_block_with_ast(target_md: str, block_md: str, parent: str | None, ma marker_begin, marker_end = markers wrapped_block = wrap(marker_begin, marker_end, block_md) - if not target_md: - # Empty file - just insert the wrapped block - return wrapped_block + "\n" - - # If parent heading is specified, try to find it and insert after - if parent: - # Escape special regex characters in parent - escaped_parent = re.escape(parent.strip()) - # Look for the parent heading line - pattern = rf"^({escaped_parent})\s*$" - match = re.search(pattern, target_md, flags=re.MULTILINE | re.IGNORECASE) - if match: - # Insert after the parent heading line - insert_pos = match.end() - return target_md[:insert_pos] + "\n\n" + wrapped_block + "\n" + target_md[insert_pos:] - - # Fallback: insert after first H1 heading + # Check if file is empty or has no H1 heading + is_empty = not target_md or not target_md.strip() h1_pattern = r"^#\s+.+$" - match = re.search(h1_pattern, target_md, flags=re.MULTILINE) - if match: - insert_pos = match.end() - return target_md[:insert_pos] + "\n\n" + wrapped_block + "\n" + target_md[insert_pos:] + has_h1 = bool(target_md) and bool(re.search(h1_pattern, target_md, flags=re.MULTILINE)) - # Last resort: append at the end + # If empty or no H1 heading, add main_title at top if provided + if (is_empty or not has_h1) and main_title: + if is_empty: + return f"{main_title}\n\n{wrapped_block}\n" + else: + # File has content but no H1 - add title at top, preserve content, append wrapped block + return f"{main_title}\n\n{target_md.rstrip()}\n\n{wrapped_block}\n" + + # Otherwise append at the end + if is_empty: + return wrapped_block + "\n" return target_md.rstrip() 
+ "\n\n" + wrapped_block + "\n" @@ -142,7 +134,7 @@ def _diff(before: str, after: str, path: str) -> str: def update_targets( repo_root: Path, merged_rules: str, - targets: list[Target], + targets: dict[str, Target], dry_run: bool, diff: bool, backup: str | None, @@ -152,12 +144,12 @@ def update_targets( Args: repo_root: Repository root directory merged_rules: Merged markdown content to insert - targets: List of target file configurations + targets: Dictionary of target file configurations keyed by ID dry_run: If True, only print what would be done diff: If True, show unified diff backup: Backup suffix (e.g., ".bak"), or None for no backup """ - for target in targets: + for target in targets.values(): target_path = repo_root / target.path before = target_path.read_text(encoding="utf-8") if target_path.exists() else "" @@ -168,11 +160,11 @@ def update_targets( wrapped_block = wrap(target.marker_begin, target.marker_end, merged_rules) after = replace_span(before, span, wrapped_block) else: - # No markers - insert via AST and add markers - after = _insert_block_with_ast( + # No markers - insert with markers + after = _insert_block_with_markers( before, merged_rules, - target.parent, + target.heading_1, (target.marker_begin, target.marker_end), ) From 13ee80489e6a62ebdf19b709841a18ec751a9392 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 18:00:26 +0200 Subject: [PATCH 036/115] refactor Kit index --- pipelex/cli/commands/kit_cmd.py | 2 +- pipelex/kit/{agents => agent_rules}/docs.md | 0 pipelex/kit/{agents => agent_rules}/llms.md | 0 .../pytest_standards.md | 0 .../python_standards.md | 0 .../{agents => agent_rules}/run_pipelex.md | 0 pipelex/kit/{agents => agent_rules}/tdd.md | 0 .../{agents => agent_rules}/write_pipelex.md | 0 pipelex/kit/cursor_export.py | 6 ++-- pipelex/kit/index.toml | 30 +++++++++---------- pipelex/kit/index_loader.py | 16 ++++++++-- pipelex/kit/index_models.py | 22 +++++++------- pipelex/kit/paths.py | 4 +-- 
pipelex/kit/targets_update.py | 10 +++---- 14 files changed, 51 insertions(+), 39 deletions(-) rename pipelex/kit/{agents => agent_rules}/docs.md (100%) rename pipelex/kit/{agents => agent_rules}/llms.md (100%) rename pipelex/kit/{agents => agent_rules}/pytest_standards.md (100%) rename pipelex/kit/{agents => agent_rules}/python_standards.md (100%) rename pipelex/kit/{agents => agent_rules}/run_pipelex.md (100%) rename pipelex/kit/{agents => agent_rules}/tdd.md (100%) rename pipelex/kit/{agents => agent_rules}/write_pipelex.md (100%) diff --git a/pipelex/cli/commands/kit_cmd.py b/pipelex/cli/commands/kit_cmd.py index d7d293128..988bde29a 100644 --- a/pipelex/cli/commands/kit_cmd.py +++ b/pipelex/cli/commands/kit_cmd.py @@ -45,7 +45,7 @@ def sync( typer.echo("📝 Building merged agent documentation...") merged_md = build_merged_rules(agents_dir, idx) typer.echo("📝 Updating target files...") - update_targets(repo_root, merged_md, idx.targets, dry_run=dry_run, diff=diff, backup=backup) + update_targets(repo_root, merged_md, idx.agent_rules.targets, dry_run=dry_run, diff=diff, backup=backup) if dry_run: typer.echo("✅ Dry run completed - no changes made") diff --git a/pipelex/kit/agents/docs.md b/pipelex/kit/agent_rules/docs.md similarity index 100% rename from pipelex/kit/agents/docs.md rename to pipelex/kit/agent_rules/docs.md diff --git a/pipelex/kit/agents/llms.md b/pipelex/kit/agent_rules/llms.md similarity index 100% rename from pipelex/kit/agents/llms.md rename to pipelex/kit/agent_rules/llms.md diff --git a/pipelex/kit/agents/pytest_standards.md b/pipelex/kit/agent_rules/pytest_standards.md similarity index 100% rename from pipelex/kit/agents/pytest_standards.md rename to pipelex/kit/agent_rules/pytest_standards.md diff --git a/pipelex/kit/agents/python_standards.md b/pipelex/kit/agent_rules/python_standards.md similarity index 100% rename from pipelex/kit/agents/python_standards.md rename to pipelex/kit/agent_rules/python_standards.md diff --git 
a/pipelex/kit/agents/run_pipelex.md b/pipelex/kit/agent_rules/run_pipelex.md similarity index 100% rename from pipelex/kit/agents/run_pipelex.md rename to pipelex/kit/agent_rules/run_pipelex.md diff --git a/pipelex/kit/agents/tdd.md b/pipelex/kit/agent_rules/tdd.md similarity index 100% rename from pipelex/kit/agents/tdd.md rename to pipelex/kit/agent_rules/tdd.md diff --git a/pipelex/kit/agents/write_pipelex.md b/pipelex/kit/agent_rules/write_pipelex.md similarity index 100% rename from pipelex/kit/agents/write_pipelex.md rename to pipelex/kit/agent_rules/write_pipelex.md diff --git a/pipelex/kit/cursor_export.py b/pipelex/kit/cursor_export.py index 4c2ed7e12..ec1c7d260 100644 --- a/pipelex/kit/cursor_export.py +++ b/pipelex/kit/cursor_export.py @@ -35,10 +35,10 @@ def _front_matter_for(name: str, idx: KitIndex) -> dict[str, Any]: Returns: Merged front-matter dictionary """ - base = dict(idx.cursor.front_matter) + base = dict(idx.agent_rules.cursor.front_matter) key = name.removesuffix(".md") - if key in idx.cursor.files: - base |= idx.cursor.files[key].front_matter + if key in idx.agent_rules.cursor.files: + base |= idx.agent_rules.cursor.files[key].front_matter return base diff --git a/pipelex/kit/index.toml b/pipelex/kit/index.toml index ed7b9a2ca..42a723d5c 100644 --- a/pipelex/kit/index.toml +++ b/pipelex/kit/index.toml @@ -2,11 +2,11 @@ version = "1.0.0" description = "Pipelex kit configuration for agent rules" -[agents] +[agent_rules] demote = 1 default_set = "pipelex_language" -[agents.sets] +[agent_rules.sets] coding_standards = [ "python_standards.md", "pytest_standards.md", @@ -24,66 +24,66 @@ all = [ "tdd.md", ] -[cursor.front_matter] +[agent_rules.cursor.front_matter] alwaysApply = false -[cursor.files.python_standards.front_matter] +[agent_rules.cursor.files.python_standards.front_matter] description = "Python coding standards and best practices" globs = ["**/*.py"] -[cursor.files.pytest_standards.front_matter] 
+[agent_rules.cursor.files.pytest_standards.front_matter] description = "Guidelines for writing unit tests" globs = ["tests/**/*.py"] -[cursor.files.docs.front_matter] +[agent_rules.cursor.files.docs.front_matter] description = "Guidelines for writing documentation" globs = ["docs/**/*.md"] -[cursor.files.tdd.front_matter] +[agent_rules.cursor.files.tdd.front_matter] description = "Guidelines for writing test-driven development code" globs = [] -[cursor.files.write_pipelex.front_matter] +[agent_rules.cursor.files.write_pipelex.front_matter] description = "Guidelines for writing Pipelex pipelines" globs = ["**/*.plx", "**/pipelines/**/*.py"] -[cursor.files.run_pipelex.front_matter] +[agent_rules.cursor.files.run_pipelex.front_matter] description = "Guidelines for running Pipelex pipelines" globs = ["examples/**/*.py"] -[cursor.files.llms.front_matter] +[agent_rules.cursor.files.llms.front_matter] description = "LLM configuration and usage guidelines" globs = ["*.plx", "*.toml"] -[targets.agents] +[agent_rules.targets.agents] path = "AGENTS.md" strategy = "merge" marker_begin = "" marker_end = "" heading_1 = "# Pipelex Rules" -[targets.claude] +[agent_rules.targets.claude] path = "CLAUDE.md" strategy = "merge" marker_begin = "" marker_end = "" heading_1 = "# Pipelex Rules" -[targets.github_copilot] +[agent_rules.targets.github_copilot] path = ".github/copilot-instructions.md" strategy = "merge" marker_begin = "" marker_end = "" heading_1 = "# Pipelex Rules" -[targets.windsurf] +[agent_rules.targets.windsurf] path = ".windsurfrules.md" strategy = "merge" marker_begin = "" marker_end = "" heading_1 = "# Pipelex Rules" -[targets.blackbox] +[agent_rules.targets.blackbox] path = "BLACKBOX_RULES.md" strategy = "merge" marker_begin = "" diff --git a/pipelex/kit/index_loader.py b/pipelex/kit/index_loader.py index 572e26eb1..530945205 100644 --- a/pipelex/kit/index_loader.py +++ b/pipelex/kit/index_loader.py @@ -1,8 +1,16 @@ """Index loader for kit configuration.""" +from 
pydantic import ValidationError + +from pipelex.exceptions import PipelexException from pipelex.kit.index_models import KitIndex from pipelex.kit.paths import get_kit_root from pipelex.tools.misc.toml_utils import load_toml_from_path +from pipelex.tools.typing.pydantic_utils import format_pydantic_validation_error + + +class KitIndexLoadingError(PipelexException): + pass def load_index() -> KitIndex: @@ -13,8 +21,12 @@ def load_index() -> KitIndex: Raises: TomlError: If TOML parsing fails - ValidationError: If validation fails + KitIndexLoadingError: If validation fails """ index_path = get_kit_root() / "index.toml" data = load_toml_from_path(str(index_path)) - return KitIndex.model_validate(data) + try: + return KitIndex.model_validate(data) + except ValidationError as exc: + msg = f"Validation error in kit index at '{index_path}': {format_pydantic_validation_error(exc)}" + raise KitIndexLoadingError(message=msg) from exc diff --git a/pipelex/kit/index_models.py b/pipelex/kit/index_models.py index be1730e4b..bd4eeddf9 100644 --- a/pipelex/kit/index_models.py +++ b/pipelex/kit/index_models.py @@ -5,14 +5,6 @@ from pydantic import BaseModel, Field -class AgentsMerge(BaseModel): - """Configuration for merging agent documentation files.""" - - sets: dict[str, list[str]] = Field(description="Named sets of agent_rules files (e.g., coding_standards, pipelex_language, all)") - default_set: str = Field(default="pipelex_language", description="Default set to use when syncing") - demote: int = Field(default=1, description="Number of levels to demote headings when merging") - - class CursorFileOverride(BaseModel): """Per-file front-matter overrides for Cursor export.""" @@ -36,10 +28,18 @@ class Target(BaseModel): heading_1: str | None = Field(default=None, description="Main title (H1) to add when inserting into empty file or file with no H1 headings") +class AgentRules(BaseModel): + """Configuration for merging agent documentation files.""" + + sets: dict[str, list[str]] = 
Field(description="Named sets of agent_rules files (e.g., coding_standards, pipelex_language, all)") + default_set: str = Field(default="pipelex_language", description="Default set to use when syncing") + demote: int = Field(default=1, description="Number of levels to demote headings when merging") + cursor: CursorSpec = Field(description="Cursor rules export configuration") + targets: dict[str, Target] = Field(description="Dictionary of single-file merge targets keyed by ID") + + class KitIndex(BaseModel): """Root configuration model for kit index.toml.""" meta: dict[str, Any] = Field(default_factory=dict, description="Metadata about the kit configuration") - agents: AgentsMerge = Field(description="Agent documentation merge configuration") - cursor: CursorSpec = Field(description="Cursor rules export configuration") - targets: dict[str, Target] = Field(description="Dictionary of single-file merge targets keyed by ID") + agent_rules: AgentRules = Field(description="Agent documentation merge configuration") diff --git a/pipelex/kit/paths.py b/pipelex/kit/paths.py index ff29a1eb8..6fda709b0 100644 --- a/pipelex/kit/paths.py +++ b/pipelex/kit/paths.py @@ -17,9 +17,9 @@ def get_agents_dir() -> Traversable: """Get the agents directory within the kit package. 
Returns: - Traversable object pointing to pipelex.kit/agents + Traversable object pointing to pipelex.kit/agent_rules """ - return get_kit_root() / "agents" + return get_kit_root() / "agent_rules" def get_configs_dir() -> Traversable: diff --git a/pipelex/kit/targets_update.py b/pipelex/kit/targets_update.py index f02f33452..05afddf25 100644 --- a/pipelex/kit/targets_update.py +++ b/pipelex/kit/targets_update.py @@ -60,17 +60,17 @@ def build_merged_rules(agents_dir: Traversable, idx: KitIndex, agent_set: str | Merged markdown content with demoted headings """ if agent_set is None: - agent_set = idx.agents.default_set + agent_set = idx.agent_rules.default_set - if agent_set not in idx.agents.sets: - msg = f"Agent set '{agent_set}' not found in index.toml. Available sets: {list(idx.agents.sets.keys())}" + if agent_set not in idx.agent_rules.sets: + msg = f"Agent set '{agent_set}' not found in index.toml. Available sets: {list(idx.agent_rules.sets.keys())}" raise ValueError(msg) parts: list[str] = [] - for name in idx.agents.sets[agent_set]: + for name in idx.agent_rules.sets[agent_set]: md = _read_agent_file(agents_dir, name) - demoted = _demote_headings(md, idx.agents.demote) + demoted = _demote_headings(md, idx.agent_rules.demote) parts.append(demoted.rstrip()) return ("\n\n".join(parts)).strip() + "\n" From b64885a8250aeccc6a235fd0d3b250920c5ac773 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 18:32:49 +0200 Subject: [PATCH 037/115] kit UT --- pipelex/kit/__init__.py | 1 - pipelex/kit/__init__.py.bak | 1 + pyproject.toml | 3 +- .../pipelex/kit/test_cursor_export.py | 47 +++++++ .../pipelex/kit/test_merged_rules.py | 50 ++++++++ .../pipelex/kit/test_targets_update.py | 120 ++++++++++++++++++ tests/unit/pipelex/kit/test_index_loader.py | 39 ++++++ tests/unit/pipelex/kit/test_markers.py | 37 ++++++ tests/unit/pipelex/kit/test_paths.py | 23 ++++ uv.lock | 30 ++--- 10 files changed, 334 insertions(+), 17 deletions(-) create mode 100644 
pipelex/kit/__init__.py.bak create mode 100644 tests/integration/pipelex/kit/test_cursor_export.py create mode 100644 tests/integration/pipelex/kit/test_merged_rules.py create mode 100644 tests/integration/pipelex/kit/test_targets_update.py create mode 100644 tests/unit/pipelex/kit/test_index_loader.py create mode 100644 tests/unit/pipelex/kit/test_markers.py create mode 100644 tests/unit/pipelex/kit/test_paths.py diff --git a/pipelex/kit/__init__.py b/pipelex/kit/__init__.py index 8b1378917..e69de29bb 100644 --- a/pipelex/kit/__init__.py +++ b/pipelex/kit/__init__.py @@ -1 +0,0 @@ - diff --git a/pipelex/kit/__init__.py.bak b/pipelex/kit/__init__.py.bak new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/pipelex/kit/__init__.py.bak @@ -0,0 +1 @@ + diff --git a/pyproject.toml b/pyproject.toml index 5bdb38133..c02c6a9bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -357,7 +357,8 @@ convention = "google" [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = [ - "T201", # Allow print statements in test files + "INP001", # Allow test files to not have __init__.py in their directories (avoids namespace collisions) + "T201", # Allow print statements in test files ] [tool.uv] diff --git a/tests/integration/pipelex/kit/test_cursor_export.py b/tests/integration/pipelex/kit/test_cursor_export.py new file mode 100644 index 000000000..6cb380819 --- /dev/null +++ b/tests/integration/pipelex/kit/test_cursor_export.py @@ -0,0 +1,47 @@ +from pathlib import Path + +from pipelex.kit.cursor_export import export_cursor_rules +from pipelex.kit.index_loader import load_index +from pipelex.kit.paths import get_agents_dir + + +class TestCursorExport: + """Test Cursor rules export functionality.""" + + def test_export_cursor_rules_dry_run(self, tmp_path: Path): + """Test Cursor export in dry-run mode.""" + idx = load_index() + agents_dir = get_agents_dir() + out_dir = tmp_path / "cursor_rules" + + # Dry run should not create files + export_cursor_rules(agents_dir, 
out_dir, idx, dry_run=True) + assert not out_dir.exists() or len(list(out_dir.iterdir())) == 0 + + def test_export_cursor_rules_creates_mdc_files(self, tmp_path: Path): + """Test that Cursor export creates .mdc files.""" + idx = load_index() + agents_dir = get_agents_dir() + out_dir = tmp_path / "cursor_rules" + + export_cursor_rules(agents_dir, out_dir, idx, dry_run=False) + + # Verify output directory exists and contains .mdc files + assert out_dir.exists() + mdc_files = list(out_dir.glob("*.mdc")) + assert len(mdc_files) > 0, "Expected .mdc files to be created" + + def test_export_cursor_rules_have_front_matter(self, tmp_path: Path): + """Test that exported .mdc files have YAML front-matter.""" + idx = load_index() + agents_dir = get_agents_dir() + out_dir = tmp_path / "cursor_rules" + + export_cursor_rules(agents_dir, out_dir, idx, dry_run=False) + + # Check first .mdc file for front-matter + mdc_files = list(out_dir.glob("*.mdc")) + if mdc_files: + content = mdc_files[0].read_text(encoding="utf-8") + assert content.startswith("---\n"), "Expected YAML front-matter to start with ---" + assert "---\n" in content[4:], "Expected YAML front-matter to end with ---" diff --git a/tests/integration/pipelex/kit/test_merged_rules.py b/tests/integration/pipelex/kit/test_merged_rules.py new file mode 100644 index 000000000..de9aeba3e --- /dev/null +++ b/tests/integration/pipelex/kit/test_merged_rules.py @@ -0,0 +1,50 @@ +"""Integration tests for building merged agent documentation.""" + +import pytest + +from pipelex.kit.index_loader import load_index +from pipelex.kit.paths import get_agents_dir +from pipelex.kit.targets_update import build_merged_rules + + +class TestMergedRules: + """Test building merged agent documentation.""" + + def test_build_merged_rules_default_set(self): + """Test building merged rules with default set.""" + idx = load_index() + agents_dir = get_agents_dir() + + merged = build_merged_rules(agents_dir, idx) + assert merged is not None + assert 
len(merged) > 0 + assert merged.endswith("\n") + + def test_build_merged_rules_specific_set(self): + """Test building merged rules with specific set.""" + idx = load_index() + agents_dir = get_agents_dir() + + # Test with 'all' set + merged = build_merged_rules(agents_dir, idx, agent_set="all") + assert merged is not None + assert len(merged) > 0 + + def test_build_merged_rules_invalid_set(self): + """Test building merged rules with invalid set name.""" + idx = load_index() + agents_dir = get_agents_dir() + + with pytest.raises(ValueError, match="Agent set 'nonexistent' not found"): + build_merged_rules(agents_dir, idx, agent_set="nonexistent") + + def test_merged_rules_contain_demoted_headings(self): + """Test that merged rules have demoted headings.""" + idx = load_index() + agents_dir = get_agents_dir() + + merged = build_merged_rules(agents_dir, idx) + + # If demote is 1, check that we have ## headings (demoted from #) + if idx.agent_rules.demote > 0: + assert "##" in merged, "Expected demoted headings in merged content" diff --git a/tests/integration/pipelex/kit/test_targets_update.py b/tests/integration/pipelex/kit/test_targets_update.py new file mode 100644 index 000000000..4b3dbec91 --- /dev/null +++ b/tests/integration/pipelex/kit/test_targets_update.py @@ -0,0 +1,120 @@ +from pathlib import Path + +from pipelex.kit.index_loader import load_index +from pipelex.kit.markers import find_span +from pipelex.kit.paths import get_agents_dir +from pipelex.kit.targets_update import build_merged_rules, update_targets + + +class TestTargetsUpdate: + """Test target file updating functionality.""" + + def test_update_targets_dry_run(self, tmp_path: Path): + """Test updating targets in dry-run mode.""" + idx = load_index() + agents_dir = get_agents_dir() + + # Create a temporary repo root with a target file + repo_root = tmp_path / "repo" + repo_root.mkdir() + target_file = repo_root / "test_target.md" + target_file.write_text("# Test\n\nOriginal content\n", 
encoding="utf-8") + + merged_rules = build_merged_rules(agents_dir, idx) + + # Create a test target + test_targets = {"test": idx.agent_rules.targets["agents"].model_copy(update={"path": "test_target.md"})} + + original_content = target_file.read_text(encoding="utf-8") + + # Dry run should not modify file + update_targets(repo_root, merged_rules, test_targets, dry_run=True, diff=False, backup=None) + + assert target_file.read_text(encoding="utf-8") == original_content + + def test_update_targets_inserts_with_markers(self, tmp_path: Path): + """Test that update_targets inserts content with markers.""" + idx = load_index() + agents_dir = get_agents_dir() + + repo_root = tmp_path / "repo" + repo_root.mkdir() + target_file = repo_root / "test_target.md" + target_file.write_text("# Test\n\nOriginal content\n", encoding="utf-8") + + merged_rules = build_merged_rules(agents_dir, idx) + + test_targets = {"test": idx.agent_rules.targets["agents"].model_copy(update={"path": "test_target.md"})} + + update_targets(repo_root, merged_rules, test_targets, dry_run=False, diff=False, backup=None) + + updated_content = target_file.read_text(encoding="utf-8") + target = test_targets["test"] + + # Verify markers are present + assert target.marker_begin in updated_content + assert target.marker_end in updated_content + + # Verify content is between markers + span = find_span(updated_content, target.marker_begin, target.marker_end) + assert span is not None + + def test_update_targets_replaces_existing_markers(self, tmp_path: Path): + """Test that update_targets replaces content between existing markers.""" + idx = load_index() + agents_dir = get_agents_dir() + + repo_root = tmp_path / "repo" + repo_root.mkdir() + target_file = repo_root / "test_target.md" + + # Create file with existing markers + marker_begin = "" + marker_end = "" + initial_content = f"# Test\n\n{marker_begin}\nOld content\n{marker_end}\n" + target_file.write_text(initial_content, encoding="utf-8") + + merged_rules = 
build_merged_rules(agents_dir, idx) + + test_targets = { + "test": idx.agent_rules.targets["agents"].model_copy( + update={ + "path": "test_target.md", + "marker_begin": marker_begin, + "marker_end": marker_end, + } + ) + } + + update_targets(repo_root, merged_rules, test_targets, dry_run=False, diff=False, backup=None) + + updated_content = target_file.read_text(encoding="utf-8") + + # Verify markers still exist + assert marker_begin in updated_content + assert marker_end in updated_content + + # Verify old content is replaced + assert "Old content" not in updated_content + + def test_update_targets_creates_backup(self, tmp_path: Path): + """Test that update_targets creates backup files when requested.""" + idx = load_index() + agents_dir = get_agents_dir() + + repo_root = tmp_path / "repo" + repo_root.mkdir() + target_file = repo_root / "test_target.md" + original_content = "# Test\n\nOriginal content\n" + target_file.write_text(original_content, encoding="utf-8") + + merged_rules = build_merged_rules(agents_dir, idx) + + test_targets = {"test": idx.agent_rules.targets["agents"].model_copy(update={"path": "test_target.md"})} + + update_targets(repo_root, merged_rules, test_targets, dry_run=False, diff=False, backup=".bak") + + # Verify backup exists + backup_file = target_file.with_suffix(target_file.suffix + ".bak") + assert backup_file.exists() + assert backup_file.read_text(encoding="utf-8") == original_content diff --git a/tests/unit/pipelex/kit/test_index_loader.py b/tests/unit/pipelex/kit/test_index_loader.py new file mode 100644 index 000000000..3f71fa6c5 --- /dev/null +++ b/tests/unit/pipelex/kit/test_index_loader.py @@ -0,0 +1,39 @@ +from pipelex.kit.index_loader import load_index + + +class TestKitIndexLoader: + """Test kit index loading and validation.""" + + def test_load_index_succeeds(self): + """Test that index.toml loads successfully.""" + idx = load_index() + assert idx is not None + assert idx.agent_rules is not None + assert idx.meta is not 
None + + def test_index_has_required_structure(self): + """Test that loaded index has expected structure.""" + idx = load_index() + assert hasattr(idx.agent_rules, "sets") + assert hasattr(idx.agent_rules, "default_set") + assert hasattr(idx.agent_rules, "demote") + assert hasattr(idx.agent_rules, "cursor") + assert hasattr(idx.agent_rules, "targets") + + def test_index_sets_contain_expected_files(self): + """Test that agent_rules sets reference expected markdown files.""" + idx = load_index() + assert "pipelex_language" in idx.agent_rules.sets + assert "all" in idx.agent_rules.sets + assert len(idx.agent_rules.sets["all"]) > 0 + + # Verify files in sets end with .md + for file_list in idx.agent_rules.sets.values(): + for file_name in file_list: + assert file_name.endswith(".md"), f"Expected .md file, got {file_name}" + + def test_index_has_valid_default_set(self): + """Test that default_set points to an existing set.""" + idx = load_index() + default_set = idx.agent_rules.default_set + assert default_set in idx.agent_rules.sets, f"Default set '{default_set}' not found in sets" diff --git a/tests/unit/pipelex/kit/test_markers.py b/tests/unit/pipelex/kit/test_markers.py new file mode 100644 index 000000000..8c4fcb7fd --- /dev/null +++ b/tests/unit/pipelex/kit/test_markers.py @@ -0,0 +1,37 @@ +from pipelex.kit.markers import find_span, replace_span, wrap + + +class TestMarkers: + """Test marker utilities for content insertion and replacement.""" + + def test_find_span_with_markers(self): + """Test finding span when markers exist.""" + text = "prefix\n\ncontent\n\nsuffix" + span = find_span(text, "", "") + assert span is not None + assert text[span[0] : span[1]] == "\ncontent\n" + + def test_find_span_no_markers(self): + """Test finding span when markers don't exist.""" + text = "no markers here" + span = find_span(text, "", "") + assert span is None + + def test_find_span_incomplete_markers(self): + """Test finding span with only begin marker.""" + text = 
"prefix\n\ncontent without end" + span = find_span(text, "", "") + assert span is None + + def test_wrap_content(self): + """Test wrapping content with markers.""" + content = "line1\nline2" + wrapped = wrap("", "", content) + assert wrapped == "\nline1\nline2\n" + + def test_replace_span(self): + """Test replacing content within a span.""" + text = "prefix\nOLD_CONTENT\nsuffix" + span = (7, 18) # Position of OLD_CONTENT + new_text = replace_span(text, span, "NEW_CONTENT") + assert new_text == "prefix\nNEW_CONTENT\nsuffix" diff --git a/tests/unit/pipelex/kit/test_paths.py b/tests/unit/pipelex/kit/test_paths.py new file mode 100644 index 000000000..4713cc79b --- /dev/null +++ b/tests/unit/pipelex/kit/test_paths.py @@ -0,0 +1,23 @@ +from pipelex.kit.paths import get_agents_dir, get_configs_dir, get_kit_root + + +class TestKitPaths: + """Test kit path utilities.""" + + def test_get_kit_root(self): + """Test that kit root path is valid.""" + kit_root = get_kit_root() + assert kit_root is not None + assert (kit_root / "index.toml").is_file() + + def test_get_agents_dir(self): + """Test that agents directory path is valid.""" + agents_dir = get_agents_dir() + assert agents_dir is not None + assert agents_dir.is_dir() + + def test_get_configs_dir(self): + """Test that configs directory path is valid.""" + configs_dir = get_configs_dir() + assert configs_dir is not None + assert configs_dir.is_dir() diff --git a/uv.lock b/uv.lock index 8f998a3e9..8836ae26f 100644 --- a/uv.lock +++ b/uv.lock @@ -250,14 +250,14 @@ wheels = [ [[package]] name = "astroid" -version = "3.3.11" +version = "4.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/18/74/dfb75f9ccd592bbedb175d4a32fc643cf569d7c218508bfbd6ea7ef9c091/astroid-3.3.11.tar.gz", hash = "sha256:1e5a5011af2920c7c67a53f65d536d65bfa7116feeaf2354d8b94f29573bb0ce", size = 400439, 
upload-time = "2025-07-13T18:04:23.177Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/d1/6eee8726a863f28ff50d26c5eacb1a590f96ccbb273ce0a8c047ffb10f5a/astroid-4.0.1.tar.gz", hash = "sha256:0d778ec0def05b935e198412e62f9bcca8b3b5c39fdbe50b0ba074005e477aab", size = 405414, upload-time = "2025-10-11T15:15:42.6Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/0f/3b8fdc946b4d9cc8cc1e8af42c4e409468c84441b933d037e101b3d72d86/astroid-3.3.11-py3-none-any.whl", hash = "sha256:54c760ae8322ece1abd213057c4b5bba7c49818853fc901ef09719a60dbf9dec", size = 275612, upload-time = "2025-07-13T18:04:21.07Z" }, + { url = "https://files.pythonhosted.org/packages/47/f4/034361a9cbd9284ef40c8ad107955ede4efae29cbc17a059f63f6569c06a/astroid-4.0.1-py3-none-any.whl", hash = "sha256:37ab2f107d14dc173412327febf6c78d39590fdafcb44868f03b6c03452e3db0", size = 276268, upload-time = "2025-10-11T15:15:40.585Z" }, ] [[package]] @@ -375,11 +375,11 @@ wheels = [ [[package]] name = "cachetools" -version = "6.2.0" +version = "6.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/61/e4fad8155db4a04bfb4734c7c8ff0882f078f24294d42798b3568eb63bff/cachetools-6.2.0.tar.gz", hash = "sha256:38b328c0889450f05f5e120f56ab68c8abaf424e1275522b138ffc93253f7e32", size = 30988, upload-time = "2025-08-25T18:57:30.924Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/7e/b975b5814bd36faf009faebe22c1072a1fa1168db34d285ef0ba071ad78c/cachetools-6.2.1.tar.gz", hash = "sha256:3f391e4bd8f8bf0931169baf7456cc822705f4e2a31f840d218f445b9a854201", size = 31325, upload-time = "2025-10-12T14:55:30.139Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6c/56/3124f61d37a7a4e7cc96afc5492c78ba0cb551151e530b54669ddd1436ef/cachetools-6.2.0-py3-none-any.whl", hash = "sha256:1c76a8960c0041fcc21097e357f882197c79da0dbff766e7317890a65d7d8ba6", size = 11276, upload-time = "2025-08-25T18:57:29.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl", hash = "sha256:09868944b6dde876dfd44e1d47e18484541eaf12f26f29b7af91b26cc892d701", size = 11280, upload-time = "2025-10-12T14:55:28.382Z" }, ] [[package]] @@ -1088,11 +1088,11 @@ wheels = [ [[package]] name = "idna" -version = "3.10" +version = "3.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] [[package]] @@ -1135,11 +1135,11 @@ google-genai = [ [[package]] name = "isort" -version = "6.1.0" +version = "7.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1e/82/fa43935523efdfcce6abbae9da7f372b627b27142c3419fcf13bf5b0c397/isort-6.1.0.tar.gz", hash = "sha256:9b8f96a14cfee0677e78e941ff62f03769a06d412aabb9e2a90487b3b7e8d481", 
size = 824325, upload-time = "2025-10-01T16:26:45.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/63/53/4f3c058e3bace40282876f9b553343376ee687f3c35a525dc79dbd450f88/isort-7.0.0.tar.gz", hash = "sha256:5513527951aadb3ac4292a41a16cbc50dd1642432f5e8c20057d414bdafb4187", size = 805049, upload-time = "2025-10-11T13:30:59.107Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/cc/9b681a170efab4868a032631dea1e8446d8ec718a7f657b94d49d1a12643/isort-6.1.0-py3-none-any.whl", hash = "sha256:58d8927ecce74e5087aef019f778d4081a3b6c98f15a80ba35782ca8a2097784", size = 94329, upload-time = "2025-10-01T16:26:43.291Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ed/e3705d6d02b4f7aea715a353c8ce193efd0b5db13e204df895d38734c244/isort-7.0.0-py3-none-any.whl", hash = "sha256:1bcabac8bc3c36c7fb7b98a76c8abb18e0f841a3ba81decac7691008592499c1", size = 94672, upload-time = "2025-10-11T13:30:57.665Z" }, ] [[package]] @@ -2602,7 +2602,7 @@ crypto = [ [[package]] name = "pylint" -version = "3.3.9" +version = "4.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astroid" }, @@ -2614,9 +2614,9 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "tomlkit" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/9d/81c84a312d1fa8133b0db0c76148542a98349298a01747ab122f9314b04e/pylint-3.3.9.tar.gz", hash = "sha256:d312737d7b25ccf6b01cc4ac629b5dcd14a0fcf3ec392735ac70f137a9d5f83a", size = 1525946, upload-time = "2025-10-05T18:41:43.786Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/2f/e80cc4301c81c41a8836d726377daeebf5901a33c06ba8c2d5afb94f7612/pylint-4.0.0.tar.gz", hash = "sha256:62da212808c0681e49ffb125f0a994c685d912cf19ae373075649ebb5870ec28", size = 1567676, upload-time = "2025-10-12T15:21:15.165Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/a7/69460c4a6af7575449e615144aa2205b89408dc2969b87bc3df2f262ad0b/pylint-3.3.9-py3-none-any.whl", 
hash = "sha256:01f9b0462c7730f94786c283f3e52a1fbdf0494bbe0971a78d7277ef46a751e7", size = 523465, upload-time = "2025-10-05T18:41:41.766Z" }, + { url = "https://files.pythonhosted.org/packages/42/af/068a0b92c49927ada0e177561244157dc9d122eeea5987e34c423172a296/pylint-4.0.0-py3-none-any.whl", hash = "sha256:196b92a85204bb0c0a416a6bb324f6185e59ff1d687ee1d614bf0abf34a348e8", size = 535836, upload-time = "2025-10-12T15:21:13.041Z" }, ] [[package]] From 1fd9ac1b006035bc25ea3e372a10385c03a17cab Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 18:39:43 +0200 Subject: [PATCH 038/115] remove useless __init__.py from tests/ --- pyproject.toml | 1 - tests/e2e/pipelex/pipes/pipe_controller/__init__.py | 0 .../e2e/pipelex/pipes/pipe_controller/pipe_condition/__init__.py | 0 .../e2e/pipelex/pipes/pipe_controller/pipe_parallel/__init__.py | 0 .../e2e/pipelex/pipes/pipe_controller/pipe_sequence/__init__.py | 0 tests/e2e/pipelex/pipes/pipe_operators/__init__.py | 0 tests/integration/pipelex/__init__.py | 0 tests/integration/pipelex/client/__init__.py | 0 tests/integration/pipelex/cogt/__init__.py | 0 tests/integration/pipelex/core/memory/__init__.py | 0 tests/integration/pipelex/language/__init__.py | 0 .../integration/pipelex/libraries/pipelines/builder/__init__.py | 0 .../pipelex/libraries/pipelines/builder/concept/__init__.py | 0 tests/integration/pipelex/pipes/__init__.py | 0 .../integration/pipelex/pipes/controller/pipe_batch/__init__.py | 0 .../pipelex/pipes/controller/pipe_condition/__init__.py | 0 .../pipelex/pipes/controller/pipe_parallel/__init__.py | 0 .../pipelex/pipes/controller/pipe_sequence/__init__.py | 0 tests/integration/pipelex/pipes/operator/__init__.py | 0 .../integration/pipelex/pipes/operator/pipe_extract/__init__.py | 0 tests/integration/pipelex/pipes/operator/pipe_func/__init__.py | 1 - .../integration/pipelex/pipes/operator/pipe_img_gen/__init__.py | 0 tests/integration/pipelex/pipes/operator/pipe_llm/__init__.py | 0 
tests/integration/pipelex/plugins/__init__.py | 0 tests/integration/pipelex/tools/aws/__init__.py | 0 tests/integration/pipelex/tools/class_registry/__init__.py | 0 tests/integration/pipelex/tools/httpx/__init__.py | 0 tests/integration/pipelex/tools/pdf/__init__.py | 0 tests/unit/pipelex/cli/commands/__init__.py | 0 tests/unit/pipelex/client/__init__.py | 1 - tests/unit/pipelex/cogt/models/__init__.py | 0 tests/unit/pipelex/cogt/templating/__init__.py | 0 tests/unit/pipelex/core/bundles/__init__.py | 1 - tests/unit/pipelex/core/concepts/concept/__init__.py | 0 tests/unit/pipelex/core/concepts/concept_factory/__init__.py | 0 tests/unit/pipelex/core/concepts/concept_library/__init__.py | 0 tests/unit/pipelex/core/interpreter/__init__.py | 0 tests/unit/pipelex/core/memory/__init__.py | 0 tests/unit/pipelex/core/stuffs/__init__.py | 0 tests/unit/pipelex/language/__init__.py | 0 tests/unit/pipelex/libraries/pipelines/builder/__init__.py | 0 .../unit/pipelex/libraries/pipelines/builder/concept/__init__.py | 0 tests/unit/pipelex/libraries/pipelines/builder/pipe/__init__.py | 0 .../builder/pipe/pipe_controller/pipe_batch/__init__.py | 0 .../builder/pipe/pipe_controller/pipe_condition/__init__.py | 0 .../builder/pipe/pipe_controller/pipe_parallel/__init__.py | 0 .../builder/pipe/pipe_controller/pipe_sequence/__init__.py | 0 .../builder/pipe/pipe_operator/pipe_compose/__init__.py | 0 .../builder/pipe/pipe_operator/pipe_extract/__init__.py | 0 .../pipelines/builder/pipe/pipe_operator/pipe_func/__init__.py | 0 .../builder/pipe/pipe_operator/pipe_img_gen/__init__.py | 0 .../pipelines/builder/pipe/pipe_operator/pipe_llm/__init__.py | 0 tests/unit/pipelex/pipe_controllers/__init__.py | 0 tests/unit/pipelex/pipe_controllers/batch/__init__.py | 0 tests/unit/pipelex/pipe_controllers/condition/__init__.py | 0 tests/unit/pipelex/pipe_controllers/parallel/__init__.py | 0 tests/unit/pipelex/pipe_controllers/sequence/__init__.py | 0 tests/unit/pipelex/pipe_operators/__init__.py | 0 
tests/unit/pipelex/pipe_operators/pipe_compose/__init__.py | 0 tests/unit/pipelex/pipe_operators/pipe_extract/__init__.py | 0 tests/unit/pipelex/pipe_operators/pipe_func/__init__.py | 0 tests/unit/pipelex/pipe_operators/pipe_img_gen/__init__.py | 0 tests/unit/pipelex/pipe_operators/pipe_llm/__init__.py | 0 tests/unit/pipelex/pipe_run/__init__.py | 0 tests/unit/pipelex/tools/__init__.py | 0 tests/unit/pipelex/tools/config/__init__.py | 0 tests/unit/pipelex/tools/misc/__init__.py | 0 tests/unit/pipelex/tools/secrets/__init__.py | 0 tests/unit/pipelex/tools/typing/__init__.py | 0 69 files changed, 4 deletions(-) delete mode 100644 tests/e2e/pipelex/pipes/pipe_controller/__init__.py delete mode 100644 tests/e2e/pipelex/pipes/pipe_controller/pipe_condition/__init__.py delete mode 100644 tests/e2e/pipelex/pipes/pipe_controller/pipe_parallel/__init__.py delete mode 100644 tests/e2e/pipelex/pipes/pipe_controller/pipe_sequence/__init__.py delete mode 100644 tests/e2e/pipelex/pipes/pipe_operators/__init__.py delete mode 100644 tests/integration/pipelex/__init__.py delete mode 100644 tests/integration/pipelex/client/__init__.py delete mode 100644 tests/integration/pipelex/cogt/__init__.py delete mode 100644 tests/integration/pipelex/core/memory/__init__.py delete mode 100644 tests/integration/pipelex/language/__init__.py delete mode 100644 tests/integration/pipelex/libraries/pipelines/builder/__init__.py delete mode 100644 tests/integration/pipelex/libraries/pipelines/builder/concept/__init__.py delete mode 100644 tests/integration/pipelex/pipes/__init__.py delete mode 100644 tests/integration/pipelex/pipes/controller/pipe_batch/__init__.py delete mode 100644 tests/integration/pipelex/pipes/controller/pipe_condition/__init__.py delete mode 100644 tests/integration/pipelex/pipes/controller/pipe_parallel/__init__.py delete mode 100644 tests/integration/pipelex/pipes/controller/pipe_sequence/__init__.py delete mode 100644 tests/integration/pipelex/pipes/operator/__init__.py 
delete mode 100644 tests/integration/pipelex/pipes/operator/pipe_extract/__init__.py delete mode 100644 tests/integration/pipelex/pipes/operator/pipe_func/__init__.py delete mode 100644 tests/integration/pipelex/pipes/operator/pipe_img_gen/__init__.py delete mode 100644 tests/integration/pipelex/pipes/operator/pipe_llm/__init__.py delete mode 100644 tests/integration/pipelex/plugins/__init__.py delete mode 100644 tests/integration/pipelex/tools/aws/__init__.py delete mode 100644 tests/integration/pipelex/tools/class_registry/__init__.py delete mode 100644 tests/integration/pipelex/tools/httpx/__init__.py delete mode 100644 tests/integration/pipelex/tools/pdf/__init__.py delete mode 100644 tests/unit/pipelex/cli/commands/__init__.py delete mode 100644 tests/unit/pipelex/client/__init__.py delete mode 100644 tests/unit/pipelex/cogt/models/__init__.py delete mode 100644 tests/unit/pipelex/cogt/templating/__init__.py delete mode 100644 tests/unit/pipelex/core/bundles/__init__.py delete mode 100644 tests/unit/pipelex/core/concepts/concept/__init__.py delete mode 100644 tests/unit/pipelex/core/concepts/concept_factory/__init__.py delete mode 100644 tests/unit/pipelex/core/concepts/concept_library/__init__.py delete mode 100644 tests/unit/pipelex/core/interpreter/__init__.py delete mode 100644 tests/unit/pipelex/core/memory/__init__.py delete mode 100644 tests/unit/pipelex/core/stuffs/__init__.py delete mode 100644 tests/unit/pipelex/language/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/concept/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/__init__.py delete mode 100644 
tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/__init__.py delete mode 100644 tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/__init__.py delete mode 100644 tests/unit/pipelex/pipe_controllers/__init__.py delete mode 100644 tests/unit/pipelex/pipe_controllers/batch/__init__.py delete mode 100644 tests/unit/pipelex/pipe_controllers/condition/__init__.py delete mode 100644 tests/unit/pipelex/pipe_controllers/parallel/__init__.py delete mode 100644 tests/unit/pipelex/pipe_controllers/sequence/__init__.py delete mode 100644 tests/unit/pipelex/pipe_operators/__init__.py delete mode 100644 tests/unit/pipelex/pipe_operators/pipe_compose/__init__.py delete mode 100644 tests/unit/pipelex/pipe_operators/pipe_extract/__init__.py delete mode 100644 tests/unit/pipelex/pipe_operators/pipe_func/__init__.py delete mode 100644 tests/unit/pipelex/pipe_operators/pipe_img_gen/__init__.py delete mode 100644 tests/unit/pipelex/pipe_operators/pipe_llm/__init__.py delete mode 100644 tests/unit/pipelex/pipe_run/__init__.py delete mode 100644 tests/unit/pipelex/tools/__init__.py delete mode 100644 tests/unit/pipelex/tools/config/__init__.py delete mode 100644 tests/unit/pipelex/tools/misc/__init__.py delete mode 100644 tests/unit/pipelex/tools/secrets/__init__.py delete mode 100644 tests/unit/pipelex/tools/typing/__init__.py diff --git a/pyproject.toml b/pyproject.toml index 
c02c6a9bb..be6a7b231 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -358,7 +358,6 @@ convention = "google" [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = [ "INP001", # Allow test files to not have __init__.py in their directories (avoids namespace collisions) - "T201", # Allow print statements in test files ] [tool.uv] diff --git a/tests/e2e/pipelex/pipes/pipe_controller/__init__.py b/tests/e2e/pipelex/pipes/pipe_controller/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/e2e/pipelex/pipes/pipe_controller/pipe_condition/__init__.py b/tests/e2e/pipelex/pipes/pipe_controller/pipe_condition/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/e2e/pipelex/pipes/pipe_controller/pipe_parallel/__init__.py b/tests/e2e/pipelex/pipes/pipe_controller/pipe_parallel/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/e2e/pipelex/pipes/pipe_controller/pipe_sequence/__init__.py b/tests/e2e/pipelex/pipes/pipe_controller/pipe_sequence/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/e2e/pipelex/pipes/pipe_operators/__init__.py b/tests/e2e/pipelex/pipes/pipe_operators/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/__init__.py b/tests/integration/pipelex/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/client/__init__.py b/tests/integration/pipelex/client/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/cogt/__init__.py b/tests/integration/pipelex/cogt/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/core/memory/__init__.py b/tests/integration/pipelex/core/memory/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/language/__init__.py b/tests/integration/pipelex/language/__init__.py 
deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/libraries/pipelines/builder/__init__.py b/tests/integration/pipelex/libraries/pipelines/builder/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/libraries/pipelines/builder/concept/__init__.py b/tests/integration/pipelex/libraries/pipelines/builder/concept/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/__init__.py b/tests/integration/pipelex/pipes/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/controller/pipe_batch/__init__.py b/tests/integration/pipelex/pipes/controller/pipe_batch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/controller/pipe_condition/__init__.py b/tests/integration/pipelex/pipes/controller/pipe_condition/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/controller/pipe_parallel/__init__.py b/tests/integration/pipelex/pipes/controller/pipe_parallel/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/controller/pipe_sequence/__init__.py b/tests/integration/pipelex/pipes/controller/pipe_sequence/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/operator/__init__.py b/tests/integration/pipelex/pipes/operator/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/operator/pipe_extract/__init__.py b/tests/integration/pipelex/pipes/operator/pipe_extract/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/operator/pipe_func/__init__.py b/tests/integration/pipelex/pipes/operator/pipe_func/__init__.py deleted file mode 100644 index 
d610235db..000000000 --- a/tests/integration/pipelex/pipes/operator/pipe_func/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Empty file for Python package structure diff --git a/tests/integration/pipelex/pipes/operator/pipe_img_gen/__init__.py b/tests/integration/pipelex/pipes/operator/pipe_img_gen/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/pipes/operator/pipe_llm/__init__.py b/tests/integration/pipelex/pipes/operator/pipe_llm/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/plugins/__init__.py b/tests/integration/pipelex/plugins/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/tools/aws/__init__.py b/tests/integration/pipelex/tools/aws/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/tools/class_registry/__init__.py b/tests/integration/pipelex/tools/class_registry/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/tools/httpx/__init__.py b/tests/integration/pipelex/tools/httpx/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/pipelex/tools/pdf/__init__.py b/tests/integration/pipelex/tools/pdf/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/cli/commands/__init__.py b/tests/unit/pipelex/cli/commands/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/client/__init__.py b/tests/unit/pipelex/client/__init__.py deleted file mode 100644 index 713f4401d..000000000 --- a/tests/unit/pipelex/client/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Client tests diff --git a/tests/unit/pipelex/cogt/models/__init__.py b/tests/unit/pipelex/cogt/models/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/cogt/templating/__init__.py 
b/tests/unit/pipelex/cogt/templating/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/core/bundles/__init__.py b/tests/unit/pipelex/core/bundles/__init__.py deleted file mode 100644 index e978cd940..000000000 --- a/tests/unit/pipelex/core/bundles/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Unit tests for core bundles module.""" diff --git a/tests/unit/pipelex/core/concepts/concept/__init__.py b/tests/unit/pipelex/core/concepts/concept/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/core/concepts/concept_factory/__init__.py b/tests/unit/pipelex/core/concepts/concept_factory/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/core/concepts/concept_library/__init__.py b/tests/unit/pipelex/core/concepts/concept_library/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/core/interpreter/__init__.py b/tests/unit/pipelex/core/interpreter/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/core/memory/__init__.py b/tests/unit/pipelex/core/memory/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/core/stuffs/__init__.py b/tests/unit/pipelex/core/stuffs/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/language/__init__.py b/tests/unit/pipelex/language/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/concept/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/concept/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git 
a/tests/unit/pipelex/libraries/pipelines/builder/pipe/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git 
a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/__init__.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_controllers/__init__.py b/tests/unit/pipelex/pipe_controllers/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_controllers/batch/__init__.py b/tests/unit/pipelex/pipe_controllers/batch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_controllers/condition/__init__.py b/tests/unit/pipelex/pipe_controllers/condition/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_controllers/parallel/__init__.py b/tests/unit/pipelex/pipe_controllers/parallel/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_controllers/sequence/__init__.py b/tests/unit/pipelex/pipe_controllers/sequence/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_operators/__init__.py b/tests/unit/pipelex/pipe_operators/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_operators/pipe_compose/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_compose/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_operators/pipe_extract/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_extract/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_operators/pipe_func/__init__.py 
b/tests/unit/pipelex/pipe_operators/pipe_func/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_operators/pipe_img_gen/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_img_gen/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_operators/pipe_llm/__init__.py b/tests/unit/pipelex/pipe_operators/pipe_llm/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/pipe_run/__init__.py b/tests/unit/pipelex/pipe_run/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/tools/__init__.py b/tests/unit/pipelex/tools/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/tools/config/__init__.py b/tests/unit/pipelex/tools/config/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/tools/misc/__init__.py b/tests/unit/pipelex/tools/misc/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/tools/secrets/__init__.py b/tests/unit/pipelex/tools/secrets/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/pipelex/tools/typing/__init__.py b/tests/unit/pipelex/tools/typing/__init__.py deleted file mode 100644 index e69de29bb..000000000 From 9add8a5569a21f70647df0f15675a9f2921d48da Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 22:27:48 +0200 Subject: [PATCH 039/115] No Base Library --- .../pipe-operators/PipeImgGen.md | 2 +- .../pipe-operators/PipeLLM.md | 2 +- pipelex/cli/commands/init_cmd.py | 30 +++--- pipelex/client/api_serializer.py | 2 +- pipelex/core/concepts/concept_factory.py | 9 +- pipelex/core/concepts/concept_native.py | 4 +- pipelex/libraries/library_config.py | 59 ++++++----- pipelex/libraries/library_manager.py | 65 +++++++++--- pipelex/libraries/pipelines/documents.plx | 28 ------ 
pipelex/libraries/pipelines/images.plx | 36 ------- pipelex/tools/class_registry_utils.py | 98 +++++++++++++++++-- pipelex/tools/config/manager.py | 22 ++--- pipelex/tools/func_registry_utils.py | 24 +++-- .../pipes/test_pipe_running_variants.py | 2 + tests/integration/pipelex/test_data.py | 5 - .../misc_tests/multiplicity.plx | 6 +- .../pipe_controllers/pipe_batch/text_list.py | 6 -- .../pipe_batch/uppercase_transformer.plx | 9 -- .../pipe_operators/pipe_llm_vision.plx | 6 +- 19 files changed, 242 insertions(+), 173 deletions(-) delete mode 100644 pipelex/libraries/pipelines/documents.plx delete mode 100644 pipelex/libraries/pipelines/images.plx delete mode 100644 tests/test_pipelines/pipe_controllers/pipe_batch/text_list.py diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeImgGen.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeImgGen.md index 456859fd8..8ce99bf60 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeImgGen.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeImgGen.md @@ -76,7 +76,7 @@ ImagePrompt = "A text prompt for generating an image" [pipe.generate_logo_variations] type = "PipeImgGen" description = "Generate three logo variations from a prompt" -inputs = { prompt = "images.ImgGenPrompt" } +inputs = { prompt = "ImgGenPrompt" } output = "Image" nb_output = 3 model = "base_img_gen" diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md index 08f0e50dd..ec8b5ee67 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md @@ -42,7 +42,7 @@ You can use any concept that refines `Image` as an input, and choose descriptive [pipe.analyze_wedding] type = "PipeLLM" description = "Analyze wedding photo" 
-inputs = { wedding_photo = "images.Photo" } +inputs = { wedding_photo = "Photo" } output = "PhotoAnalysis" prompt = """ Analyze this wedding photo and describe the key moments captured. diff --git a/pipelex/cli/commands/init_cmd.py b/pipelex/cli/commands/init_cmd.py index e61a6dcea..97813ff47 100644 --- a/pipelex/cli/commands/init_cmd.py +++ b/pipelex/cli/commands/init_cmd.py @@ -7,7 +7,6 @@ from pipelex.exceptions import PipelexCLIError from pipelex.kit.paths import get_configs_dir -from pipelex.libraries.library_config import LibraryConfig from pipelex.tools.config.manager import config_manager PACKAGE_NAME = __name__.split(".", maxsplit=1)[0] @@ -15,20 +14,21 @@ def do_init_libraries(directory: str = ".", overwrite: bool = False) -> None: - try: - target_dir = os.path.join(directory, "pipelex_libraries") - os.makedirs(directory, exist_ok=True) - - library_config = LibraryConfig(config_dir_path=target_dir) - library_config.export_libraries(overwrite=overwrite) - - if overwrite: - typer.echo(f"✅ Successfully initialized pipelex libraries at '{target_dir}' (all files overwritten)") - else: - typer.echo(f"✅ Successfully initialized pipelex libraries at '{target_dir}' (only created non-existing files)") - except Exception as exc: - msg = f"Failed to initialize libraries at '{directory}': {exc}" - raise PipelexCLIError(msg) from exc + # try: + # target_dir = os.path.join(directory, "pipelex_libraries") + # os.makedirs(directory, exist_ok=True) + + # library_config = LibraryConfig(config_dir_path=target_dir) + # library_config.export_libraries(overwrite=overwrite) + + # if overwrite: + # typer.echo(f"✅ Successfully initialized pipelex libraries at '{target_dir}' (all files overwritten)") + # else: + # typer.echo(f"✅ Successfully initialized pipelex libraries at '{target_dir}' (only created non-existing files)") + # except Exception as exc: + # msg = f"Failed to initialize libraries at '{directory}': {exc}" + # raise PipelexCLIError(msg) from exc + pass def 
do_init_config(reset: bool = False) -> None: diff --git a/pipelex/client/api_serializer.py b/pipelex/client/api_serializer.py index 9c5b60b05..64fbc017b 100644 --- a/pipelex/client/api_serializer.py +++ b/pipelex/client/api_serializer.py @@ -36,7 +36,7 @@ def serialize_working_memory_for_api(cls, working_memory: WorkingMemory | None = return compact_memory for stuff_name, stuff in working_memory.root.items(): - if NativeConceptCode.is_text(concept_code=stuff.concept.code): + if NativeConceptCode.is_text_concept(concept_code=stuff.concept.code): stuff_content = cast("TextContent", stuff.content) item_dict: dict[str, Any] = { "concept_code": stuff.concept.code, diff --git a/pipelex/core/concepts/concept_factory.py b/pipelex/core/concepts/concept_factory.py index 94639d2b9..c9817c97e 100644 --- a/pipelex/core/concepts/concept_factory.py +++ b/pipelex/core/concepts/concept_factory.py @@ -119,7 +119,14 @@ def make_native_concept(cls, native_concept_code: NativeConceptCode) -> Concept: code=native_concept_code, domain=SpecialDomain.NATIVE, description="A prompt for an LLM", - structure_class_name=structure_class_name, + structure_class_name=NativeConceptCode.TEXT.structure_class_name, + ) + case NativeConceptCode.IMG_GEN_PROMPT: + return Concept( + code=native_concept_code, + domain=SpecialDomain.NATIVE, + description="A prompt for an image generator", + structure_class_name=NativeConceptCode.TEXT.structure_class_name, ) case NativeConceptCode.PAGE: return Concept( diff --git a/pipelex/core/concepts/concept_native.py b/pipelex/core/concepts/concept_native.py index 559976609..df4c44b7d 100644 --- a/pipelex/core/concepts/concept_native.py +++ b/pipelex/core/concepts/concept_native.py @@ -14,6 +14,7 @@ class NativeConceptCode(StrEnum): TEXT_AND_IMAGES = "TextAndImages" NUMBER = "Number" LLM_PROMPT = "LlmPrompt" + IMG_GEN_PROMPT = "ImgGenPrompt" PAGE = "Page" ANYTHING = "Anything" @@ -26,7 +27,7 @@ def structure_class_name(self) -> str: return f"{self.value}Content" 
@classmethod - def is_text(cls, concept_code: str) -> bool: + def is_text_concept(cls, concept_code: str) -> bool: try: enum_value = NativeConceptCode(concept_code) except ValueError: @@ -42,6 +43,7 @@ def is_text(cls, concept_code: str) -> bool: | NativeConceptCode.TEXT_AND_IMAGES | NativeConceptCode.NUMBER | NativeConceptCode.LLM_PROMPT + | NativeConceptCode.IMG_GEN_PROMPT | NativeConceptCode.PAGE | NativeConceptCode.ANYTHING ): diff --git a/pipelex/libraries/library_config.py b/pipelex/libraries/library_config.py index e91e624e3..c22d0aa22 100644 --- a/pipelex/libraries/library_config.py +++ b/pipelex/libraries/library_config.py @@ -2,22 +2,21 @@ from typing import ClassVar from pipelex.tools.config.config_model import ConfigModel -from pipelex.tools.misc.file_utils import copy_file_from_package, copy_folder_from_package -PIPELEX_LIBRARIES_PATH = "libraries" +# PIPELEX_LIBRARIES_PATH = "libraries" class LibraryConfig(ConfigModel): package_name: ClassVar[str] = "pipelex" config_dir_path: str = "pipelex_libraries" - @property - def pipelines_dir_path(self) -> str: - return f"{self.config_dir_path}/pipelines" + # @property + # def pipelines_dir_path(self) -> str: + # return f"{self.config_dir_path}/pipelines" - @property - def base_pipelines_dir_path(self) -> str: - return f"{self.config_dir_path}/pipelines/base_library" + # @property + # def base_pipelines_dir_path(self) -> str: + # return f"{self.config_dir_path}/pipelines/base_library" @property def test_pipelines_dir_path(self) -> str: @@ -27,25 +26,25 @@ def test_pipelines_dir_path(self) -> str: def failing_pipelines_file_paths(self) -> set[Path]: return {Path("tests/test_pipelines/failing_pipelines.plx")} - def export_libraries(self, overwrite: bool = False) -> None: - """Duplicate pipelex libraries files in the client project, preserving directory structure.""" - copy_file_from_package( - package_name=self.package_name, - file_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/__init__.py", - 
target_path=f"{self.config_dir_path}/__init__.py", - overwrite=overwrite, - ) - - # pipelines - copy_folder_from_package( - package_name=self.package_name, - folder_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/pipelines", - target_dir=self.base_pipelines_dir_path, - overwrite=overwrite, - ) - copy_file_from_package( - package_name=self.package_name, - file_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/pipelines/__init__.py", - target_path=f"{self.pipelines_dir_path}/__init__.py", - overwrite=overwrite, - ) + # def export_libraries(self, overwrite: bool = False) -> None: + # """Duplicate pipelex libraries files in the client project, preserving directory structure.""" + # copy_file_from_package( + # package_name=self.package_name, + # file_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/__init__.py", + # target_path=f"{self.config_dir_path}/__init__.py", + # overwrite=overwrite, + # ) + + # # pipelines + # copy_folder_from_package( + # package_name=self.package_name, + # folder_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/pipelines", + # target_dir=self.base_pipelines_dir_path, + # overwrite=overwrite, + # ) + # copy_file_from_package( + # package_name=self.package_name, + # file_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/pipelines/__init__.py", + # target_path=f"{self.pipelines_dir_path}/__init__.py", + # overwrite=overwrite, + # ) diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 48a409cbb..646130896 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -18,6 +18,7 @@ from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_factory import PipeFactory from pipelex.core.pipes.pipe_library import PipeLibrary +from pipelex.core.stuffs.structured_content import StructuredContent from pipelex.core.validation import report_validation_error from pipelex.exceptions import ( ConceptDefinitionError, @@ -34,9 +35,9 @@ from pipelex.libraries.library_config import 
LibraryConfig from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from pipelex.tools.class_registry_utils import ClassRegistryUtils +from pipelex.tools.config.manager import config_manager from pipelex.tools.func_registry_utils import FuncRegistryUtils from pipelex.tools.misc.file_utils import find_files_in_dir -from pipelex.tools.runtime_manager import runtime_manager from pipelex.types import StrEnum @@ -98,22 +99,50 @@ def reset(self) -> None: self.setup() def _get_pipeline_library_dirs(self) -> list[Path]: - library_dirs = [Path(self.library_config.pipelines_dir_path)] - if runtime_manager.is_unit_testing: - log.debug("Registering test pipeline structures for unit testing") - library_dirs += [Path(self.library_config.test_pipelines_dir_path)] - return library_dirs + # Scan the entire project root for .plx files + project_root = Path(config_manager.local_root_dir) + return [project_root] + + def _find_plx_files_in_dir(self, dir_path: str, pattern: str, is_recursive: bool) -> list[Path]: + """Find PLX files matching a pattern in a directory, excluding problematic directories. + + Args: + dir_path: Directory path to search in + pattern: File pattern to match (e.g. 
"*.plx") + is_recursive: Whether to search recursively in subdirectories + + Returns: + List of matching Path objects, filtered to exclude problematic directories + + """ + # Get all files using the base utility + all_files = find_files_in_dir(dir_path, pattern, is_recursive) + + # Directories to exclude from scanning to avoid loading invalid PLX files + exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} + + # Filter out files in excluded directories + filtered_files: list[Path] = [] + for file_path in all_files: + # Check if any parent directory is in the exclude list + should_exclude = any(part in exclude_dirs for part in file_path.parts) + if not should_exclude: + filtered_files.append(file_path) + + return filtered_files def _get_pipelex_plx_files_from_dirs(self, dirs: list[Path]) -> list[Path]: """Get all valid Pipelex PLX files from the given directories.""" all_plx_paths: list[Path] = [] + seen_files: set[str] = set() # Track by absolute path to avoid duplicates + for dir_path in dirs: if not dir_path.exists(): - msg = f"Directory does not exist: {dir_path}" - raise LibraryError(msg) + log.debug(f"Directory does not exist, skipping: {dir_path}") + continue - # Find all TOML files in the directory - plx_files = find_files_in_dir( + # Find all .plx files in the directory, excluding problematic directories + plx_files = self._find_plx_files_in_dir( dir_path=str(dir_path), pattern="*.plx", is_recursive=True, @@ -121,8 +150,16 @@ def _get_pipelex_plx_files_from_dirs(self, dirs: list[Path]) -> list[Path]: # Filter to only include valid Pipelex files for plx_file in plx_files: + absolute_path = str(plx_file.resolve()) + + # Skip if already seen + if absolute_path in seen_files: + log.debug(f"Skipping duplicate PLX file: {plx_file}") + continue + if PipelexInterpreter.is_pipelex_file(plx_file): all_plx_paths.append(plx_file) + seen_files.add(absolute_path) else: log.debug(f"Skipping 
non-Pipelex PLX file: {plx_file}") @@ -233,11 +270,15 @@ def load_libraries( failing_pipelines_file_paths = get_config().pipelex.library_config.failing_pipelines_file_paths valid_plx_paths = [path for path in all_plx_paths if path not in failing_pipelines_file_paths] - # Register classes in the directories + # Import modules to load them into sys.modules (but don't register classes yet) for library_dir in dirs_to_use: - ClassRegistryUtils.register_classes_in_folder(folder_path=str(library_dir)) + ClassRegistryUtils.import_modules_in_folder(folder_path=str(library_dir)) FuncRegistryUtils.register_funcs_in_folder(folder_path=str(library_dir)) + # Auto-discover and register all StructuredContent classes from sys.modules + num_registered = ClassRegistryUtils.auto_register_all_subclasses(base_class=StructuredContent) + log.debug(f"Auto-registered {num_registered} StructuredContent classes from loaded modules") + # Parse all blueprints first blueprints: list[PipelexBundleBlueprint] = [] for plx_file_path in valid_plx_paths: diff --git a/pipelex/libraries/pipelines/documents.plx b/pipelex/libraries/pipelines/documents.plx deleted file mode 100644 index 8fee29f27..000000000 --- a/pipelex/libraries/pipelines/documents.plx +++ /dev/null @@ -1,28 +0,0 @@ - - -domain = "documents" -description = "The domain of documents that can comprise pages, text, images, etc. 
in PDF or other formats" - -[concept] -TextAndImagesContent = "A content that comprises text and images where the text can include local links to the images" - -[pipe] -# PipeExtract requires to have a single input -# It can be named however you want -# but it must be either an image or a pdf or a concept which refines one of them -[pipe.extract_page_contents_from_pdf] -type = "PipeExtract" -description = "Extract page contents from a PDF document" -inputs = { document = "PDF" } -output = "Page" -page_images = true -page_views = false - -[pipe.extract_page_contents_and_views_from_pdf] -type = "PipeExtract" -description = "Extract page contents from a PDF document as well as full page views" -inputs = { document = "PDF" } -output = "Page" -page_images = true -page_views = true - diff --git a/pipelex/libraries/pipelines/images.plx b/pipelex/libraries/pipelines/images.plx deleted file mode 100644 index dd3e1ae6d..000000000 --- a/pipelex/libraries/pipelines/images.plx +++ /dev/null @@ -1,36 +0,0 @@ -domain = "images" -description = "Generic image-related domain" - -[concept] -VisualDescription = "Visual description of something" - -[concept.ImgGenPrompt] -description = "Prompt to generate an image" -refines = "Text" - -[concept.Photo] -description = "Photo" -refines = "Image" - -[pipe] -################################################################# -# Image generation: PipeImgGen generating images as output -################################################################# - - -# PipeImgGen requires to have a single input -# It can be named however you want, -# but it must be either an ImgGenPrompt or a concept which refines ImgGenPrompt -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" - - -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a photo" -inputs = { prompt = "ImgGenPrompt" } -output = "images.Photo" - diff --git a/pipelex/tools/class_registry_utils.py 
b/pipelex/tools/class_registry_utils.py index 18e7384b8..627e9ecdd 100644 --- a/pipelex/tools/class_registry_utils.py +++ b/pipelex/tools/class_registry_utils.py @@ -1,12 +1,17 @@ +import inspect +import sys import types from pathlib import Path from typing import TYPE_CHECKING, Annotated, Any, Union, get_args, get_origin from kajson.kajson_manager import KajsonManager +from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir + if TYPE_CHECKING: from pydantic.fields import FieldInfo +from pipelex import log from pipelex.tools.typing.module_inspector import find_classes_in_module, import_module_from_file _NoneType = type(None) @@ -67,7 +72,7 @@ def register_classes_in_folder( @classmethod def find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> list[Path]: - """Find files matching a pattern in a directory. + """Find files matching a pattern in a directory, excluding common build/cache directories. Args: dir_path: Directory path to search in @@ -75,13 +80,24 @@ def find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> l is_recursive: Whether to search recursively in subdirectories Returns: - List of matching Path objects + List of matching Path objects, filtered to exclude problematic directories """ - path = Path(dir_path) - if is_recursive: - return list(path.rglob(pattern)) - return list(path.glob(pattern)) + # Get all files using the base utility + all_files = base_find_files_in_dir(dir_path, pattern, is_recursive) + + # Directories to exclude from scanning to avoid import issues + exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} + + # Filter out files in excluded directories + filtered_files: list[Path] = [] + for file_path in all_files: + # Check if any parent directory is in the exclude list + should_exclude = any(part in exclude_dirs for part in file_path.parts) + if not should_exclude: + 
filtered_files.append(file_path) + + return filtered_files @staticmethod def are_classes_equivalent(class_1: type[Any], class_2: type[Any]) -> bool: @@ -152,3 +168,73 @@ def _is_compatible(t: Any) -> bool: return False return any(_is_compatible(field.annotation) for field in fields.values()) + + @classmethod + def import_modules_in_folder( + cls, + folder_path: str, + is_recursive: bool = True, + ) -> None: + """Import Python modules without registering their classes. + + This loads modules into sys.modules so their classes are available + for discovery by auto_register_all_subclasses(). + + Args: + folder_path: Path to folder containing Python files + is_recursive: Whether to search recursively in subdirectories + + """ + python_files = cls.find_files_in_dir( + dir_path=folder_path, + pattern="*.py", + is_recursive=is_recursive, + ) + + for python_file in python_files: + try: + import_module_from_file(str(python_file)) + except Exception as e: + # Log but don't fail - some files might not be importable + log.debug(f"Could not import {python_file}: {e}") + + @classmethod + def auto_register_all_subclasses( + cls, + base_class: type[Any], + ) -> int: + """Scan all loaded modules in sys.modules and register all subclasses of base_class. + + This enables auto-discovery of classes that are already in memory, + making them available to concepts without explicit registration. 
+ + Args: + base_class: Base class to filter by (e.g., StructuredContent) + + Returns: + Number of classes registered + + """ + registered_count = 0 + class_registry = KajsonManager.get_class_registry() + + # Create a snapshot of modules to avoid "dictionary changed size during iteration" error + # (inspect.getmembers can trigger imports which modify sys.modules) + modules_snapshot = list(sys.modules.values()) + + # Iterate through all loaded modules + for module in modules_snapshot: + try: + # Find all classes in this module + for _, obj in inspect.getmembers(module, inspect.isclass): + # Check if it's a subclass of base_class (but not the base_class itself) + if obj is not base_class and issubclass(obj, base_class): + # Register if not already registered + if not class_registry.has_class(name=obj.__name__): + class_registry.register_class(obj) + registered_count += 1 + except Exception as e: + # Skip modules that can't be inspected + log.debug(f"Could not inspect module for auto-registration: {e}") + + return registered_count diff --git a/pipelex/tools/config/manager.py b/pipelex/tools/config/manager.py index f57d58460..a22ee9798 100644 --- a/pipelex/tools/config/manager.py +++ b/pipelex/tools/config/manager.py @@ -22,7 +22,7 @@ class ConfigError(Exception): class ConfigManager: @property def is_in_pipelex_config(self) -> bool: - return os.path.basename(self.local_root_dir) == "pipelex" + return os.path.basename(os.getcwd()) == "pipelex" @property def pipelex_root_dir(self) -> str: @@ -43,11 +43,11 @@ def local_root_dir(self) -> str: """Get the root directory of the project using pipelex. This is the directory from where the command is being run. 
""" - return os.path.abspath(os.getcwd()) + return os.getcwd() @property def pipelex_config_dir(self) -> str: - return os.path.join(self.local_root_dir, CONFIG_DIR_NAME) + return os.path.join(os.getcwd(), CONFIG_DIR_NAME) @property def pipelex_specific_config_file_path(self) -> str: @@ -78,9 +78,9 @@ def load_inheritance_config(self, the_pipelex_config: dict[str, Any]): This will be removed in the future. Requires to have a pyproject.toml file in the project root. """ - pyproject_path = os.path.join(self.local_root_dir, "pyproject.toml") + pyproject_path = os.path.join(os.getcwd(), "pyproject.toml") if not os.path.exists(pyproject_path): - print(f"pyproject.toml not found in {self.local_root_dir}") + print(f"pyproject.toml not found in {os.getcwd()}") return def _find_package_path(package_name: str) -> str | None: @@ -143,9 +143,9 @@ def load_config(self, specific_config_path: str | None = None) -> dict[str, Any] for override in list_of_overrides: if override: if override == runtime_manager.run_mode.UNIT_TEST: - override_path = os.path.join(self.local_root_dir, "tests", f"pipelex_{override}.toml") + override_path = os.path.join(os.getcwd(), "tests", f"pipelex_{override}.toml") else: - override_path = os.path.join(self.local_root_dir, "pipelex" if self.is_in_pipelex_config else "", f"pipelex_{override}.toml") + override_path = os.path.join(os.getcwd(), "pipelex" if self.is_in_pipelex_config else "", f"pipelex_{override}.toml") if override_dict := load_toml_from_path_if_exists(override_path): deep_update(pipelex_config, override_dict) @@ -174,20 +174,20 @@ def get_project_name(self) -> str | None: """ # First check pipelex's pyproject.toml - pipelex_pyproject_path = os.path.join(os.path.dirname(self.local_root_dir), "pyproject.toml") + pipelex_pyproject_path = os.path.join(os.path.dirname(os.getcwd()), "pyproject.toml") if pipelex_pyproject := load_toml_from_path_if_exists(path=pipelex_pyproject_path): if (project_name := pipelex_pyproject.get("project", 
{}).get("name")) and isinstance(project_name, str): return str(project_name) # Check local pyproject.toml - local_pyproject_path = os.path.join(self.local_root_dir, "pyproject.toml") + local_pyproject_path = os.path.join(os.getcwd(), "pyproject.toml") if local_pyproject := load_toml_from_path_if_exists(local_pyproject_path): name_obj: object = local_pyproject.get("project", {}).get("name") or local_pyproject.get("tool", {}).get("poetry", {}).get("name") if isinstance(name_obj, str): return name_obj # Check setup.cfg - setup_cfg_path = os.path.join(self.local_root_dir, "setup.cfg") + setup_cfg_path = os.path.join(os.getcwd(), "setup.cfg") try: config = ConfigParser() config.read(setup_cfg_path) @@ -199,7 +199,7 @@ def get_project_name(self) -> str | None: print(f"Failed to parse setup.cfg at {setup_cfg_path}: {exc}") # Check setup.py as last resort - setup_py_path = os.path.join(self.local_root_dir, "setup.py") + setup_py_path = os.path.join(os.getcwd(), "setup.py") try: with open(setup_py_path) as f: content = f.read() diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index 469f8646d..5db9953f8 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -4,6 +4,7 @@ from typing import Any from pipelex.tools.func_registry import func_registry +from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir from pipelex.tools.typing.module_inspector import import_module_from_file @@ -74,7 +75,7 @@ def _find_functions_in_module(cls, module: Any) -> list[Callable[..., Any]]: @classmethod def _find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> list[Path]: - """Find files matching a pattern in a directory. + """Find files matching a pattern in a directory, excluding common build/cache directories. 
Args: dir_path: Directory path to search in @@ -82,10 +83,21 @@ def _find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> is_recursive: Whether to search recursively in subdirectories Returns: - List of matching Path objects + List of matching Path objects, filtered to exclude problematic directories """ - path = Path(dir_path) - if is_recursive: - return list(path.rglob(pattern)) - return list(path.glob(pattern)) + # Get all files using the base utility + all_files = base_find_files_in_dir(dir_path, pattern, is_recursive) + + # Directories to exclude from scanning to avoid import issues + exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} + + # Filter out files in excluded directories + filtered_files: list[Path] = [] + for file_path in all_files: + # Check if any parent directory is in the exclude list + should_exclude = any(part in exclude_dirs for part in file_path.parts) + if not should_exclude: + filtered_files.append(file_path) + + return filtered_files diff --git a/tests/integration/pipelex/pipes/test_pipe_running_variants.py b/tests/integration/pipelex/pipes/test_pipe_running_variants.py index d42458d3f..fc1d0a738 100644 --- a/tests/integration/pipelex/pipes/test_pipe_running_variants.py +++ b/tests/integration/pipelex/pipes/test_pipe_running_variants.py @@ -122,6 +122,8 @@ async def test_pipe_infinite_loop( ): failing_pipelines_file_paths = get_config().pipelex.library_config.failing_pipelines_file_paths library_manager = get_library_manager() + # Reset library to avoid pipe name collisions from previous test runs + library_manager.reset() library_manager.load_libraries( library_file_paths=[Path(failing_pipeline_file_path) for failing_pipeline_file_path in failing_pipelines_file_paths], ) diff --git a/tests/integration/pipelex/test_data.py b/tests/integration/pipelex/test_data.py index 4af15eb79..0c29d962c 100644 --- 
a/tests/integration/pipelex/test_data.py +++ b/tests/integration/pipelex/test_data.py @@ -151,11 +151,6 @@ class PipeTestCases: SIMPLE_STUFF_IMAGE, "simple_llm_test_from_image", ), - ( - "Extract page contents from PDF", - SIMPLE_STUFF_PDF, - "extract_page_contents_from_pdf", - ), ] FAILURE_PIPES: ClassVar[list[tuple[str, type[Exception], str]]] = [ ( diff --git a/tests/test_pipelines/misc_tests/multiplicity.plx b/tests/test_pipelines/misc_tests/multiplicity.plx index f3e842522..3d0576808 100644 --- a/tests/test_pipelines/misc_tests/multiplicity.plx +++ b/tests/test_pipelines/misc_tests/multiplicity.plx @@ -42,7 +42,7 @@ Just state what it is in a single sentence. type = "PipeLLM" description = "Imagine a fantasy scene including products of nature" inputs = { product_of_nature = "ProductOfNature" } -output = "images.ImgGenPrompt" +output = "ImgGenPrompt" prompt = """ Imagine a fantasy scene including the following products of nature: @@ -54,7 +54,7 @@ Keep it short and concise, just one sentence. 
[pipe.imagine_nature_scene_of_original_power_rangers_colors] type = "PipeSequence" description = "Imagine nature scenes of Power Rangers colors" -output = "images.ImgGenPrompt" +output = "ImgGenPrompt" steps = [ { pipe = "original_power_ranger_colors", result = "color" }, { pipe = "imagine_nature_product", result = "product_of_nature" }, @@ -64,7 +64,7 @@ steps = [ [pipe.imagine_nature_scene_of_alltime_power_rangers_colors] type = "PipeSequence" description = "Imagine nature scenes of Power Rangers colors" -output = "images.ImgGenPrompt" +output = "ImgGenPrompt" steps = [ { pipe = "alltime_power_ranger_colors", result = "color", multiple_output = true }, { pipe = "imagine_nature_product", result = "product_of_nature" }, diff --git a/tests/test_pipelines/pipe_controllers/pipe_batch/text_list.py b/tests/test_pipelines/pipe_controllers/pipe_batch/text_list.py deleted file mode 100644 index 47672c1e5..000000000 --- a/tests/test_pipelines/pipe_controllers/pipe_batch/text_list.py +++ /dev/null @@ -1,6 +0,0 @@ -from pipelex.core.stuffs.list_content import ListContent -from pipelex.core.stuffs.text_content import TextContent - - -class TextList(ListContent[TextContent]): - pass diff --git a/tests/test_pipelines/pipe_controllers/pipe_batch/uppercase_transformer.plx b/tests/test_pipelines/pipe_controllers/pipe_batch/uppercase_transformer.plx index c3e82f1fb..dbbfd8445 100644 --- a/tests/test_pipelines/pipe_controllers/pipe_batch/uppercase_transformer.plx +++ b/tests/test_pipelines/pipe_controllers/pipe_batch/uppercase_transformer.plx @@ -3,17 +3,8 @@ description = "Simple pipes for testing PipeBatch integration" [concept] UppercaseText = "Text that has been transformed to uppercase" -TextList = "List of Text" [pipe] -[pipe.test_pipe_batch] -type = "PipeBatch" -description = "Test pipe batch" -inputs = { text_item = "Text", text_list = "TextList" } -output = "UppercaseText" -branch_pipe_code = "uppercase_transformer" -input_list_name = "text_list" -input_item_name = 
"text_item" [pipe.uppercase_transformer] type = "PipeLLM" diff --git a/tests/test_pipelines/pipe_operators/pipe_llm_vision.plx b/tests/test_pipelines/pipe_operators/pipe_llm_vision.plx index 140ab6ff8..a3e3645a8 100644 --- a/tests/test_pipelines/pipe_operators/pipe_llm_vision.plx +++ b/tests/test_pipelines/pipe_operators/pipe_llm_vision.plx @@ -5,6 +5,10 @@ description = "Test PipeLLM with vision capabilities" VisionAnalysis = "Some analysis based on the image" BasicDescription = "Basic description of the image" +[concept.Photo] +description = "A photo" +refines = "Image" + [pipe.describe_image] type = "PipeLLM" description = "Describe what is in the image" @@ -42,7 +46,7 @@ Completely ignore $imageA. [pipe.vision_analysis] type = "PipeLLM" description = "Provide detailed analysis of the image" -inputs = { image = "images.Photo" } +inputs = { image = "Photo" } output = "VisionAnalysis" model = "llm_to_extract_diagram" system_prompt = "You are an expert image analyst. Provide detailed, accurate descriptions." 
From 3d84cdbe3bc0c6933c711ecb28ee787710fa8dc0 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 22:39:20 +0200 Subject: [PATCH 040/115] Cleanup --- pipelex/cogt/content_generation/content_generator.py | 1 - pipelex/libraries/library_config.py | 8 -------- pipelex/pipe_controllers/sequence/pipe_sequence.py | 3 +-- pipelex/pipe_run/pipe_run_params.py | 8 ++++++++ pipelex/pipelex.toml | 4 ++-- pyproject.toml | 5 +++++ 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pipelex/cogt/content_generation/content_generator.py b/pipelex/cogt/content_generation/content_generator.py index fffba6d52..91d5b66b4 100644 --- a/pipelex/cogt/content_generation/content_generator.py +++ b/pipelex/cogt/content_generation/content_generator.py @@ -249,7 +249,6 @@ async def make_templated_text( templating_style: TemplatingStyle | None = None, template_category: TemplateCategory | None = None, ) -> str: - log.debug(f"context: {context}") templating_assignment = TemplatingAssignment( context=context, template=template, diff --git a/pipelex/libraries/library_config.py b/pipelex/libraries/library_config.py index c22d0aa22..94816ca9f 100644 --- a/pipelex/libraries/library_config.py +++ b/pipelex/libraries/library_config.py @@ -10,14 +10,6 @@ class LibraryConfig(ConfigModel): package_name: ClassVar[str] = "pipelex" config_dir_path: str = "pipelex_libraries" - # @property - # def pipelines_dir_path(self) -> str: - # return f"{self.config_dir_path}/pipelines" - - # @property - # def base_pipelines_dir_path(self) -> str: - # return f"{self.config_dir_path}/pipelines/base_library" - @property def test_pipelines_dir_path(self) -> str: return "tests/test_pipelines" diff --git a/pipelex/pipe_controllers/sequence/pipe_sequence.py b/pipelex/pipe_controllers/sequence/pipe_sequence.py index 2b0b3c64b..52f031f90 100644 --- a/pipelex/pipe_controllers/sequence/pipe_sequence.py +++ b/pipelex/pipe_controllers/sequence/pipe_sequence.py @@ -203,10 +203,9 @@ async def 
_dry_run_controller_pipe( pipe_run_params: PipeRunParams, output_name: str | None = None, ) -> PipeOutput: - if pipe_run_params.run_mode != PipeRunMode.DRY: + if not pipe_run_params.run_mode.is_dry: msg = f"PipeSequence._dry_run_controller_pipe() called with run_mode = {pipe_run_params.run_mode} in pipe {self.code}" raise PipeRunParamsError(msg) - log.debug(f"PipeSequence._dry_run_controller_pipe() called with {self.code=} {pipe_run_params=}") # Verify the output of this pipe is matching the output of the last step. concept_string_of_last_step = get_required_pipe(pipe_code=self.sequential_sub_pipes[-1].pipe_code).output.concept_string if self.output.concept_string != concept_string_of_last_step: diff --git a/pipelex/pipe_run/pipe_run_params.py b/pipelex/pipe_run/pipe_run_params.py index d14a14a83..d487c83f6 100644 --- a/pipelex/pipe_run/pipe_run_params.py +++ b/pipelex/pipe_run/pipe_run_params.py @@ -19,6 +19,14 @@ class PipeRunMode(StrEnum): LIVE = "live" DRY = "dry" + @property + def is_dry(self) -> bool: + match self: + case PipeRunMode.DRY: + return True + case PipeRunMode.LIVE: + return False + FORCE_DRY_RUN_MODE_ENV_KEY = "PIPELEX_FORCE_DRY_RUN_MODE" diff --git a/pipelex/pipelex.toml b/pipelex/pipelex.toml index 9c295053a..3037153e7 100644 --- a/pipelex/pipelex.toml +++ b/pipelex/pipelex.toml @@ -249,8 +249,8 @@ nb_list_items = 3 nb_extract_pages = 4 allowed_to_fail_pipes = [ "infinite_loop_1", # Loop but only for testing purposes - "fix_failing_pipes_once", # Loop but its normal - "validate_pipelex_bundle_spec", # Loop but its normal + "fix_failing_pipes_once", # Loop but it's expected + "validate_pipelex_bundle_spec", # Loop but it's expected "handle_validation_result", # Complex case of a sub pipe needed a ListContent because its batching on, but the dry_run cannot know. 
"pipe_builder", # because of the 'fix_failing_pipes_once', 'validate_pipelex_bundle_spec', 'handle_validation_result' ] diff --git a/pyproject.toml b/pyproject.toml index be6a7b231..f80a4a58c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -212,6 +212,11 @@ addopts = "--import-mode=importlib -ra -m 'not (inference or llm or img_gen or e asyncio_default_fixture_loop_scope = "session" filterwarnings = [ "ignore:Support for class-based `config` is deprecated:DeprecationWarning", + "ignore:websockets.*is deprecated:DeprecationWarning", + "ignore:typing\\.io is deprecated:DeprecationWarning", + "ignore:typing\\.re is deprecated:DeprecationWarning", + "ignore:.*has been moved to cryptography.*", + "ignore:Use.*Types instead", ] markers = [ "needs_output: tests that need output to be displayed", From 79eec682af29fd1db538430bdd53c4294b716c64 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Sun, 12 Oct 2025 23:20:02 +0200 Subject: [PATCH 041/115] Cleanup dry run errors --- pipelex/core/memory/working_memory.py | 12 ++++----- pipelex/core/memory/working_memory_factory.py | 5 ---- .../condition/pipe_condition.py | 1 - .../sequence/pipe_sequence.py | 2 +- pipelex/pipe_controllers/sub_pipe.py | 2 -- .../llm/llm_prompt_blueprint.py | 5 +--- pipelex/pipe_operators/llm/pipe_llm.py | 4 --- pipelex/pipe_run/dry_run.py | 26 +++++++++---------- .../integration/pipelex/test_fundamentals.py | 8 ++++-- 9 files changed, 27 insertions(+), 38 deletions(-) diff --git a/pipelex/core/memory/working_memory.py b/pipelex/core/memory/working_memory.py index 2cf9faa8e..241063ccd 100644 --- a/pipelex/core/memory/working_memory.py +++ b/pipelex/core/memory/working_memory.py @@ -28,6 +28,7 @@ MAIN_STUFF_NAME = "main_stuff" BATCH_ITEM_STUFF_NAME = "BATCH_ITEM" PRETTY_PRINT_MAX_LENGTH = 1000 +TEST_DUMMY_NAME = "dummy_result" StuffDict = dict[str, Stuff] StuffArtefactDict = dict[str, StuffArtefact] @@ -125,12 +126,13 @@ def add_new_stuff(self, name: str, stuff: Stuff, aliases: list[str] | None = 
Non raise WorkingMemoryConsistencyError(msg) if name in self.root or name in self.aliases: existing_stuff = self.get_stuff(name=name) - if existing_stuff == stuff: + if existing_stuff == stuff and name != TEST_DUMMY_NAME: log.warning(f"Key '{name}' already exists in WorkingMemory with the same stuff") return - log.warning(f"Key '{name}' already exists in WorkingMemory and will be replaced by something different") - log.verbose(f"Existing stuff: {existing_stuff}") - log.verbose(f"New stuff: {stuff}") + elif name != TEST_DUMMY_NAME: + log.warning(f"Key '{name}' already exists in WorkingMemory and will be replaced by something different") + log.verbose(f"Existing stuff: {existing_stuff}") + log.verbose(f"New stuff: {stuff}") # it's a new stuff self.set_stuff(name=name, stuff=stuff) @@ -157,7 +159,6 @@ def set_alias(self, alias: str, target: str) -> None: if target not in self.root: msg = f"Cannot create alias to non-existent target '{target}'" raise WorkingMemoryConsistencyError(msg) - log.debug(f"Setting alias '{alias}' pointing to target '{target}'") self.aliases[alias] = target def add_alias(self, alias: str, target: str) -> None: @@ -166,7 +167,6 @@ def add_alias(self, alias: str, target: str) -> None: msg = f"Cannot add alias '{alias}' as it already exists" raise WorkingMemoryConsistencyError(msg) self.set_alias(alias=alias, target=target) - log.debug(f"Added alias '{alias}' pointing to target '{target}'") def remove_alias(self, alias: str) -> None: """Remove an alias if it exists.""" diff --git a/pipelex/core/memory/working_memory_factory.py b/pipelex/core/memory/working_memory_factory.py index f43f39518..bafe3ceb4 100644 --- a/pipelex/core/memory/working_memory_factory.py +++ b/pipelex/core/memory/working_memory_factory.py @@ -193,11 +193,6 @@ def make_for_dry_run(cls, needed_inputs: list[TypedNamedInputRequirement]) -> "W working_memory = cls.make_empty() for requirement in needed_inputs: - log.debug( - f"Creating dry run mock for '{requirement.variable_name}' 
with concept " - f"'{requirement.concept.code}' and class '{requirement.structure_class.__name__}'", - ) - try: if not requirement.multiplicity: mock_content = cls.create_mock_content(requirement) diff --git a/pipelex/pipe_controllers/condition/pipe_condition.py b/pipelex/pipe_controllers/condition/pipe_condition.py index 7d03c4bf1..87aee98b5 100644 --- a/pipelex/pipe_controllers/condition/pipe_condition.py +++ b/pipelex/pipe_controllers/condition/pipe_condition.py @@ -311,7 +311,6 @@ async def _run_controller_pipe( # Get required variables and validate they exist in working memory required_variables = chosen_pipe.required_variables() - log.debug(required_variables, title=f"Required variables for PipeCondition '{self.code}'") required_stuff_names = {required_variable for required_variable in required_variables if not required_variable.startswith("_")} try: required_stuffs = working_memory.get_stuffs(names=required_stuff_names) diff --git a/pipelex/pipe_controllers/sequence/pipe_sequence.py b/pipelex/pipe_controllers/sequence/pipe_sequence.py index 52f031f90..a3ef87cdb 100644 --- a/pipelex/pipe_controllers/sequence/pipe_sequence.py +++ b/pipelex/pipe_controllers/sequence/pipe_sequence.py @@ -19,7 +19,7 @@ from pipelex.pipe_controllers.pipe_controller import PipeController from pipelex.pipe_controllers.sequence.exceptions import PipeSequenceError from pipelex.pipe_controllers.sub_pipe import SubPipe -from pipelex.pipe_run.pipe_run_params import PipeRunMode, PipeRunParams +from pipelex.pipe_run.pipe_run_params import PipeRunParams from pipelex.pipeline.job_metadata import JobMetadata from pipelex.types import Self diff --git a/pipelex/pipe_controllers/sub_pipe.py b/pipelex/pipe_controllers/sub_pipe.py index 31d28fd90..79d0c9c37 100644 --- a/pipelex/pipe_controllers/sub_pipe.py +++ b/pipelex/pipe_controllers/sub_pipe.py @@ -30,7 +30,6 @@ async def run_pipe( sub_pipe_run_params: PipeRunParams, ) -> PipeOutput: """Run or dry run a single operation self.""" - 
log.debug(f"SubPipe {self.pipe_code} to generate {self.output_name}") if self.output_multiplicity: sub_pipe_run_params.output_multiplicity = self.output_multiplicity sub_pipe_run_params.batch_params = self.batch_params @@ -110,7 +109,6 @@ async def run_pipe( else: # Case 3: Normal processing required_variables = sub_pipe.required_variables() - log.debug(required_variables, title=f"Required variables for {self.pipe_code}") required_stuff_names = {rv for rv in required_variables if not rv.startswith("_")} try: required_stuffs = working_memory.get_stuffs(names=required_stuff_names) diff --git a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py index 32b2232b7..1416f606a 100644 --- a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py +++ b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py @@ -115,8 +115,8 @@ async def make_llm_prompt( if not extra_params: extra_params = {} for image_index, image_name in enumerate(prompt_user_images.keys()): + # Replacing image variable '{image_name}' with numbered tag '[Image {image_index + 1}]' extra_params[image_name] = f"[Image {image_index + 1}]" - log.warning(f"Replacing image variable '{image_name}' with numbered tag '[Image {image_index + 1}]'") user_text: str | None = None if self.prompt_blueprint: user_text = await self._unravel_text( @@ -163,9 +163,6 @@ async def _unravel_text( jinja2_blueprint.templating_style = templating_style log.verbose(f"Setting prompting style to {templating_style}") - log.info(f"extra_params: {extra_params}") - log.info(f"jinja2_blueprint.extra_context: {jinja2_blueprint.extra_context}") - context: dict[str, Any] = context_provider.generate_jinja2_context() if extra_params: context.update(**extra_params) diff --git a/pipelex/pipe_operators/llm/pipe_llm.py b/pipelex/pipe_operators/llm/pipe_llm.py index b1393fb57..2a5280f5f 100644 --- a/pipelex/pipe_operators/llm/pipe_llm.py +++ b/pipelex/pipe_operators/llm/pipe_llm.py @@ -256,11 +256,7 @@ async def 
_run_operator_pipe( # we acknowledge the code here with llm_prompt_1 and llm_prompt_2 is overly complex and should be refactored. the_content: StuffContent - log.debug(f"output_concept.structure_class_name: {output_concept.structure_class_name}") - log.debug(f"TextContent.__class__.__name__: {TextContent.__class__.__name__}") - log.debug(f"is_multiple_output: {is_multiple_output}") if output_concept.structure_class_name == "TextContent" and not is_multiple_output: - log.info(f"PipeLLM generating a single text output: {self.__class__.__name__}_gen_text") llm_prompt_1_for_text = await self.llm_prompt_spec.make_llm_prompt( output_concept_string=output_concept.concept_string, context_provider=working_memory, diff --git a/pipelex/pipe_run/dry_run.py b/pipelex/pipe_run/dry_run.py index fa9d4e5c1..c2ca25716 100644 --- a/pipelex/pipe_run/dry_run.py +++ b/pipelex/pipe_run/dry_run.py @@ -12,6 +12,7 @@ from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.stuffs.stuff_content import StuffContent from pipelex.core.stuffs.text_content import TextContent +from pipelex.exceptions import PipeStackOverflowError from pipelex.hub import get_class_registry from pipelex.pipe_run.pipe_run_params import PipeRunMode from pipelex.pipe_run.pipe_run_params_factory import PipeRunParamsFactory @@ -26,7 +27,6 @@ class DryRunError(Exception): class DryRunStatus(StrEnum): SUCCESS = "SUCCESS" FAILURE = "FAILURE" - WARNING = "WARNING" @property def is_failure(self) -> bool: @@ -35,15 +35,12 @@ def is_failure(self) -> bool: return True case DryRunStatus.SUCCESS: return False - case DryRunStatus.WARNING: - return False class DryRunOutput(BaseModel): pipe_code: str status: DryRunStatus error_message: str | None = None - warning_message: str | None = None async def dry_run_pipe(pipe: PipeAbstract, raise_on_failure: bool = False) -> DryRunOutput: @@ -60,17 +57,14 @@ async def dry_run_pipe(pipe: PipeAbstract, raise_on_failure: bool = False) -> Dr working_memory=working_memory, 
pipe_run_params=PipeRunParamsFactory.make_run_params(pipe_run_mode=PipeRunMode.DRY), ) - except Exception as exc: + except PipeStackOverflowError as exc: if pipe.code in allowed_to_fail_pipes: - warning_message = f"Allowed to fail dry run for pipe '{pipe.code}': {exc}" - log.warning(warning_message) - return DryRunOutput(pipe_code=pipe.code, status=DryRunStatus.WARNING, warning_message=warning_message) - - if raise_on_failure: + error_message = f"Allowed to fail dry run for pipe '{pipe.code}': {exc}" + return DryRunOutput(pipe_code=pipe.code, status=DryRunStatus.FAILURE, error_message=error_message) + elif raise_on_failure: raise error_message = f"Dry run failed for pipe '{pipe.code}': {exc}" - log.error(error_message) return DryRunOutput(pipe_code=pipe.code, status=DryRunStatus.FAILURE, error_message=error_message) log.info(f"Pipe '{pipe.code}' dry run completed successfully") return DryRunOutput(pipe_code=pipe.code, status=DryRunStatus.SUCCESS) @@ -117,8 +111,14 @@ def run_pipe_in_thread(pipe: PipeAbstract) -> DryRunOutput: for pipe in pipes: results[pipe.code] = await dry_run_pipe(pipe, raise_on_failure=raise_on_failure) - successful_pipes = [pipe_code for pipe_code, status in results.items() if status.status == DryRunStatus.SUCCESS] - failed_pipes = [pipe_code for pipe_code, status in results.items() if status.status != DryRunStatus.SUCCESS] + successful_pipes: list[str] = [] + failed_pipes: list[str] = [] + for pipe_code, dry_run_output in results.items(): + match dry_run_output.status: + case DryRunStatus.SUCCESS: + successful_pipes.append(pipe_code) + case DryRunStatus.FAILURE: + failed_pipes.append(pipe_code) unexpected_failures = {pipe_code: results[pipe_code] for pipe_code in failed_pipes if pipe_code not in allowed_to_fail_pipes} diff --git a/tests/integration/pipelex/test_fundamentals.py b/tests/integration/pipelex/test_fundamentals.py index 8933ccc52..fc9f5f1fc 100644 --- a/tests/integration/pipelex/test_fundamentals.py +++ 
b/tests/integration/pipelex/test_fundamentals.py @@ -1,7 +1,8 @@ import pytest +from pipelex.config import get_config from pipelex.hub import get_pipes -from pipelex.pipe_run.dry_run import DryRunStatus, dry_run_pipes +from pipelex.pipe_run.dry_run import dry_run_pipes from pipelex.pipelex import Pipelex @@ -21,8 +22,11 @@ async def test_dry_run_all_pipes(self): results = await dry_run_pipes(pipes=get_pipes(), raise_on_failure=False) # Check if there were any failures + allowed_to_fail_pipes = get_config().pipelex.dry_run_config.allowed_to_fail_pipes - failed_pipes = {pipe_code: output for pipe_code, output in results.items() if output.status == DryRunStatus.FAILURE} + failed_pipes = { + pipe_code: output for pipe_code, output in results.items() if output.status.is_failure and pipe_code not in allowed_to_fail_pipes + } if failed_pipes: failure_details = "\n".join([f" - {pipe_code}: {output.error_message}" for pipe_code, output in failed_pipes.items()]) From 23ecfa07a171bc3d6c1d62eb1c4a331dd291d421 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 00:02:09 +0200 Subject: [PATCH 042/115] Fixed warnings --- pipelex/cli/commands/__init__.py | 6 +-- pipelex/tools/class_registry_utils.py | 45 ++++++++++------ pipelex/tools/func_registry_utils.py | 17 +++++-- pipelex/tools/typing/module_inspector.py | 51 ++++++++++++++++--- tests/cases/__init__.py | 10 ++-- tests/cases/jinja2_templates.py | 3 +- .../pipelex/core/bundles/test_pipe_sorter.py | 3 +- .../concept_factory/test_concept_factory.py | 3 +- tests/unit/pipelex/core/test_data/__init__.py | 31 ++++++----- .../core/test_data/complex/__init__.py | 1 - .../builder/pipe/test_inputs_blueprint.py | 3 +- .../builder/pipe/test_pipe_blueprint.py | 3 +- .../builder/pipe/test_sub_pipe_blueprint.py | 3 +- 13 files changed, 116 insertions(+), 63 deletions(-) diff --git a/pipelex/cli/commands/__init__.py b/pipelex/cli/commands/__init__.py index fbec641e0..2a74b504f 100644 --- a/pipelex/cli/commands/__init__.py +++ 
b/pipelex/cli/commands/__init__.py @@ -3,8 +3,8 @@ This package organizes CLI commands into logical modules. """ -from .init_cmd import init_app -from .show_cmd import show_app -from .validate_cmd import validate_app +from pipelex.cli.commands.init_cmd import init_app +from pipelex.cli.commands.show_cmd import show_app +from pipelex.cli.commands.validate_cmd import validate_app __all__ = ["init_app", "show_app", "validate_app"] diff --git a/pipelex/tools/class_registry_utils.py b/pipelex/tools/class_registry_utils.py index 627e9ecdd..04da93099 100644 --- a/pipelex/tools/class_registry_utils.py +++ b/pipelex/tools/class_registry_utils.py @@ -1,6 +1,7 @@ import inspect import sys import types +import warnings from pathlib import Path from typing import TYPE_CHECKING, Annotated, Any, Union, get_args, get_origin @@ -12,7 +13,7 @@ from pydantic.fields import FieldInfo from pipelex import log -from pipelex.tools.typing.module_inspector import find_classes_in_module, import_module_from_file +from pipelex.tools.typing.module_inspector import ModuleFileError, find_classes_in_module, import_module_from_file _NoneType = type(None) _UnionType = getattr(types, "UnionType", None) # Py3.10+: types.UnionType @@ -194,9 +195,17 @@ def import_modules_in_folder( for python_file in python_files: try: import_module_from_file(str(python_file)) - except Exception as e: - # Log but don't fail - some files might not be importable - log.debug(f"Could not import {python_file}: {e}") + except ModuleFileError: + # Expected: file validation issues (directories with .py extension, etc.) 
+ # log.debug(f"Skipping file {python_file}: {e}") + pass + except ImportError: + # Common: missing dependencies, circular imports, relative imports + # log.debug(f"Could not import {python_file}: {e}" + pass + except SyntaxError as exc: + # Potentially problematic: invalid Python syntax may indicate broken code + log.warning(f"Syntax error in {python_file}: {exc}") @classmethod def auto_register_all_subclasses( @@ -225,16 +234,22 @@ def auto_register_all_subclasses( # Iterate through all loaded modules for module in modules_snapshot: try: - # Find all classes in this module - for _, obj in inspect.getmembers(module, inspect.isclass): - # Check if it's a subclass of base_class (but not the base_class itself) - if obj is not base_class and issubclass(obj, base_class): - # Register if not already registered - if not class_registry.has_class(name=obj.__name__): - class_registry.register_class(obj) - registered_count += 1 - except Exception as e: - # Skip modules that can't be inspected - log.debug(f"Could not inspect module for auto-registration: {e}") + # Suppress all warnings during inspection (deprecation warnings from dependencies) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # Find all classes in this module + for _, obj in inspect.getmembers(module, inspect.isclass): + # Check if it's a subclass of base_class (but not the base_class itself) + if obj is not base_class and issubclass(obj, base_class): + # Register if not already registered + if not class_registry.has_class(name=obj.__name__): + class_registry.register_class(obj) + registered_count += 1 + except (AttributeError, ImportError, TypeError): + # Expected: some modules in sys.modules can't be inspected + # - Built-in/native modules (ImportError) + # - Modules without expected attributes (AttributeError) + # - Non-module objects (TypeError) + pass return registered_count diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index 
5db9953f8..ca9fd7ada 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -3,9 +3,10 @@ from pathlib import Path from typing import Any +from pipelex import log from pipelex.tools.func_registry import func_registry from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir -from pipelex.tools.typing.module_inspector import import_module_from_file +from pipelex.tools.typing.module_inspector import ModuleFileError, import_module_from_file class FuncRegistryUtils: @@ -52,9 +53,17 @@ def _register_funcs_in_file(cls, file_path: str) -> None: name=func.__name__, should_warn_if_already_registered=True, ) - except Exception as e: - # Log error but continue processing other files - print(f"Error processing file {file_path}: {e}") + except ModuleFileError: + # Expected: file validation issues (directories with .py extension, etc.) + # log.verbose(f"Skipping file {file_path}: {e}") + pass + except ImportError: + # Common: missing dependencies, circular imports, relative imports + # log.verbose(f"Could not import {file_path}: {e}") + pass + except SyntaxError as exc: + # Potentially problematic: invalid Python syntax may indicate broken code + log.warning(f"Syntax error in {file_path}: {exc}") @classmethod def _find_functions_in_module(cls, module: Any) -> list[Callable[..., Any]]: diff --git a/pipelex/tools/typing/module_inspector.py b/pipelex/tools/typing/module_inspector.py index e86a143bc..3a5271dab 100644 --- a/pipelex/tools/typing/module_inspector.py +++ b/pipelex/tools/typing/module_inspector.py @@ -2,6 +2,7 @@ import inspect import os import sys +from pathlib import Path from typing import Any @@ -27,6 +28,12 @@ def import_module_from_file(file_path: str) -> Any: msg = f"File {file_path} is not a Python file (must end with .py)" raise ModuleFileError(msg) + # Validate that the path exists and is a file, not a directory + path = Path(file_path) + if path.exists() and not path.is_file(): + msg = f"Path 
{file_path} exists but is not a file (it may be a directory)" + raise ModuleFileError(msg) + # Convert file path to module-style path to use as the actual module name module_name = _convert_file_path_to_module_path(file_path) @@ -52,15 +59,45 @@ def import_module_from_file(file_path: str) -> Any: def _convert_file_path_to_module_path(file_path: str) -> str: - """Convert a file path to a module-style path.""" - # Remove .py extension - module_path = file_path.removesuffix(".py") + """Convert a file path to a valid module identifier. + + The module name doesn't need to match the actual package structure since + we're using spec_from_file_location - it just needs to be a unique, valid + Python identifier for registration in sys.modules. + + Args: + file_path: Path to the Python file - # Replace path separators with dots - module_path = module_path.replace(os.sep, ".") + Returns: + A unique, valid Python module name derived from the absolute file path + """ + # Convert to absolute path for uniqueness and consistency + abs_path = os.path.abspath(file_path) + + # Remove .py extension + module_path = abs_path.removesuffix(".py") + + # Replace all non-alphanumeric characters with underscores to create a valid identifier + # This handles path separators, dots, hyphens, spaces, etc. 
+ valid_chars: list[str] = [] + for char in module_path: + if char.isalnum(): + valid_chars.append(char) + else: + valid_chars.append("_") + + result = "".join(valid_chars) + + # Ensure it doesn't start with a number (Python requirement) + if result and result[0].isdigit(): + result = "_" + result + + # Handle edge case of empty result + if not result: + msg = f"Cannot create valid module name from file path: {file_path}" + raise ModuleFileError(msg) - # Handle __init__.py files by removing the __init__ part - return module_path.removesuffix(".__init__") + return result def find_classes_in_module( diff --git a/tests/cases/__init__.py b/tests/cases/__init__.py index 4c16db2fb..7b1c6b63e 100644 --- a/tests/cases/__init__.py +++ b/tests/cases/__init__.py @@ -4,11 +4,11 @@ Each module exposes only data constants that can be imported cleanly. """ -from .documents import PDFTestCases -from .images import ImageTestCases -from .jinja2_templates import JINJA2TestCases -from .registry import ClassRegistryTestCases, FileHelperTestCases, Fruit -from .urls import TestURLs +from tests.cases.documents import PDFTestCases +from tests.cases.images import ImageTestCases +from tests.cases.jinja2_templates import JINJA2TestCases +from tests.cases.registry import ClassRegistryTestCases, FileHelperTestCases, Fruit +from tests.cases.urls import TestURLs __all__ = [ "ClassRegistryTestCases", diff --git a/tests/cases/jinja2_templates.py b/tests/cases/jinja2_templates.py index fd70714b6..a1e9169de 100644 --- a/tests/cases/jinja2_templates.py +++ b/tests/cases/jinja2_templates.py @@ -3,8 +3,7 @@ from typing import Any, ClassVar from pipelex.cogt.templating.templating_style import TagStyle, TemplatingStyle, TextFormat - -from .registry import Fruit +from tests.cases.registry import Fruit class JINJA2TestCases: diff --git a/tests/unit/pipelex/core/bundles/test_pipe_sorter.py b/tests/unit/pipelex/core/bundles/test_pipe_sorter.py index 66e0a1868..9f8718aaf 100644 --- 
a/tests/unit/pipelex/core/bundles/test_pipe_sorter.py +++ b/tests/unit/pipelex/core/bundles/test_pipe_sorter.py @@ -4,8 +4,7 @@ from pipelex.core.bundles.pipe_sorter import sort_pipes_by_dependencies from pipelex.core.bundles.pipelex_bundle_blueprint import PipeBlueprintUnion - -from .test_data_pipe_sorter import PipeSorterTestCases +from tests.unit.pipelex.core.bundles.test_data_pipe_sorter import PipeSorterTestCases class TestSortPipesByDependencies: diff --git a/tests/unit/pipelex/core/concepts/concept_factory/test_concept_factory.py b/tests/unit/pipelex/core/concepts/concept_factory/test_concept_factory.py index 2abbd115f..f64ffd299 100644 --- a/tests/unit/pipelex/core/concepts/concept_factory/test_concept_factory.py +++ b/tests/unit/pipelex/core/concepts/concept_factory/test_concept_factory.py @@ -8,8 +8,7 @@ ) from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.exceptions import StructureClassError - -from .data import TestCases +from tests.unit.pipelex.core.concepts.concept_factory.data import TestCases class TestConceptFactory: diff --git a/tests/unit/pipelex/core/test_data/__init__.py b/tests/unit/pipelex/core/test_data/__init__.py index 700c94538..663094fea 100644 --- a/tests/unit/pipelex/core/test_data/__init__.py +++ b/tests/unit/pipelex/core/test_data/__init__.py @@ -1,22 +1,21 @@ from typing import ClassVar from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint - -from .complex.multi_feature import COMPLEX_TEST_CASES -from .concepts.refining_concepts import REFINING_CONCEPT_TEST_CASES -from .concepts.simple_concepts import SIMPLE_CONCEPT_TEST_CASES -from .concepts.structured_concepts import STRUCTURED_CONCEPT_TEST_CASES -from .domain.simple_domains import DOMAIN_TEST_CASES -from .errors.invalid_plx import ERROR_TEST_CASES -from .pipes.controllers.batch.pipe_batch import PIPE_BATCH_TEST_CASES -from .pipes.controllers.condition.pipe_condition import PIPE_CONDITION_TEST_CASES -from 
.pipes.controllers.parallel.pipe_parallel import PIPE_PARALLEL_TEST_CASES -from .pipes.controllers.sequence.pipe_sequence import PIPE_SEQUENCE_TEST_CASES -from .pipes.operators.compose.pipe_compose import PIPE_COMPOSE_TEST_CASES -from .pipes.operators.extract.pipe_extract import PIPE_EXTRACT_TEST_CASES -from .pipes.operators.func.pipe_func import PIPE_FUNC_TEST_CASES -from .pipes.operators.img_gen.pipe_img_gen import PIPE_IMG_GEN_TEST_CASES -from .pipes.operators.llm.pipe_llm import PIPE_LLM_TEST_CASES +from tests.unit.pipelex.core.test_data.complex.multi_feature import COMPLEX_TEST_CASES +from tests.unit.pipelex.core.test_data.concepts.refining_concepts import REFINING_CONCEPT_TEST_CASES +from tests.unit.pipelex.core.test_data.concepts.simple_concepts import SIMPLE_CONCEPT_TEST_CASES +from tests.unit.pipelex.core.test_data.concepts.structured_concepts import STRUCTURED_CONCEPT_TEST_CASES +from tests.unit.pipelex.core.test_data.domain.simple_domains import DOMAIN_TEST_CASES +from tests.unit.pipelex.core.test_data.errors.invalid_plx import ERROR_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.controllers.batch.pipe_batch import PIPE_BATCH_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.controllers.condition.pipe_condition import PIPE_CONDITION_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.controllers.parallel.pipe_parallel import PIPE_PARALLEL_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.controllers.sequence.pipe_sequence import PIPE_SEQUENCE_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.operators.compose.pipe_compose import PIPE_COMPOSE_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.operators.extract.pipe_extract import PIPE_EXTRACT_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.operators.func.pipe_func import PIPE_FUNC_TEST_CASES +from tests.unit.pipelex.core.test_data.pipes.operators.img_gen.pipe_img_gen import PIPE_IMG_GEN_TEST_CASES +from 
tests.unit.pipelex.core.test_data.pipes.operators.llm.pipe_llm import PIPE_LLM_TEST_CASES class InterpreterTestCases: diff --git a/tests/unit/pipelex/core/test_data/complex/__init__.py b/tests/unit/pipelex/core/test_data/complex/__init__.py index bbba9d0b6..e69de29bb 100644 --- a/tests/unit/pipelex/core/test_data/complex/__init__.py +++ b/tests/unit/pipelex/core/test_data/complex/__init__.py @@ -1 +0,0 @@ -"""Complex test scenarios for PipelexInterpreter tests.""" diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_inputs_blueprint.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_inputs_blueprint.py index 524052bd6..15a0dcecb 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_inputs_blueprint.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_inputs_blueprint.py @@ -1,8 +1,7 @@ import pytest from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint - -from .test_data_inputs import InputRequirementTestCases +from tests.unit.pipelex.libraries.pipelines.builder.pipe.test_data_inputs import InputRequirementTestCases class TestInputRequirementBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_pipe_blueprint.py b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_pipe_blueprint.py index 4f11b6b87..2259ca6c5 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_pipe_blueprint.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_pipe_blueprint.py @@ -2,8 +2,7 @@ from pipelex.core.pipes.pipe_blueprint import PipeBlueprint from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec - -from .test_data_pipe import PipeBlueprintTestCases +from tests.unit.pipelex.libraries.pipelines.builder.pipe.test_data_pipe import PipeBlueprintTestCases class TestPipeBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_sub_pipe_blueprint.py 
b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_sub_pipe_blueprint.py index 19f83965d..42f8b6e99 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_sub_pipe_blueprint.py +++ b/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_sub_pipe_blueprint.py @@ -2,8 +2,7 @@ from pipelex.libraries.pipelines.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.pipe_controllers.sub_pipe_blueprint import SubPipeBlueprint - -from .test_data_sub_pipe import SubPipeTestCases +from tests.unit.pipelex.libraries.pipelines.builder.pipe.test_data_sub_pipe import SubPipeTestCases class TestSubPipeBlueprintConversion: From 6969203b941819b2eac993b89e31ca2dbdbf31e2 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 00:25:50 +0200 Subject: [PATCH 043/115] Moved Builder, library is no more --- .../pipelines => }/builder/__init__.py | 0 .../pipelines => }/builder/builder.plx | 0 .../pipelines => }/builder/builder.py | 50 +++++++++---------- .../pipelines => }/builder/builder_errors.py | 20 ++++---- .../pipelines => }/builder/builder_loop.py | 14 +++--- .../builder/concept/__init__.py | 0 .../builder/concept/concept.plx | 0 .../builder/concept/concept_spec.py | 0 .../{libraries/pipelines => }/builder/flow.py | 2 +- .../pipelines => }/builder/flow_factory.py | 6 +-- .../pipelines => }/builder/pipe/__init__.py | 0 .../builder/pipe/pipe_batch_spec.py | 2 +- .../builder/pipe/pipe_compose_spec.py | 2 +- .../builder/pipe/pipe_condition_spec.py | 2 +- .../builder/pipe/pipe_design.plx | 0 .../builder/pipe/pipe_extract_spec.py | 2 +- .../builder/pipe/pipe_func_spec.py | 2 +- .../builder/pipe/pipe_img_gen_spec.py | 2 +- .../builder/pipe/pipe_llm_spec.py | 2 +- .../builder/pipe/pipe_parallel_spec.py | 6 +-- .../builder/pipe/pipe_sequence_spec.py | 4 +- .../builder/pipe/pipe_signature.py | 2 +- .../builder/pipe/sub_pipe_spec.py | 0 pipelex/cli/commands/build_cmd.py | 6 +-- pipelex/libraries/pipelines/__init__.py | 1 - 
.../builder/concept/integration_test_data.py | 0 .../concept/test_concept_spec_generation.py | 4 +- .../pipelex/pipes/test_flow_factory.py | 2 +- .../builder/concept/test_concept_blueprint.py | 4 +- .../builder/concept/test_data.py | 2 +- .../pipe_controller/pipe_batch/test_data.py | 2 +- .../pipe_batch/test_pipe_batch.py | 4 +- .../pipe_condition/test_data.py | 2 +- .../pipe_condition/test_pipe_condition.py | 4 +- .../pipe_parallel/test_data.py | 4 +- .../pipe_parallel/test_pipe_parallel.py | 4 +- .../pipe_sequence/test_data.py | 4 +- .../pipe_sequence/test_pipe_sequence.py | 4 +- .../pipe_operator/pipe_compose/test_data.py | 2 +- .../pipe_compose/test_pipe_compose.py | 4 +- .../pipe_operator/pipe_extract/test_data.py | 2 +- .../pipe_extract/test_pipe_extract.py | 4 +- .../pipe/pipe_operator/pipe_func/test_data.py | 2 +- .../pipe_operator/pipe_func/test_pipe_func.py | 4 +- .../pipe_operator/pipe_img_gen/test_data.py | 2 +- .../pipe_img_gen/test_pipe_img_gen.py | 4 +- .../pipe/pipe_operator/pipe_llm/test_data.py | 2 +- .../pipe_operator/pipe_llm/test_pipe_llm.py | 4 +- .../builder/pipe/test_data_inputs.py | 0 .../builder/pipe/test_data_pipe.py | 2 +- .../builder/pipe/test_data_sub_pipe.py | 2 +- .../builder/pipe/test_inputs_blueprint.py | 2 +- .../builder/pipe/test_pipe_blueprint.py | 4 +- .../builder/pipe/test_sub_pipe_blueprint.py | 4 +- .../builder/test_pipelex_bundle_blueprint.py | 0 55 files changed, 104 insertions(+), 105 deletions(-) rename pipelex/{libraries/pipelines => }/builder/__init__.py (100%) rename pipelex/{libraries/pipelines => }/builder/builder.plx (100%) rename pipelex/{libraries/pipelines => }/builder/builder.py (94%) rename pipelex/{libraries/pipelines => }/builder/builder_errors.py (89%) rename pipelex/{libraries/pipelines => }/builder/builder_loop.py (97%) rename pipelex/{libraries/pipelines => }/builder/concept/__init__.py (100%) rename pipelex/{libraries/pipelines => }/builder/concept/concept.plx (100%) rename pipelex/{libraries/pipelines 
=> }/builder/concept/concept_spec.py (100%) rename pipelex/{libraries/pipelines => }/builder/flow.py (95%) rename pipelex/{libraries/pipelines => }/builder/flow_factory.py (95%) rename pipelex/{libraries/pipelines => }/builder/pipe/__init__.py (100%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_batch_spec.py (96%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_compose_spec.py (98%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_condition_spec.py (96%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_design.plx (100%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_extract_spec.py (98%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_func_spec.py (92%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_img_gen_spec.py (97%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_llm_spec.py (98%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_parallel_spec.py (92%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_sequence_spec.py (89%) rename pipelex/{libraries/pipelines => }/builder/pipe/pipe_signature.py (98%) rename pipelex/{libraries/pipelines => }/builder/pipe/sub_pipe_spec.py (100%) delete mode 100644 pipelex/libraries/pipelines/__init__.py rename tests/integration/pipelex/{libraries/pipelines => }/builder/concept/integration_test_data.py (100%) rename tests/integration/pipelex/{libraries/pipelines => }/builder/concept/test_concept_spec_generation.py (92%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/concept/test_concept_blueprint.py (86%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/concept/test_data.py (98%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_controller/pipe_batch/test_data.py (95%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_controller/pipe_batch/test_pipe_batch.py (72%) rename tests/unit/pipelex/{libraries/pipelines => 
}/builder/pipe/pipe_controller/pipe_condition/test_data.py (94%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_controller/pipe_condition/test_pipe_condition.py (72%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_controller/pipe_parallel/test_data.py (96%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_controller/pipe_parallel/test_pipe_parallel.py (75%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_controller/pipe_sequence/test_data.py (94%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_controller/pipe_sequence/test_pipe_sequence.py (72%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_compose/test_data.py (94%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_compose/test_pipe_compose.py (72%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_extract/test_data.py (95%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_extract/test_pipe_extract.py (74%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_func/test_data.py (94%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_func/test_pipe_func.py (73%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_img_gen/test_data.py (95%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_img_gen/test_pipe_img_gen.py (72%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_llm/test_data.py (98%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/pipe_operator/pipe_llm/test_pipe_llm.py (81%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/test_data_inputs.py (100%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/test_data_pipe.py (96%) rename 
tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/test_data_sub_pipe.py (94%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/test_inputs_blueprint.py (84%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/test_pipe_blueprint.py (73%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/pipe/test_sub_pipe_blueprint.py (73%) rename tests/unit/pipelex/{libraries/pipelines => }/builder/test_pipelex_bundle_blueprint.py (100%) diff --git a/pipelex/libraries/pipelines/builder/__init__.py b/pipelex/builder/__init__.py similarity index 100% rename from pipelex/libraries/pipelines/builder/__init__.py rename to pipelex/builder/__init__.py diff --git a/pipelex/libraries/pipelines/builder/builder.plx b/pipelex/builder/builder.plx similarity index 100% rename from pipelex/libraries/pipelines/builder/builder.plx rename to pipelex/builder/builder.plx diff --git a/pipelex/libraries/pipelines/builder/builder.py b/pipelex/builder/builder.py similarity index 94% rename from pipelex/libraries/pipelines/builder/builder.py rename to pipelex/builder/builder.py index f96943eed..406f29299 100644 --- a/pipelex/libraries/pipelines/builder/builder.py +++ b/pipelex/builder/builder.py @@ -2,6 +2,31 @@ from pydantic import ConfigDict, Field, ValidationError, field_validator +from pipelex.builder.builder_errors import ( + ConceptDefinitionErrorData, + ConceptFailure, + ConceptSpecError, + DomainFailure, + PipeBuilderError, + PipeDefinitionErrorData, + PipeFailure, + PipelexBundleError, + PipelexBundleUnexpectedError, + PipeSpecError, + StaticValidationErrorData, + ValidateDryRunError, +) +from pipelex.builder.concept.concept_spec import ConceptSpec +from pipelex.builder.pipe.pipe_batch_spec import PipeBatchSpec +from pipelex.builder.pipe.pipe_compose_spec import PipeComposeSpec +from pipelex.builder.pipe.pipe_condition_spec import PipeConditionSpec +from pipelex.builder.pipe.pipe_extract_spec import PipeExtractSpec +from 
pipelex.builder.pipe.pipe_func_spec import PipeFuncSpec +from pipelex.builder.pipe.pipe_img_gen_spec import PipeImgGenSpec +from pipelex.builder.pipe.pipe_llm_spec import PipeLLMSpec +from pipelex.builder.pipe.pipe_parallel_spec import PipeParallelSpec +from pipelex.builder.pipe.pipe_sequence_spec import PipeSequenceSpec +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.core.bundles.pipe_sorter import sort_pipes_by_dependencies from pipelex.core.bundles.pipelex_bundle_blueprint import PipeBlueprintUnion, PipelexBundleBlueprint from pipelex.core.concepts.concept_blueprint import ConceptBlueprint @@ -16,31 +41,6 @@ StaticValidationError, ) from pipelex.hub import get_library_manager -from pipelex.libraries.pipelines.builder.builder_errors import ( - ConceptDefinitionErrorData, - ConceptFailure, - ConceptSpecError, - DomainFailure, - PipeBuilderError, - PipeDefinitionErrorData, - PipeFailure, - PipelexBundleError, - PipelexBundleUnexpectedError, - PipeSpecError, - StaticValidationErrorData, - ValidateDryRunError, -) -from pipelex.libraries.pipelines.builder.concept.concept_spec import ConceptSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_batch_spec import PipeBatchSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_compose_spec import PipeComposeSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_condition_spec import PipeConditionSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_extract_spec import PipeExtractSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_func_spec import PipeFuncSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_img_gen_spec import PipeImgGenSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_llm_spec import PipeLLMSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_parallel_spec import PipeParallelSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_sequence_spec import PipeSequenceSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec from 
pipelex.pipe_run.dry_run import dry_run_pipes from pipelex.tools.typing.pydantic_utils import format_pydantic_validation_error diff --git a/pipelex/libraries/pipelines/builder/builder_errors.py b/pipelex/builder/builder_errors.py similarity index 89% rename from pipelex/libraries/pipelines/builder/builder_errors.py rename to pipelex/builder/builder_errors.py index 2cab3ed0f..c30895a3d 100644 --- a/pipelex/libraries/pipelines/builder/builder_errors.py +++ b/pipelex/builder/builder_errors.py @@ -2,21 +2,21 @@ from pydantic import Field +from pipelex.builder.concept.concept_spec import ConceptSpec +from pipelex.builder.pipe.pipe_batch_spec import PipeBatchSpec +from pipelex.builder.pipe.pipe_compose_spec import PipeComposeSpec +from pipelex.builder.pipe.pipe_condition_spec import PipeConditionSpec +from pipelex.builder.pipe.pipe_extract_spec import PipeExtractSpec +from pipelex.builder.pipe.pipe_func_spec import PipeFuncSpec +from pipelex.builder.pipe.pipe_img_gen_spec import PipeImgGenSpec +from pipelex.builder.pipe.pipe_llm_spec import PipeLLMSpec +from pipelex.builder.pipe.pipe_parallel_spec import PipeParallelSpec +from pipelex.builder.pipe.pipe_sequence_spec import PipeSequenceSpec from pipelex.core.stuffs.structured_content import StructuredContent from pipelex.exceptions import ( PipelexException, StaticValidationErrorType, ) -from pipelex.libraries.pipelines.builder.concept.concept_spec import ConceptSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_batch_spec import PipeBatchSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_compose_spec import PipeComposeSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_condition_spec import PipeConditionSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_extract_spec import PipeExtractSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_func_spec import PipeFuncSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_img_gen_spec import PipeImgGenSpec -from 
pipelex.libraries.pipelines.builder.pipe.pipe_llm_spec import PipeLLMSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_parallel_spec import PipeParallelSpec -from pipelex.libraries.pipelines.builder.pipe.pipe_sequence_spec import PipeSequenceSpec from pipelex.types import Self # Type alias for pipe spec union diff --git a/pipelex/libraries/pipelines/builder/builder_loop.py b/pipelex/builder/builder_loop.py similarity index 97% rename from pipelex/libraries/pipelines/builder/builder_loop.py rename to pipelex/builder/builder_loop.py index fe0c47d4c..c7d113f29 100644 --- a/pipelex/libraries/pipelines/builder/builder_loop.py +++ b/pipelex/builder/builder_loop.py @@ -1,20 +1,20 @@ from pipelex import pretty_print -from pipelex.client.protocol import ImplicitMemory -from pipelex.core.pipes.pipe_blueprint import AllowedPipeCategories -from pipelex.exceptions import StaticValidationErrorType -from pipelex.hub import get_required_pipe -from pipelex.language.plx_factory import PlxFactory -from pipelex.libraries.pipelines.builder.builder import ( +from pipelex.builder.builder import ( PipelexBundleSpec, PipeSpecUnion, reconstruct_bundle_with_pipe_fixes, validate_bundle_spec, ) -from pipelex.libraries.pipelines.builder.builder_errors import ( +from pipelex.builder.builder_errors import ( PipelexBundleError, PipelexBundleNoFixForError, PipelexBundleUnexpectedError, ) +from pipelex.client.protocol import ImplicitMemory +from pipelex.core.pipes.pipe_blueprint import AllowedPipeCategories +from pipelex.exceptions import StaticValidationErrorType +from pipelex.hub import get_required_pipe +from pipelex.language.plx_factory import PlxFactory from pipelex.pipeline.execute import execute_pipeline from pipelex.tools.misc.file_utils import save_text_to_path diff --git a/pipelex/libraries/pipelines/builder/concept/__init__.py b/pipelex/builder/concept/__init__.py similarity index 100% rename from pipelex/libraries/pipelines/builder/concept/__init__.py rename to 
pipelex/builder/concept/__init__.py diff --git a/pipelex/libraries/pipelines/builder/concept/concept.plx b/pipelex/builder/concept/concept.plx similarity index 100% rename from pipelex/libraries/pipelines/builder/concept/concept.plx rename to pipelex/builder/concept/concept.plx diff --git a/pipelex/libraries/pipelines/builder/concept/concept_spec.py b/pipelex/builder/concept/concept_spec.py similarity index 100% rename from pipelex/libraries/pipelines/builder/concept/concept_spec.py rename to pipelex/builder/concept/concept_spec.py diff --git a/pipelex/libraries/pipelines/builder/flow.py b/pipelex/builder/flow.py similarity index 95% rename from pipelex/libraries/pipelines/builder/flow.py rename to pipelex/builder/flow.py index 7e866599b..ee5b67dfe 100644 --- a/pipelex/libraries/pipelines/builder/flow.py +++ b/pipelex/builder/flow.py @@ -1,7 +1,7 @@ from pydantic import ConfigDict, Field +from pipelex.builder.pipe.pipe_signature import PipeSignature from pipelex.core.stuffs.structured_content import StructuredContent -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSignature from pipelex.pipe_controllers.batch.pipe_batch_blueprint import PipeBatchBlueprint from pipelex.pipe_controllers.condition.pipe_condition_blueprint import PipeConditionBlueprint from pipelex.pipe_controllers.parallel.pipe_parallel_blueprint import PipeParallelBlueprint diff --git a/pipelex/libraries/pipelines/builder/flow_factory.py b/pipelex/builder/flow_factory.py similarity index 95% rename from pipelex/libraries/pipelines/builder/flow_factory.py rename to pipelex/builder/flow_factory.py index 917e7164d..32752c54e 100644 --- a/pipelex/libraries/pipelines/builder/flow_factory.py +++ b/pipelex/builder/flow_factory.py @@ -1,12 +1,12 @@ from pathlib import Path from typing import Any +from pipelex.builder.builder import PipelexBundleSpec +from pipelex.builder.flow import Flow, FlowElementUnion +from pipelex.builder.pipe.pipe_signature import PipeSignature from 
pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint from pipelex.core.interpreter import PipelexInterpreter from pipelex.core.pipes.pipe_blueprint import AllowedPipeCategories -from pipelex.libraries.pipelines.builder.builder import PipelexBundleSpec -from pipelex.libraries.pipelines.builder.flow import Flow, FlowElementUnion -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSignature from pipelex.pipe_controllers.batch.pipe_batch_blueprint import PipeBatchBlueprint from pipelex.pipe_controllers.condition.pipe_condition_blueprint import PipeConditionBlueprint from pipelex.pipe_controllers.parallel.pipe_parallel_blueprint import PipeParallelBlueprint diff --git a/pipelex/libraries/pipelines/builder/pipe/__init__.py b/pipelex/builder/pipe/__init__.py similarity index 100% rename from pipelex/libraries/pipelines/builder/pipe/__init__.py rename to pipelex/builder/pipe/__init__.py diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_batch_spec.py b/pipelex/builder/pipe/pipe_batch_spec.py similarity index 96% rename from pipelex/libraries/pipelines/builder/pipe/pipe_batch_spec.py rename to pipelex/builder/pipe/pipe_batch_spec.py index e2770bb5f..e1332b50c 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_batch_spec.py +++ b/pipelex/builder/pipe/pipe_batch_spec.py @@ -3,7 +3,7 @@ from pydantic import Field from typing_extensions import override -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.pipe_controllers.batch.pipe_batch_blueprint import PipeBatchBlueprint diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py b/pipelex/builder/pipe/pipe_compose_spec.py similarity index 98% rename from pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py rename to pipelex/builder/pipe/pipe_compose_spec.py index a3062b980..2650043b8 100644 --- 
a/pipelex/libraries/pipelines/builder/pipe/pipe_compose_spec.py +++ b/pipelex/builder/pipe/pipe_compose_spec.py @@ -4,10 +4,10 @@ from pydantic.json_schema import SkipJsonSchema from typing_extensions import override +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.cogt.templating.template_blueprint import TemplateBlueprint from pipelex.cogt.templating.template_category import TemplateCategory from pipelex.cogt.templating.templating_style import TagStyle, TemplatingStyle, TextFormat -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec from pipelex.pipe_operators.compose.pipe_compose_blueprint import PipeComposeBlueprint from pipelex.types import StrEnum diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_condition_spec.py b/pipelex/builder/pipe/pipe_condition_spec.py similarity index 96% rename from pipelex/libraries/pipelines/builder/pipe/pipe_condition_spec.py rename to pipelex/builder/pipe/pipe_condition_spec.py index 12321c67c..23b763f13 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_condition_spec.py +++ b/pipelex/builder/pipe/pipe_condition_spec.py @@ -4,7 +4,7 @@ from pydantic.json_schema import SkipJsonSchema from typing_extensions import override -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.pipe_controllers.condition.pipe_condition_blueprint import PipeConditionBlueprint from pipelex.pipe_controllers.condition.special_outcome import SpecialOutcome diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_design.plx b/pipelex/builder/pipe/pipe_design.plx similarity index 100% rename from pipelex/libraries/pipelines/builder/pipe/pipe_design.plx rename to pipelex/builder/pipe/pipe_design.plx diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py b/pipelex/builder/pipe/pipe_extract_spec.py similarity index 98% rename from 
pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py rename to pipelex/builder/pipe/pipe_extract_spec.py index c28ae447f..10e5edd39 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_extract_spec.py +++ b/pipelex/builder/pipe/pipe_extract_spec.py @@ -4,8 +4,8 @@ from pydantic.json_schema import SkipJsonSchema from typing_extensions import override +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.exceptions import PipeDefinitionError -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec from pipelex.pipe_operators.extract.pipe_extract_blueprint import PipeExtractBlueprint from pipelex.types import StrEnum diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_func_spec.py b/pipelex/builder/pipe/pipe_func_spec.py similarity index 92% rename from pipelex/libraries/pipelines/builder/pipe/pipe_func_spec.py rename to pipelex/builder/pipe/pipe_func_spec.py index 7f569c478..20c8b8731 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_func_spec.py +++ b/pipelex/builder/pipe/pipe_func_spec.py @@ -4,7 +4,7 @@ from pydantic.json_schema import SkipJsonSchema from typing_extensions import override -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.pipe_operators.func.pipe_func_blueprint import PipeFuncBlueprint diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_img_gen_spec.py b/pipelex/builder/pipe/pipe_img_gen_spec.py similarity index 97% rename from pipelex/libraries/pipelines/builder/pipe/pipe_img_gen_spec.py rename to pipelex/builder/pipe/pipe_img_gen_spec.py index 669e4bf89..55924a085 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_img_gen_spec.py +++ b/pipelex/builder/pipe/pipe_img_gen_spec.py @@ -3,7 +3,7 @@ from pydantic import Field, field_validator from typing_extensions import override -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec +from 
pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.pipe_operators.img_gen.pipe_img_gen_blueprint import PipeImgGenBlueprint from pipelex.types import StrEnum diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_llm_spec.py b/pipelex/builder/pipe/pipe_llm_spec.py similarity index 98% rename from pipelex/libraries/pipelines/builder/pipe/pipe_llm_spec.py rename to pipelex/builder/pipe/pipe_llm_spec.py index 21cca5d56..600667baf 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_llm_spec.py +++ b/pipelex/builder/pipe/pipe_llm_spec.py @@ -4,9 +4,9 @@ from pydantic.json_schema import SkipJsonSchema from typing_extensions import override +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.cogt.llm.llm_setting import LLMSetting from pipelex.exceptions import PipeDefinitionError -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec from pipelex.pipe_operators.llm.pipe_llm_blueprint import PipeLLMBlueprint from pipelex.tools.typing.validation_utils import has_more_than_one_among_attributes_from_list from pipelex.types import Self, StrEnum diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_parallel_spec.py b/pipelex/builder/pipe/pipe_parallel_spec.py similarity index 92% rename from pipelex/libraries/pipelines/builder/pipe/pipe_parallel_spec.py rename to pipelex/builder/pipe/pipe_parallel_spec.py index 9e09ba112..e468b6aad 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_parallel_spec.py +++ b/pipelex/builder/pipe/pipe_parallel_spec.py @@ -3,10 +3,10 @@ from pydantic import Field, field_validator, model_validator from typing_extensions import override +from pipelex.builder.concept.concept_spec import ConceptSpec +from pipelex.builder.pipe.pipe_signature import PipeSpec +from pipelex.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.exceptions import PipeDefinitionError -from pipelex.libraries.pipelines.builder.concept.concept_spec import ConceptSpec -from 
pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec -from pipelex.libraries.pipelines.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.pipe_controllers.parallel.pipe_parallel_blueprint import PipeParallelBlueprint from pipelex.types import Self diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_sequence_spec.py b/pipelex/builder/pipe/pipe_sequence_spec.py similarity index 89% rename from pipelex/libraries/pipelines/builder/pipe/pipe_sequence_spec.py rename to pipelex/builder/pipe/pipe_sequence_spec.py index c7ece8140..1ff721629 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_sequence_spec.py +++ b/pipelex/builder/pipe/pipe_sequence_spec.py @@ -4,8 +4,8 @@ from pydantic.json_schema import SkipJsonSchema from typing_extensions import override -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec -from pipelex.libraries.pipelines.builder.pipe.sub_pipe_spec import SubPipeSpec +from pipelex.builder.pipe.pipe_signature import PipeSpec +from pipelex.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.pipe_controllers.sequence.pipe_sequence_blueprint import PipeSequenceBlueprint diff --git a/pipelex/libraries/pipelines/builder/pipe/pipe_signature.py b/pipelex/builder/pipe/pipe_signature.py similarity index 98% rename from pipelex/libraries/pipelines/builder/pipe/pipe_signature.py rename to pipelex/builder/pipe/pipe_signature.py index 864f3538e..50c13d88c 100644 --- a/pipelex/libraries/pipelines/builder/pipe/pipe_signature.py +++ b/pipelex/builder/pipe/pipe_signature.py @@ -3,11 +3,11 @@ from pydantic import Field, field_validator from pipelex import log +from pipelex.builder.concept.concept_spec import ConceptSpec from pipelex.core.pipes.exceptions import PipeBlueprintError from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint from pipelex.core.pipes.pipe_blueprint import AllowedPipeCategories, AllowedPipeTypes, PipeBlueprint from 
pipelex.core.stuffs.structured_content import StructuredContent -from pipelex.libraries.pipelines.builder.concept.concept_spec import ConceptSpec from pipelex.tools.misc.string_utils import is_snake_case, normalize_to_ascii diff --git a/pipelex/libraries/pipelines/builder/pipe/sub_pipe_spec.py b/pipelex/builder/pipe/sub_pipe_spec.py similarity index 100% rename from pipelex/libraries/pipelines/builder/pipe/sub_pipe_spec.py rename to pipelex/builder/pipe/sub_pipe_spec.py diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index 403252540..65fd2a4b5 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -5,11 +5,11 @@ import typer from pipelex import pretty_print +from pipelex.builder.builder import PipelexBundleSpec +from pipelex.builder.builder_loop import BuilderLoop +from pipelex.builder.flow_factory import FlowFactory from pipelex.hub import get_report_delegate from pipelex.language.plx_factory import PlxFactory -from pipelex.libraries.pipelines.builder.builder import PipelexBundleSpec -from pipelex.libraries.pipelines.builder.builder_loop import BuilderLoop -from pipelex.libraries.pipelines.builder.flow_factory import FlowFactory from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline from pipelex.tools.misc.file_utils import ensure_directory_for_file_path, save_text_to_path diff --git a/pipelex/libraries/pipelines/__init__.py b/pipelex/libraries/pipelines/__init__.py deleted file mode 100644 index 8b1378917..000000000 --- a/pipelex/libraries/pipelines/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tests/integration/pipelex/libraries/pipelines/builder/concept/integration_test_data.py b/tests/integration/pipelex/builder/concept/integration_test_data.py similarity index 100% rename from tests/integration/pipelex/libraries/pipelines/builder/concept/integration_test_data.py rename to tests/integration/pipelex/builder/concept/integration_test_data.py diff 
--git a/tests/integration/pipelex/libraries/pipelines/builder/concept/test_concept_spec_generation.py b/tests/integration/pipelex/builder/concept/test_concept_spec_generation.py similarity index 92% rename from tests/integration/pipelex/libraries/pipelines/builder/concept/test_concept_spec_generation.py rename to tests/integration/pipelex/builder/concept/test_concept_spec_generation.py index d2c538e4e..524b1b2df 100644 --- a/tests/integration/pipelex/libraries/pipelines/builder/concept/test_concept_spec_generation.py +++ b/tests/integration/pipelex/builder/concept/test_concept_spec_generation.py @@ -1,12 +1,12 @@ import pytest from pipelex import log, pretty_print +from pipelex.builder.concept.concept_spec import ConceptSpec from pipelex.cogt.llm.llm_job_components import LLMJobConfig, LLMJobParams from pipelex.cogt.llm.llm_job_factory import LLMJobFactory from pipelex.cogt.llm.llm_prompt import LLMPrompt from pipelex.hub import get_llm_worker -from pipelex.libraries.pipelines.builder.concept.concept_spec import ConceptSpec -from tests.integration.pipelex.libraries.pipelines.builder.concept.integration_test_data import ConceptSpecGenerationTestCases +from tests.integration.pipelex.builder.concept.integration_test_data import ConceptSpecGenerationTestCases @pytest.mark.llm diff --git a/tests/integration/pipelex/pipes/test_flow_factory.py b/tests/integration/pipelex/pipes/test_flow_factory.py index 9b4530b69..3710bd27e 100644 --- a/tests/integration/pipelex/pipes/test_flow_factory.py +++ b/tests/integration/pipelex/pipes/test_flow_factory.py @@ -4,7 +4,7 @@ import pytest from pipelex import log, pretty_print -from pipelex.libraries.pipelines.builder.flow_factory import FlowFactory +from pipelex.builder.flow_factory import FlowFactory from pipelex.pipe_controllers.sequence.pipe_sequence_blueprint import PipeSequenceBlueprint from pipelex.tools.misc.file_utils import get_incremental_directory_path, remove_folder from pipelex.tools.misc.json_utils import 
save_as_json_to_path diff --git a/tests/unit/pipelex/libraries/pipelines/builder/concept/test_concept_blueprint.py b/tests/unit/pipelex/builder/concept/test_concept_blueprint.py similarity index 86% rename from tests/unit/pipelex/libraries/pipelines/builder/concept/test_concept_blueprint.py rename to tests/unit/pipelex/builder/concept/test_concept_blueprint.py index 3beb63904..4f3323bbd 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/concept/test_concept_blueprint.py +++ b/tests/unit/pipelex/builder/concept/test_concept_blueprint.py @@ -1,9 +1,9 @@ import pytest from pipelex import log +from pipelex.builder.concept.concept_spec import ConceptSpec from pipelex.core.concepts.concept_blueprint import ConceptBlueprint -from pipelex.libraries.pipelines.builder.concept.concept_spec import ConceptSpec -from tests.unit.pipelex.libraries.pipelines.builder.concept.test_data import ConceptBlueprintTestCases, ConceptCodeValidationTestCases +from tests.unit.pipelex.builder.concept.test_data import ConceptBlueprintTestCases, ConceptCodeValidationTestCases class TestConceptBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/concept/test_data.py b/tests/unit/pipelex/builder/concept/test_data.py similarity index 98% rename from tests/unit/pipelex/libraries/pipelines/builder/concept/test_data.py rename to tests/unit/pipelex/builder/concept/test_data.py index 9ea82f688..5fd196f8d 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/concept/test_data.py +++ b/tests/unit/pipelex/builder/concept/test_data.py @@ -1,7 +1,7 @@ from typing import ClassVar +from pipelex.builder.concept.concept_spec import ConceptSpec, ConceptStructureSpec, ConceptStructureSpecFieldType from pipelex.core.concepts.concept_blueprint import ConceptBlueprint, ConceptStructureBlueprint, ConceptStructureBlueprintFieldType -from pipelex.libraries.pipelines.builder.concept.concept_spec import ConceptSpec, ConceptStructureSpec, ConceptStructureSpecFieldType class 
ConceptBlueprintTestCases: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_batch/test_data.py similarity index 95% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_controller/pipe_batch/test_data.py index 5a928c830..e51f941e2 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_batch/test_data.py @@ -1,7 +1,7 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_batch_spec import PipeBatchSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_batch_spec import PipeBatchSpec from pipelex.pipe_controllers.batch.pipe_batch_blueprint import PipeBatchBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_pipe_batch.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_batch/test_pipe_batch.py similarity index 72% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_pipe_batch.py rename to tests/unit/pipelex/builder/pipe/pipe_controller/pipe_batch/test_pipe_batch.py index d24b48b48..804a78db4 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_batch/test_pipe_batch.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_batch/test_pipe_batch.py @@ -1,8 +1,8 @@ import pytest -from pipelex.libraries.pipelines.builder.pipe.pipe_batch_spec import PipeBatchSpec +from pipelex.builder.pipe.pipe_batch_spec import PipeBatchSpec from pipelex.pipe_controllers.batch.pipe_batch_blueprint import PipeBatchBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_controller.pipe_batch.test_data import PipeBatchTestCases 
+from tests.unit.pipelex.builder.pipe.pipe_controller.pipe_batch.test_data import PipeBatchTestCases class TestPipeBatchBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_condition/test_data.py similarity index 94% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_controller/pipe_condition/test_data.py index e98a63abd..b9394b22c 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_condition/test_data.py @@ -1,7 +1,7 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_condition_spec import PipeConditionSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_condition_spec import PipeConditionSpec from pipelex.pipe_controllers.condition.pipe_condition_blueprint import PipeConditionBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_pipe_condition.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_condition/test_pipe_condition.py similarity index 72% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_pipe_condition.py rename to tests/unit/pipelex/builder/pipe/pipe_controller/pipe_condition/test_pipe_condition.py index 3ec34ceb7..4fece994d 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_condition/test_pipe_condition.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_condition/test_pipe_condition.py @@ -1,8 +1,8 @@ import pytest -from pipelex.libraries.pipelines.builder.pipe.pipe_condition_spec import PipeConditionSpec +from pipelex.builder.pipe.pipe_condition_spec 
import PipeConditionSpec from pipelex.pipe_controllers.condition.pipe_condition_blueprint import PipeConditionBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_controller.pipe_condition.test_data import PipeConditionTestCases +from tests.unit.pipelex.builder.pipe.pipe_controller.pipe_condition.test_data import PipeConditionTestCases class TestPipeConditionBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_parallel/test_data.py similarity index 96% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_controller/pipe_parallel/test_data.py index f846bada6..8d260a49e 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_parallel/test_data.py @@ -1,8 +1,8 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_parallel_spec import PipeParallelSpec +from pipelex.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_parallel_spec import PipeParallelSpec -from pipelex.libraries.pipelines.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.pipe_controllers.parallel.pipe_parallel_blueprint import PipeParallelBlueprint from pipelex.pipe_controllers.sub_pipe_blueprint import SubPipeBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_pipe_parallel.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_parallel/test_pipe_parallel.py similarity index 75% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_pipe_parallel.py rename to 
tests/unit/pipelex/builder/pipe/pipe_controller/pipe_parallel/test_pipe_parallel.py index 64db9b946..23750cb39 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_parallel/test_pipe_parallel.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_parallel/test_pipe_parallel.py @@ -1,9 +1,9 @@ import pytest from pipelex import log -from pipelex.libraries.pipelines.builder.pipe.pipe_parallel_spec import PipeParallelSpec +from pipelex.builder.pipe.pipe_parallel_spec import PipeParallelSpec from pipelex.pipe_controllers.parallel.pipe_parallel_blueprint import PipeParallelBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_controller.pipe_parallel.test_data import PipeParallelTestCases +from tests.unit.pipelex.builder.pipe.pipe_controller.pipe_parallel.test_data import PipeParallelTestCases class TestPipeParallelBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_sequence/test_data.py similarity index 94% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_controller/pipe_sequence/test_data.py index cdda545ef..321999083 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_sequence/test_data.py @@ -1,8 +1,8 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_sequence_spec import PipeSequenceSpec +from pipelex.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_sequence_spec import PipeSequenceSpec -from pipelex.libraries.pipelines.builder.pipe.sub_pipe_spec import SubPipeSpec from 
pipelex.pipe_controllers.sequence.pipe_sequence_blueprint import PipeSequenceBlueprint from pipelex.pipe_controllers.sub_pipe_blueprint import SubPipeBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_pipe_sequence.py b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_sequence/test_pipe_sequence.py similarity index 72% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_pipe_sequence.py rename to tests/unit/pipelex/builder/pipe/pipe_controller/pipe_sequence/test_pipe_sequence.py index 3acda38a6..6cd1fee7e 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_controller/pipe_sequence/test_pipe_sequence.py +++ b/tests/unit/pipelex/builder/pipe/pipe_controller/pipe_sequence/test_pipe_sequence.py @@ -1,8 +1,8 @@ import pytest -from pipelex.libraries.pipelines.builder.pipe.pipe_sequence_spec import PipeSequenceSpec +from pipelex.builder.pipe.pipe_sequence_spec import PipeSequenceSpec from pipelex.pipe_controllers.sequence.pipe_sequence_blueprint import PipeSequenceBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_controller.pipe_sequence.test_data import PipeSequenceTestCases +from tests.unit.pipelex.builder.pipe.pipe_controller.pipe_sequence.test_data import PipeSequenceTestCases class TestPipeSequenceBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_compose/test_data.py similarity index 94% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_compose/test_data.py index be167dbfe..18d72ec75 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_compose/test_data.py @@ -1,10 +1,10 
@@ from typing import ClassVar +from pipelex.builder.pipe.pipe_compose_spec import PipeComposeSpec from pipelex.cogt.templating.template_blueprint import TemplateBlueprint from pipelex.cogt.templating.template_category import TemplateCategory from pipelex.cogt.templating.templating_style import TagStyle, TemplatingStyle, TextFormat from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_compose_spec import PipeComposeSpec from pipelex.pipe_operators.compose.pipe_compose_blueprint import PipeComposeBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_pipe_compose.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_compose/test_pipe_compose.py similarity index 72% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_pipe_compose.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_compose/test_pipe_compose.py index 0327a6aae..8af557694 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_compose/test_pipe_compose.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_compose/test_pipe_compose.py @@ -1,8 +1,8 @@ import pytest -from pipelex.libraries.pipelines.builder.pipe.pipe_compose_spec import PipeComposeSpec +from pipelex.builder.pipe.pipe_compose_spec import PipeComposeSpec from pipelex.pipe_operators.compose.pipe_compose_blueprint import PipeComposeBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_operator.pipe_compose.test_data import PipeComposeTestCases +from tests.unit.pipelex.builder.pipe.pipe_operator.pipe_compose.test_data import PipeComposeTestCases class TestPipeComposeBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_extract/test_data.py similarity index 95% rename from 
tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_extract/test_data.py index e26e5bf83..0de5da9a6 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_extract/test_data.py @@ -1,7 +1,7 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_extract_spec import PipeExtractSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_extract_spec import PipeExtractSpec from pipelex.pipe_operators.extract.pipe_extract_blueprint import PipeExtractBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_pipe_extract.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_extract/test_pipe_extract.py similarity index 74% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_pipe_extract.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_extract/test_pipe_extract.py index 49ef48d46..3b7b96198 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_extract/test_pipe_extract.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_extract/test_pipe_extract.py @@ -1,9 +1,9 @@ import pytest from pipelex import log -from pipelex.libraries.pipelines.builder.pipe.pipe_extract_spec import PipeExtractSpec +from pipelex.builder.pipe.pipe_extract_spec import PipeExtractSpec from pipelex.pipe_operators.extract.pipe_extract_blueprint import PipeExtractBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_operator.pipe_extract.test_data import PipeExtractTestCases +from tests.unit.pipelex.builder.pipe.pipe_operator.pipe_extract.test_data import PipeExtractTestCases class TestPipeExtractBlueprintConversion: diff --git 
a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_func/test_data.py similarity index 94% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_func/test_data.py index 9bd1d6785..1a0af343b 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_func/test_data.py @@ -1,7 +1,7 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_func_spec import PipeFuncSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_func_spec import PipeFuncSpec from pipelex.pipe_operators.func.pipe_func_blueprint import PipeFuncBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_pipe_func.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_func/test_pipe_func.py similarity index 73% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_pipe_func.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_func/test_pipe_func.py index 063c4ef0c..59496b93f 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_func/test_pipe_func.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_func/test_pipe_func.py @@ -1,8 +1,8 @@ import pytest -from pipelex.libraries.pipelines.builder.pipe.pipe_func_spec import PipeFuncSpec +from pipelex.builder.pipe.pipe_func_spec import PipeFuncSpec from pipelex.pipe_operators.func.pipe_func_blueprint import PipeFuncBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_operator.pipe_func.test_data import PipeFuncTestCases +from tests.unit.pipelex.builder.pipe.pipe_operator.pipe_func.test_data import PipeFuncTestCases class 
TestPipeFuncBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_img_gen/test_data.py similarity index 95% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_img_gen/test_data.py index eb7fdeaee..93802c05a 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_img_gen/test_data.py @@ -1,7 +1,7 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_img_gen_spec import ImgGenSkill, PipeImgGenSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_img_gen_spec import ImgGenSkill, PipeImgGenSpec from pipelex.pipe_operators.img_gen.pipe_img_gen_blueprint import PipeImgGenBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_pipe_img_gen.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_img_gen/test_pipe_img_gen.py similarity index 72% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_pipe_img_gen.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_img_gen/test_pipe_img_gen.py index c30257cf2..e29dbf9d2 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_img_gen/test_pipe_img_gen.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_img_gen/test_pipe_img_gen.py @@ -1,8 +1,8 @@ import pytest -from pipelex.libraries.pipelines.builder.pipe.pipe_img_gen_spec import PipeImgGenSpec +from pipelex.builder.pipe.pipe_img_gen_spec import PipeImgGenSpec from pipelex.pipe_operators.img_gen.pipe_img_gen_blueprint import PipeImgGenBlueprint -from 
tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_operator.pipe_img_gen.test_data import PipeImgGenTestCases +from tests.unit.pipelex.builder.pipe.pipe_operator.pipe_img_gen.test_data import PipeImgGenTestCases class TestPipeImgGenBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/test_data.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_llm/test_data.py similarity index 98% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/test_data.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_llm/test_data.py index 8821adff8..523bd3361 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/test_data.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_llm/test_data.py @@ -1,8 +1,8 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_llm_spec import PipeLLMSpec from pipelex.cogt.llm.llm_setting import LLMSetting from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_llm_spec import PipeLLMSpec from pipelex.pipe_operators.llm.pipe_llm_blueprint import PipeLLMBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/test_pipe_llm.py b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_llm/test_pipe_llm.py similarity index 81% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/test_pipe_llm.py rename to tests/unit/pipelex/builder/pipe/pipe_operator/pipe_llm/test_pipe_llm.py index 6609fa589..fa02b5156 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/pipe_operator/pipe_llm/test_pipe_llm.py +++ b/tests/unit/pipelex/builder/pipe/pipe_operator/pipe_llm/test_pipe_llm.py @@ -1,10 +1,10 @@ import pytest from pipelex import pretty_print -from pipelex.libraries.pipelines.builder.pipe.pipe_llm_spec import PipeLLMSpec +from 
pipelex.builder.pipe.pipe_llm_spec import PipeLLMSpec from pipelex.pipe_operators.llm.pipe_llm_blueprint import PipeLLMBlueprint from pipelex.pipe_operators.llm.pipe_llm_factory import PipeLLMFactory -from tests.unit.pipelex.libraries.pipelines.builder.pipe.pipe_operator.pipe_llm.test_data import PipeLLMTestCases +from tests.unit.pipelex.builder.pipe.pipe_operator.pipe_llm.test_data import PipeLLMTestCases class TestPipeLLMBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_inputs.py b/tests/unit/pipelex/builder/pipe/test_data_inputs.py similarity index 100% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_inputs.py rename to tests/unit/pipelex/builder/pipe/test_data_inputs.py diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_pipe.py b/tests/unit/pipelex/builder/pipe/test_data_pipe.py similarity index 96% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_pipe.py rename to tests/unit/pipelex/builder/pipe/test_data_pipe.py index 93cd684ab..2c0be069c 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_pipe.py +++ b/tests/unit/pipelex/builder/pipe/test_data_pipe.py @@ -1,8 +1,8 @@ from typing import ClassVar +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint from pipelex.core.pipes.pipe_blueprint import PipeBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec class PipeBlueprintTestCases: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_sub_pipe.py b/tests/unit/pipelex/builder/pipe/test_data_sub_pipe.py similarity index 94% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_sub_pipe.py rename to tests/unit/pipelex/builder/pipe/test_data_sub_pipe.py index 23ae59a9f..be01916c3 100644 --- 
a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_data_sub_pipe.py +++ b/tests/unit/pipelex/builder/pipe/test_data_sub_pipe.py @@ -1,6 +1,6 @@ from typing import ClassVar -from pipelex.libraries.pipelines.builder.pipe.sub_pipe_spec import SubPipeSpec +from pipelex.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.pipe_controllers.sub_pipe_blueprint import SubPipeBlueprint diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_inputs_blueprint.py b/tests/unit/pipelex/builder/pipe/test_inputs_blueprint.py similarity index 84% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/test_inputs_blueprint.py rename to tests/unit/pipelex/builder/pipe/test_inputs_blueprint.py index 15a0dcecb..521e9e7e2 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_inputs_blueprint.py +++ b/tests/unit/pipelex/builder/pipe/test_inputs_blueprint.py @@ -1,7 +1,7 @@ import pytest from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.test_data_inputs import InputRequirementTestCases +from tests.unit.pipelex.builder.pipe.test_data_inputs import InputRequirementTestCases class TestInputRequirementBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_pipe_blueprint.py b/tests/unit/pipelex/builder/pipe/test_pipe_blueprint.py similarity index 73% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/test_pipe_blueprint.py rename to tests/unit/pipelex/builder/pipe/test_pipe_blueprint.py index 2259ca6c5..24748240a 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_pipe_blueprint.py +++ b/tests/unit/pipelex/builder/pipe/test_pipe_blueprint.py @@ -1,8 +1,8 @@ import pytest +from pipelex.builder.pipe.pipe_signature import PipeSpec from pipelex.core.pipes.pipe_blueprint import PipeBlueprint -from pipelex.libraries.pipelines.builder.pipe.pipe_signature import PipeSpec -from 
tests.unit.pipelex.libraries.pipelines.builder.pipe.test_data_pipe import PipeBlueprintTestCases +from tests.unit.pipelex.builder.pipe.test_data_pipe import PipeBlueprintTestCases class TestPipeBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_sub_pipe_blueprint.py b/tests/unit/pipelex/builder/pipe/test_sub_pipe_blueprint.py similarity index 73% rename from tests/unit/pipelex/libraries/pipelines/builder/pipe/test_sub_pipe_blueprint.py rename to tests/unit/pipelex/builder/pipe/test_sub_pipe_blueprint.py index 42f8b6e99..5b21da5fb 100644 --- a/tests/unit/pipelex/libraries/pipelines/builder/pipe/test_sub_pipe_blueprint.py +++ b/tests/unit/pipelex/builder/pipe/test_sub_pipe_blueprint.py @@ -1,8 +1,8 @@ import pytest -from pipelex.libraries.pipelines.builder.pipe.sub_pipe_spec import SubPipeSpec +from pipelex.builder.pipe.sub_pipe_spec import SubPipeSpec from pipelex.pipe_controllers.sub_pipe_blueprint import SubPipeBlueprint -from tests.unit.pipelex.libraries.pipelines.builder.pipe.test_data_sub_pipe import SubPipeTestCases +from tests.unit.pipelex.builder.pipe.test_data_sub_pipe import SubPipeTestCases class TestSubPipeBlueprintConversion: diff --git a/tests/unit/pipelex/libraries/pipelines/builder/test_pipelex_bundle_blueprint.py b/tests/unit/pipelex/builder/test_pipelex_bundle_blueprint.py similarity index 100% rename from tests/unit/pipelex/libraries/pipelines/builder/test_pipelex_bundle_blueprint.py rename to tests/unit/pipelex/builder/test_pipelex_bundle_blueprint.py From 902c6b694059dfdaeed67189bdd33ed0d0e2e5bf Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 01:48:41 +0200 Subject: [PATCH 044/115] Avoid executing modules if they don't have StructuredContent classes --- .vscode/launch.json | 4 +- Makefile | 2 +- .../pipeline-creation.md | 2 - .../config-technical/library-config.md | 2 +- docs/pages/configuration/index.md | 2 +- docs/pages/tools/cli.md | 4 +- pipelex/cli/commands/build_cmd.py | 8 +- 
pipelex/cli/commands/common.py | 21 --- pipelex/cli/commands/show_cmd.py | 28 +-- pipelex/cli/commands/validate_cmd.py | 34 +--- pipelex/cogt/model_backends/model_lists.py | 4 +- pipelex/config.py | 8 - pipelex/libraries/library_config.py | 42 ----- pipelex/libraries/library_manager.py | 15 +- pipelex/libraries/library_manager_factory.py | 9 +- pipelex/pipelex.py | 49 +---- pipelex/pipelex.toml | 11 -- pipelex/tools/class_registry_utils.py | 25 ++- pipelex/tools/typing/module_inspector.py | 98 ++++++++++ tests/conftest.py | 2 +- .../pipes/test_pipe_running_variants.py | 16 +- tests/integration/pipelex/test_data.py | 1 + tests/integration/pipelex/test_libraries.py | 5 +- .../tools/typing/test_module_inspector.py | 176 +++++++++++++++++- 24 files changed, 348 insertions(+), 220 deletions(-) delete mode 100644 pipelex/cli/commands/common.py delete mode 100644 pipelex/libraries/library_config.py diff --git a/.vscode/launch.json b/.vscode/launch.json index 942c9c079..a9da599ad 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -16,14 +16,14 @@ "justMyCode": false }, { - "name": "Debug test without inference", + "name": "Debug test boot without inference", "type": "debugpy", "request": "launch", "module": "pytest", "args": [ "-s", "-vv", - "-k test_pipe_llm_with_external_llm_handle" + "-k test_boot" ], "console": "integratedTerminal" }, diff --git a/Makefile b/Makefile index 41c953a9b..7906e5b29 100644 --- a/Makefile +++ b/Makefile @@ -163,7 +163,7 @@ update: env validate: env $(call PRINT_TITLE,"Running setup sequence") - $(VENV_PIPELEX) validate all -c pipelex/libraries + $(VENV_PIPELEX) validate all build: env $(call PRINT_TITLE,"Building the wheels") diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md index a6404d251..0a8087e7b 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md +++ 
b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md @@ -22,13 +22,11 @@ pipelex build blueprint "BRIEF IN NATURAL LANGUAGE" [OPTIONS] **Example:** ```bash pipelex build blueprint "Take a photo as input, and render the opposite of the photo" \ - -c your/path/to/pipelex/libraries \ -o output/pipeline/file/path ``` **Options:** - `--output, -o`: Output path for generated files -- `--config, -c`: Path to libraries folder ## Complete Workflow diff --git a/docs/pages/configuration/config-technical/library-config.md b/docs/pages/configuration/config-technical/library-config.md index 54f85a6ea..64aff7be5 100644 --- a/docs/pages/configuration/config-technical/library-config.md +++ b/docs/pages/configuration/config-technical/library-config.md @@ -97,7 +97,7 @@ The library system includes specific error types: 2. **Validation**: - - Run `pipelex validate all -c path/to/your/pipelex/config/folder` after making changes + - Run `pipelex validate all` after making changes - Check for domain consistency - Verify concept relationships diff --git a/docs/pages/configuration/index.md b/docs/pages/configuration/index.md index 2c0cf18e2..e43037c86 100644 --- a/docs/pages/configuration/index.md +++ b/docs/pages/configuration/index.md @@ -69,5 +69,5 @@ NB: The run_mode unit_test is used for testing purposes. 1. **Version Control**: Include your base `pipelex.toml` in version control 2. **Environment Overrides**: Use environment-specific files for sensitive or environment-dependent settings 3. **Documentation**: Comment any custom settings for team reference -4. **Validation**: Run `pipelex validate all -c path/to/your/pipelex/config/folder` after making configuration changes +4. **Validation**: Run `pipelex validate all` after making configuration changes 5. 
**Gitignore**: Add local and sensitive override files to `.gitignore` diff --git a/docs/pages/tools/cli.md b/docs/pages/tools/cli.md index ce7b65f53..15411884e 100644 --- a/docs/pages/tools/cli.md +++ b/docs/pages/tools/cli.md @@ -34,9 +34,9 @@ pipelex show pipe PIPE_CODE [-c/--config-folder-path PATH] ## Usage Tips -1. Always run `pipelex validate all -c path/to/your/pipelex/config/folder` after making changes to your configuration or pipelines +1. Always run `pipelex validate all` after making changes to your configuration or pipelines 2. Use `pipelex show config` to debug configuration issues 3. When initializing a new project: - Start with `pipelex init config` - Then run `pipelex init libraries` - - Finally, validate your setup with `pipelex validate all -c path/to/your/pipelex/config/folder` + - Finally, validate your setup with `pipelex validate all` diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index 65fd2a4b5..7ed881701 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -47,7 +47,7 @@ def build_one_shot_cmd( typer.Option("--no-output", help="Skip saving the pipeline to file"), ] = False, ) -> None: - Pipelex.make(relative_config_folder_path="../../../pipelex/libraries", from_file=True) + Pipelex.make() typer.echo("=" * 70) typer.echo(typer.style("🔥 Starting pipe builder... 🚀", fg=typer.colors.GREEN)) typer.echo("") @@ -100,7 +100,7 @@ def build_pipe_cmd( typer.Option("--no-output", help="Skip saving the pipeline to file"), ] = False, ) -> None: - Pipelex.make(relative_config_folder_path="../../../pipelex/libraries", from_file=True) + Pipelex.make() typer.echo("=" * 70) typer.echo(typer.style("🔥 Starting pipe builder... 
🚀", fg=typer.colors.GREEN)) typer.echo("") @@ -148,7 +148,7 @@ def build_partial_cmd( typer.Option("--no-output", help="Skip saving the pipeline to file"), ] = False, ) -> None: - Pipelex.make(relative_config_folder_path="../../../pipelex/libraries", from_file=True) + Pipelex.make() typer.echo("=" * 70) typer.echo(typer.style("🔥 Starting pipe builder... 🚀", fg=typer.colors.GREEN)) typer.echo("") @@ -197,7 +197,7 @@ def build_flow_cmd( typer.Option("--no-output", help="Skip saving the pipeline to file"), ] = False, ) -> None: - Pipelex.make(relative_config_folder_path="../../../pipelex/libraries", from_file=True) + Pipelex.make() typer.echo("=" * 70) typer.echo(typer.style("🔥 Starting pipe builder... 🚀", fg=typer.colors.GREEN)) typer.echo("") diff --git a/pipelex/cli/commands/common.py b/pipelex/cli/commands/common.py deleted file mode 100644 index 8b567cf65..000000000 --- a/pipelex/cli/commands/common.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import annotations - -import os -from typing import Final - -REQUIRED_PIPELEX_SUBDIRS: Final[list[str]] = [ - "pipelines", -] - - -def is_pipelex_libraries_folder(folder_path: str) -> bool: - """Check if the given folder path contains a valid pipelex libraries structure.""" - if not os.path.exists(folder_path) or not os.path.isdir(folder_path): - return False - - for subdir in REQUIRED_PIPELEX_SUBDIRS: - subdir_path = os.path.join(folder_path, subdir) - if not os.path.exists(subdir_path) or not os.path.isdir(subdir_path): - return False - - return True diff --git a/pipelex/cli/commands/show_cmd.py b/pipelex/cli/commands/show_cmd.py index f7068cb11..be6b14698 100644 --- a/pipelex/cli/commands/show_cmd.py +++ b/pipelex/cli/commands/show_cmd.py @@ -26,9 +26,9 @@ def do_show_config() -> None: raise PipelexConfigError(msg) from exc -def do_list_pipes(relative_config_folder_path: str = "pipelex_libraries") -> None: +def do_list_pipes() -> None: """List all available pipes.""" - 
Pipelex.make(relative_config_folder_path=relative_config_folder_path, from_file=False) + Pipelex.make() try: get_pipe_library().pretty_list_pipes() @@ -37,9 +37,9 @@ def do_list_pipes(relative_config_folder_path: str = "pipelex_libraries") -> Non raise PipelexCLIError(msg) from exc -def do_show_pipe(pipe_code: str, relative_config_folder_path: str = "./pipelex_libraries") -> None: +def do_show_pipe(pipe_code: str) -> None: """Show a single pipe definition from the library.""" - Pipelex.make(relative_config_folder_path=relative_config_folder_path, from_file=False) + Pipelex.make() pipe = get_required_pipe(pipe_code=pipe_code) pretty_print(pipe, title=f"Pipe '{pipe_code}'") @@ -54,33 +54,20 @@ def show_config_cmd() -> None: @show_app.command("pipes") -def list_pipes_cmd( - relative_config_folder_path: Annotated[ - str, - typer.Option("--config-folder-path", "-c", help="Relative path to the config folder path"), - ] = "pipelex_libraries", -) -> None: - do_list_pipes(relative_config_folder_path=relative_config_folder_path) +def list_pipes_cmd() -> None: + do_list_pipes() @show_app.command("pipe") def show_pipe_cmd( pipe_code: Annotated[str, typer.Argument(help="Pipeline code to show definition for")], - relative_config_folder_path: Annotated[ - str, - typer.Option("--config-folder-path", "-c", help="Relative path to the config folder path"), - ] = "./pipelex_libraries", ) -> None: - do_show_pipe(pipe_code=pipe_code, relative_config_folder_path=relative_config_folder_path) + do_show_pipe(pipe_code=pipe_code) @show_app.command("models") def show_models_cmd( backend_name: Annotated[str, typer.Argument(help="Backend name to list models for")], - relative_config_folder_path: Annotated[ - str, - typer.Option("--config-folder-path", "-c", help="Relative path to the config folder path"), - ] = "./pipelex_libraries", flat: Annotated[ bool, typer.Option("--flat", "-f", help="Output in flat CSV format for easy copy-pasting"), @@ -89,7 +76,6 @@ def show_models_cmd( asyncio.run( 
ModelLister.list_models( backend_name=backend_name, - relative_config_folder_path=relative_config_folder_path, flat=flat, ) ) diff --git a/pipelex/cli/commands/validate_cmd.py b/pipelex/cli/commands/validate_cmd.py index 01c05af4d..6deb49c50 100644 --- a/pipelex/cli/commands/validate_cmd.py +++ b/pipelex/cli/commands/validate_cmd.py @@ -6,33 +6,22 @@ import typer from pipelex import log -from pipelex.cli.commands.common import is_pipelex_libraries_folder from pipelex.hub import get_pipeline_tracker, get_pipes, get_required_pipe from pipelex.pipe_run.dry_run import dry_run_pipe, dry_run_pipes from pipelex.pipelex import Pipelex -def do_validate_all_libraries_and_dry_run(relative_config_folder_path: str = "./pipelex_libraries") -> None: +def do_validate_all_libraries_and_dry_run() -> None: """Validate libraries and dry-run all pipes.""" - if not is_pipelex_libraries_folder(relative_config_folder_path): - typer.echo(f"❌ No pipelex libraries folder found at '{relative_config_folder_path}'") - typer.echo("To create a pipelex libraries folder, run: pipelex init-libraries") - raise typer.Exit(1) - - pipelex_instance = Pipelex.make(relative_config_folder_path=relative_config_folder_path, from_file=False) + pipelex_instance = Pipelex.make() pipelex_instance.validate_libraries() asyncio.run(dry_run_pipes(pipes=get_pipes(), raise_on_failure=True)) log.info("Setup sequence passed OK, config and pipelines are validated.") -def do_dry_run_pipe(pipe_code: str, relative_config_folder_path: str = "./pipelex_libraries") -> None: +def do_dry_run_pipe(pipe_code: str) -> None: """Dry run a single pipe.""" - if not is_pipelex_libraries_folder(relative_config_folder_path): - typer.echo(f"❌ No pipelex libraries folder found at '{relative_config_folder_path}'") - typer.echo("To create a pipelex libraries folder, run: pipelex init-libraries") - raise typer.Exit(1) - - pipelex_instance = Pipelex.make(relative_config_folder_path=relative_config_folder_path, from_file=False) + pipelex_instance 
= Pipelex.make() pipelex_instance.validate_libraries() asyncio.run( @@ -49,21 +38,12 @@ def do_dry_run_pipe(pipe_code: str, relative_config_folder_path: str = "./pipele @validate_app.command("all") -def validate_all_cmd( - relative_config_folder_path: Annotated[ - str, - typer.Option("--config-folder-path", "-c", help="Relative path to the config folder path"), - ] = "./pipelex_libraries", -) -> None: - do_validate_all_libraries_and_dry_run(relative_config_folder_path=relative_config_folder_path) +def validate_all_cmd() -> None: + do_validate_all_libraries_and_dry_run() @validate_app.command("pipe") def dry_run_pipe_cmd( pipe_code: Annotated[str, typer.Argument(help="The pipe code to dry run")], - relative_config_folder_path: Annotated[ - str, - typer.Option("--config-folder-path", "-c", help="Relative path to the config folder path"), - ] = "./pipelex_libraries", ) -> None: - do_dry_run_pipe(pipe_code=pipe_code, relative_config_folder_path=relative_config_folder_path) + do_dry_run_pipe(pipe_code=pipe_code) diff --git a/pipelex/cogt/model_backends/model_lists.py b/pipelex/cogt/model_backends/model_lists.py index b7f7aec82..777571c18 100644 --- a/pipelex/cogt/model_backends/model_lists.py +++ b/pipelex/cogt/model_backends/model_lists.py @@ -31,17 +31,15 @@ class ModelLister: async def list_models( cls, backend_name: str, - relative_config_folder_path: str = "./pipelex_libraries", flat: bool = False, ) -> None: """List available models for a specific backend. 
Args: backend_name: Name of the backend to list models for - relative_config_folder_path: Path to pipelex libraries config flat: Whether to output in flat CSV format """ - Pipelex.make(relative_config_folder_path=relative_config_folder_path, from_file=False) + Pipelex.make() try: backend = get_models_manager().get_required_inference_backend(backend_name) diff --git a/pipelex/config.py b/pipelex/config.py index db7572ef3..a6437e5d8 100644 --- a/pipelex/config.py +++ b/pipelex/config.py @@ -9,7 +9,6 @@ from pipelex.exceptions import PipelexConfigError, StaticValidationErrorType from pipelex.hub import get_required_config from pipelex.language.plx_config import PlxConfig -from pipelex.libraries.library_config import LibraryConfig from pipelex.pipeline.track.tracker_config import TrackerConfig from pipelex.tools.aws.aws_config import AwsConfig from pipelex.tools.config.config_model import ConfigModel @@ -62,11 +61,6 @@ def validate_image_urls(cls, value: list[str]) -> list[str]: return value -class GenericTemplateNames(ConfigModel): - structure_from_preliminary_text_user: str - structure_from_preliminary_text_system: str - - class StructureConfig(ConfigModel): is_default_text_then_structure: bool @@ -105,9 +99,7 @@ class Pipelex(ConfigModel): log_config: LogConfig aws_config: AwsConfig - library_config: LibraryConfig static_validation_config: StaticValidationConfig - generic_template_names: GenericTemplateNames tracker_config: TrackerConfig structure_config: StructureConfig prompting_config: PromptingConfig diff --git a/pipelex/libraries/library_config.py b/pipelex/libraries/library_config.py deleted file mode 100644 index 94816ca9f..000000000 --- a/pipelex/libraries/library_config.py +++ /dev/null @@ -1,42 +0,0 @@ -from pathlib import Path -from typing import ClassVar - -from pipelex.tools.config.config_model import ConfigModel - -# PIPELEX_LIBRARIES_PATH = "libraries" - - -class LibraryConfig(ConfigModel): - package_name: ClassVar[str] = "pipelex" - config_dir_path: 
str = "pipelex_libraries" - - @property - def test_pipelines_dir_path(self) -> str: - return "tests/test_pipelines" - - @property - def failing_pipelines_file_paths(self) -> set[Path]: - return {Path("tests/test_pipelines/failing_pipelines.plx")} - - # def export_libraries(self, overwrite: bool = False) -> None: - # """Duplicate pipelex libraries files in the client project, preserving directory structure.""" - # copy_file_from_package( - # package_name=self.package_name, - # file_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/__init__.py", - # target_path=f"{self.config_dir_path}/__init__.py", - # overwrite=overwrite, - # ) - - # # pipelines - # copy_folder_from_package( - # package_name=self.package_name, - # folder_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/pipelines", - # target_dir=self.base_pipelines_dir_path, - # overwrite=overwrite, - # ) - # copy_file_from_package( - # package_name=self.package_name, - # file_path_in_package=f"{PIPELEX_LIBRARIES_PATH}/pipelines/__init__.py", - # target_path=f"{self.pipelines_dir_path}/__init__.py", - # overwrite=overwrite, - # ) diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 646130896..b028c8df7 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -5,7 +5,6 @@ from typing_extensions import override from pipelex import log -from pipelex.config import get_config from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_factory import ConceptFactory @@ -32,7 +31,6 @@ PipeLibraryError, PipeLoadingError, ) -from pipelex.libraries.library_config import LibraryConfig from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from pipelex.tools.class_registry_utils import ClassRegistryUtils from pipelex.tools.config.manager import config_manager @@ -68,12 +66,10 @@ def __init__( domain_library: DomainLibrary, 
concept_library: ConceptLibrary, pipe_library: PipeLibrary, - library_config: LibraryConfig, ): self.domain_library = domain_library self.concept_library = concept_library self.pipe_library = pipe_library - self.library_config = library_config @override def validate_libraries(self): @@ -267,12 +263,17 @@ def load_libraries( else: all_plx_paths: list[Path] = self._get_pipelex_plx_files_from_dirs(dirs_to_use) # Remove failing pipelines from the list - failing_pipelines_file_paths = get_config().pipelex.library_config.failing_pipelines_file_paths - valid_plx_paths = [path for path in all_plx_paths if path not in failing_pipelines_file_paths] + # failing_pipelines_file_paths = get_config().pipelex.library_config.failing_pipelines_file_paths + # valid_plx_paths = [path for path in all_plx_paths if path not in failing_pipelines_file_paths] + valid_plx_paths = all_plx_paths # Import modules to load them into sys.modules (but don't register classes yet) for library_dir in dirs_to_use: - ClassRegistryUtils.import_modules_in_folder(folder_path=str(library_dir)) + # Only import files that contain StructuredContent subclasses (uses AST pre-check) + ClassRegistryUtils.import_modules_in_folder( + folder_path=str(library_dir), + base_class_names=["StructuredContent"], + ) FuncRegistryUtils.register_funcs_in_folder(folder_path=str(library_dir)) # Auto-discover and register all StructuredContent classes from sys.modules diff --git a/pipelex/libraries/library_manager_factory.py b/pipelex/libraries/library_manager_factory.py index c63ff4412..47631d1df 100644 --- a/pipelex/libraries/library_manager_factory.py +++ b/pipelex/libraries/library_manager_factory.py @@ -1,7 +1,6 @@ from pipelex.core.concepts.concept_library import ConceptLibrary from pipelex.core.domains.domain_library import DomainLibrary from pipelex.core.pipes.pipe_library import PipeLibrary -from pipelex.libraries.library_config import LibraryConfig from pipelex.libraries.library_manager import LibraryManager @@ -9,17 
+8,15 @@ class LibraryManagerFactory: """Factory for creating LibraryManager instances.""" @classmethod - def make_empty(cls, config_dir_path: str) -> "LibraryManager": + def make_empty(cls) -> "LibraryManager": domain_library = DomainLibrary.make_empty() concept_library = ConceptLibrary.make_empty() pipe_library = PipeLibrary.make_empty() - library_config = LibraryConfig(config_dir_path=config_dir_path) return LibraryManager( domain_library=domain_library, concept_library=concept_library, pipe_library=pipe_library, - library_config=library_config, ) @classmethod @@ -28,14 +25,10 @@ def make( domain_library: DomainLibrary, concept_library: ConceptLibrary, pipe_library: PipeLibrary, - config_dir_path: str, ) -> "LibraryManager": """Create a LibraryManager with provided libraries.""" - library_config = LibraryConfig(config_dir_path=config_dir_path) - return LibraryManager( domain_library=domain_library, concept_library=concept_library, pipe_library=pipe_library, - library_config=library_config, ) diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index ebbfd0ff8..8ec30b83b 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -1,5 +1,3 @@ -import inspect -import os from importlib.metadata import metadata from typing import cast @@ -70,7 +68,7 @@ class Pipelex(metaclass=MetaSingleton): def __init__( self, - config_dir_path: str, + config_dir_path: str = "./pipelex", # Dependency injection pipelex_hub: PipelexHub | None = None, config_cls: type[ConfigRoot] | None = None, @@ -134,7 +132,6 @@ def __init__( domain_library=domain_library, concept_library=concept_library, pipe_library=pipe_library, - config_dir_path=config_dir_path, ) self.pipelex_hub.set_library_manager(library_manager=self.library_manager) @@ -301,58 +298,24 @@ def teardown(self): # TODO: add kwargs to make() so that subclasses can employ specific parameters @classmethod - def make( - cls, - relative_config_folder_path: str | None = None, - absolute_config_folder_path: str | None = None, - 
from_file: bool | None = True, - ) -> Self: + def make(cls) -> Self: """Create and initialize a Pipelex instance. - Args: - relative_config_folder_path: Path to config folder relative to either the caller file or current working directory. - Cannot be used together with absolute_config_folder_path. - absolute_config_folder_path: Absolute path to config folder. - Cannot be used together with relative_config_folder_path. - from_file: Only used when relative_config_folder_path is provided. - If True (default), the relative path is resolved relative to the file where make() was called. - If False, the relative path is resolved relative to the current working directory (useful for CLI scenarios). - Returns: Initialized Pipelex instance. Raises: - PipelexSetupError: If both relative_config_folder_path and absolute_config_folder_path are provided. - Or if frame inspection fails when using relative paths with from_file=True. + if setup fails Note: If neither path is provided, defaults to "./pipelex_libraries". 
""" - if relative_config_folder_path is not None and absolute_config_folder_path is not None: - msg = "Cannot specify both relative_config_folder_path and absolute_config_folder_path" + if cls.get_optional_instance() is not None: + msg = "Pipelex is already initialized" raise PipelexSetupError(msg) - if relative_config_folder_path is not None: - if from_file: - current_frame = inspect.currentframe() - if current_frame is None: - msg = "Could not find relative config folder path because of: Failed to get current frame" - raise PipelexSetupError(msg) - if current_frame.f_back is None: - msg = "Could not find relative config folder path because of: Failed to get caller frame" - raise PipelexSetupError(msg) - caller_file = current_frame.f_back.f_code.co_filename - caller_dir = os.path.dirname(os.path.abspath(caller_file)) - config_dir_path = os.path.abspath(os.path.join(caller_dir, relative_config_folder_path)) - else: - config_dir_path = os.path.abspath(os.path.join(os.getcwd(), relative_config_folder_path)) - elif absolute_config_folder_path is not None: - config_dir_path = absolute_config_folder_path - else: - config_dir_path = "./pipelex_libraries" - - pipelex_instance = cls(config_dir_path=config_dir_path) + pipelex_instance = cls() pipelex_instance.setup() pipelex_instance.setup_libraries() return pipelex_instance diff --git a/pipelex/pipelex.toml b/pipelex/pipelex.toml index 3037153e7..e2add63a3 100644 --- a/pipelex/pipelex.toml +++ b/pipelex/pipelex.toml @@ -186,17 +186,6 @@ gemini = { tag_style = "xml" } [pipelex.structure_config] is_default_text_then_structure = false # turn this to true to get better results: generates text before structuring - -#################################################################################################### -# Pipelex libraries config -#################################################################################################### - -[pipelex.library_config] - -[pipelex.generic_template_names] 
-structure_from_preliminary_text_system = "structure_from_preliminary_text_system" -structure_from_preliminary_text_user = "structure_from_preliminary_text_user" - #################################################################################################### # Static validation config #################################################################################################### diff --git a/pipelex/tools/class_registry_utils.py b/pipelex/tools/class_registry_utils.py index 04da93099..4c600c876 100644 --- a/pipelex/tools/class_registry_utils.py +++ b/pipelex/tools/class_registry_utils.py @@ -13,7 +13,12 @@ from pydantic.fields import FieldInfo from pipelex import log -from pipelex.tools.typing.module_inspector import ModuleFileError, find_classes_in_module, import_module_from_file +from pipelex.tools.typing.module_inspector import ( + ModuleFileError, + find_classes_in_module, + import_module_from_file, + import_module_from_file_if_has_classes, +) _NoneType = type(None) _UnionType = getattr(types, "UnionType", None) # Py3.10+: types.UnionType @@ -175,15 +180,23 @@ def import_modules_in_folder( cls, folder_path: str, is_recursive: bool = True, + base_class_names: list[str] | None = None, ) -> None: """Import Python modules without registering their classes. This loads modules into sys.modules so their classes are available for discovery by auto_register_all_subclasses(). + If base_class_names is provided, uses AST parsing to first check if files + contain relevant classes before importing them. This avoids executing module-level + code in files that don't contain the classes you're looking for. + Args: folder_path: Path to folder containing Python files is_recursive: Whether to search recursively in subdirectories + base_class_names: Optional list of base class names (e.g. ["StructuredContent"]). + If provided, only imports files that contain classes inheriting + from these base classes. If None, imports all Python files. 
""" python_files = cls.find_files_in_dir( @@ -194,7 +207,15 @@ def import_modules_in_folder( for python_file in python_files: try: - import_module_from_file(str(python_file)) + if base_class_names is not None: + # Use AST-based import to avoid executing modules without relevant classes + import_module_from_file_if_has_classes( + str(python_file), + base_class_names=base_class_names, + ) + else: + # Import all modules regardless of content + import_module_from_file(str(python_file)) except ModuleFileError: # Expected: file validation issues (directories with .py extension, etc.) # log.debug(f"Skipping file {python_file}: {e}") diff --git a/pipelex/tools/typing/module_inspector.py b/pipelex/tools/typing/module_inspector.py index 3a5271dab..cae600ac4 100644 --- a/pipelex/tools/typing/module_inspector.py +++ b/pipelex/tools/typing/module_inspector.py @@ -1,3 +1,4 @@ +import ast import importlib.util import inspect import os @@ -100,6 +101,103 @@ def _convert_file_path_to_module_path(file_path: str) -> str: return result +def find_class_names_in_file(file_path: str, base_class_names: list[str] | None = None) -> list[str]: + """Find class names in a Python file without executing it using AST parsing. + + This is useful when you want to discover classes without running module-level code. + + Args: + file_path: Path to the Python file to analyze + base_class_names: Optional list of base class names to filter by. + Only returns classes that inherit from these bases. + If None, returns all class definitions. 
+ + Returns: + List of class names found in the file + + Raises: + ModuleFileError: If the file cannot be read or parsed + + """ + # Validate that the file is a Python file + if not file_path.endswith(".py"): + msg = f"File {file_path} is not a Python file (must end with .py)" + raise ModuleFileError(msg) + + # Validate that the path exists and is a file + path = Path(file_path) + if not path.exists() or not path.is_file(): + msg = f"Path {file_path} does not exist or is not a file" + raise ModuleFileError(msg) + + try: + # Read and parse the file + with open(file_path, encoding="utf-8") as f: + source = f.read() + tree = ast.parse(source, filename=file_path) + except Exception as e: + msg = f"Failed to parse {file_path}: {e}" + raise ModuleFileError(msg) from e + + class_names: list[str] = [] + + # Walk through the AST to find class definitions + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef): + # If no base class filter, add all classes + if base_class_names is None: + class_names.append(node.name) + continue + + # Check if this class inherits from any of the specified base classes + for base in node.bases: + # Handle simple names like "StructuredContent" + if isinstance(base, ast.Name) and base.id in base_class_names: + class_names.append(node.name) + break + # Handle attribute access like "pipelex.StructuredContent" + if isinstance(base, ast.Attribute): + if base.attr in base_class_names: + class_names.append(node.name) + break + + return class_names + + +def import_module_from_file_if_has_classes( + file_path: str, + base_class_names: list[str] | None = None, +) -> Any | None: + """Import a module only if it contains classes (optionally filtered by base class). + + This function uses AST parsing to check if the file contains relevant classes + before importing, avoiding execution of modules that don't have the classes + you're looking for. 
+ + Args: + file_path: Path to the Python file to potentially import + base_class_names: Optional list of base class names to filter by. + Only imports if file contains classes inheriting from these. + If None, imports if file contains any class definitions. + + Returns: + The imported module if it contains relevant classes, None otherwise + + Raises: + ModuleFileError: If the file is not a Python file or cannot be loaded + + """ + # First, use AST to check if file has relevant classes + class_names = find_class_names_in_file(file_path, base_class_names) + + # If no relevant classes found, skip import + if not class_names: + return None + + # File has relevant classes, import it + return import_module_from_file(file_path) + + def find_classes_in_module( module: Any, base_class: type[Any] | None, diff --git a/tests/conftest.py b/tests/conftest.py index e765fdfc5..108c4e6a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,7 @@ def reset_pipelex_config_fixture(): # Code to run before each test Console().print("[magenta]pipelex setup[/magenta]") try: - pipelex_instance = pipelex.pipelex.Pipelex.make(relative_config_folder_path="../pipelex/libraries") + pipelex_instance = pipelex.pipelex.Pipelex.make() config = get_config() log.verbose(config, title="Test config") assert isinstance(config, pipelex.config.PipelexConfig) diff --git a/tests/integration/pipelex/pipes/test_pipe_running_variants.py b/tests/integration/pipelex/pipes/test_pipe_running_variants.py index fc1d0a738..b97a9e599 100644 --- a/tests/integration/pipelex/pipes/test_pipe_running_variants.py +++ b/tests/integration/pipelex/pipes/test_pipe_running_variants.py @@ -1,14 +1,12 @@ -from pathlib import Path from typing import Any import pytest from pytest import FixtureRequest from pipelex import log, pretty_print -from pipelex.config import get_config from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory from pipelex.core.stuffs.stuff import Stuff -from pipelex.hub 
import get_library_manager, get_pipe_router, get_required_pipe +from pipelex.hub import get_pipe_router, get_required_pipe from pipelex.pipe_run.pipe_job_factory import PipeJobFactory from pipelex.pipe_run.pipe_run_params import PipeOutputMultiplicity, PipeRunMode from pipelex.pipe_run.pipe_run_params_factory import PipeRunParamsFactory @@ -120,13 +118,13 @@ async def test_pipe_infinite_loop( exception: type[Exception], expected_error_message: str, ): - failing_pipelines_file_paths = get_config().pipelex.library_config.failing_pipelines_file_paths - library_manager = get_library_manager() + # failing_pipelines_file_paths = get_config().pipelex.library_config.failing_pipelines_file_paths + # library_manager = get_library_manager() # Reset library to avoid pipe name collisions from previous test runs - library_manager.reset() - library_manager.load_libraries( - library_file_paths=[Path(failing_pipeline_file_path) for failing_pipeline_file_path in failing_pipelines_file_paths], - ) + # library_manager.reset() + # library_manager.load_libraries( + # library_file_paths=[Path(failing_pipeline_file_path) for failing_pipeline_file_path in failing_pipelines_file_paths], + # ) log.verbose(f"This pipe '{pipe_code}' is supposed to cause an error of type: {exception.__name__}") with pytest.raises(exception) as exc: diff --git a/tests/integration/pipelex/test_data.py b/tests/integration/pipelex/test_data.py index 0c29d962c..3f36a8b8b 100644 --- a/tests/integration/pipelex/test_data.py +++ b/tests/integration/pipelex/test_data.py @@ -162,6 +162,7 @@ class PipeTestCases: class LibraryTestCases: + TEST_PIPELINES_DIR_PATH = "tests/test_pipelines" KNOWN_CONCEPTS_AND_PIPES: ClassVar[list[tuple[str, str]]] = [ # concept, pipe ("cars.CarDescription", "generate_car_description"), ("animals.AnimalDescription", "generate_animal_description"), diff --git a/tests/integration/pipelex/test_libraries.py b/tests/integration/pipelex/test_libraries.py index 928be73c2..42fca143f 100644 --- 
a/tests/integration/pipelex/test_libraries.py +++ b/tests/integration/pipelex/test_libraries.py @@ -6,7 +6,6 @@ from rich.table import Table from pipelex import pretty_print -from pipelex.config import get_config from pipelex.core.concepts.concept_library import ConceptLibrary from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.libraries.library_manager_factory import LibraryManagerFactory @@ -86,8 +85,8 @@ def test_load_combo_libraries( known_concept: str, known_pipe: str, ): - library_manager = LibraryManagerFactory.make_empty(config_dir_path="pipelex/libraries") - test_pipelines_dir = [Path(get_config().pipelex.library_config.test_pipelines_dir_path)] + library_manager = LibraryManagerFactory.make_empty() + test_pipelines_dir = [Path(LibraryTestCases.TEST_PIPELINES_DIR_PATH)] library_manager.load_libraries(library_dirs=test_pipelines_dir) # Verify that libraries were loaded assert len(library_manager.concept_library.root) > 0, "No concepts were loaded" diff --git a/tests/unit/pipelex/tools/typing/test_module_inspector.py b/tests/unit/pipelex/tools/typing/test_module_inspector.py index ab35c63ad..01e507641 100644 --- a/tests/unit/pipelex/tools/typing/test_module_inspector.py +++ b/tests/unit/pipelex/tools/typing/test_module_inspector.py @@ -4,7 +4,13 @@ import pytest -from pipelex.tools.typing.module_inspector import ModuleFileError, find_classes_in_module, import_module_from_file +from pipelex.tools.typing.module_inspector import ( + ModuleFileError, + find_class_names_in_file, + find_classes_in_module, + import_module_from_file, + import_module_from_file_if_has_classes, +) class TestModuleFileError: @@ -284,3 +290,171 @@ class ImportedSubClass(BaseClass): assert BaseClass in classes assert LocalSubClass in classes assert ImportedSubClass in classes + + +class TestFindClassNamesInFile: + def test_find_all_class_names(self, tmp_path: Path): + """Test finding all class names without filtering.""" + test_file_path = tmp_path / "test_classes.py" + 
test_file_path.write_text(""" +class ClassA: + pass + +class ClassB: + pass + +def some_function(): + pass +""") + class_names = find_class_names_in_file(str(test_file_path)) + assert len(class_names) == 2 + assert "ClassA" in class_names + assert "ClassB" in class_names + + def test_find_class_names_with_base_class_filter(self, tmp_path: Path): + """Test finding classes that inherit from specific base classes.""" + test_file_path = tmp_path / "test_inheritance.py" + test_file_path.write_text(""" +class BaseContent: + pass + +class StructuredContent: + pass + +class MyContent(StructuredContent): + pass + +class OtherContent(BaseContent): + pass + +class UnrelatedClass: + pass +""") + class_names = find_class_names_in_file( + str(test_file_path), + base_class_names=["StructuredContent"], + ) + assert len(class_names) == 1 + assert "MyContent" in class_names + assert "OtherContent" not in class_names + assert "UnrelatedClass" not in class_names + + def test_find_class_names_with_qualified_base_class(self, tmp_path: Path): + """Test finding classes with qualified base class names.""" + test_file_path = tmp_path / "test_qualified.py" + test_file_path.write_text(""" +from pipelex.core.stuffs.structured_content import StructuredContent + +class MyContent(StructuredContent): + pass + +class UnrelatedClass: + pass +""") + class_names = find_class_names_in_file( + str(test_file_path), + base_class_names=["StructuredContent"], + ) + assert len(class_names) == 1 + assert "MyContent" in class_names + + def test_find_class_names_empty_file(self, tmp_path: Path): + """Test with file containing no classes.""" + test_file_path = tmp_path / "test_empty.py" + test_file_path.write_text(""" +def some_function(): + pass + +variable = 42 +""") + class_names = find_class_names_in_file(str(test_file_path)) + assert len(class_names) == 0 + + def test_find_class_names_non_python_file_raises_error(self, tmp_path: Path): + """Test that non-Python file raises error.""" + test_file_path = 
tmp_path / "test.txt" + test_file_path.write_text("Not Python") + with pytest.raises(ModuleFileError) as excinfo: + find_class_names_in_file(str(test_file_path)) + assert "is not a Python file" in str(excinfo.value) + + def test_find_class_names_nonexistent_file_raises_error(self, tmp_path: Path): + """Test that nonexistent file raises error.""" + nonexistent_file_path = tmp_path / "nonexistent.py" + with pytest.raises(ModuleFileError) as excinfo: + find_class_names_in_file(str(nonexistent_file_path)) + assert "does not exist" in str(excinfo.value) + + +class TestImportModuleFromFileIfHasClasses: + @pytest.fixture(autouse=True) + def cleanup_sys_modules(self): + """Clean up sys.modules entries after each test.""" + yield + # Clean up sys.modules entries for test modules + modules_to_remove = [name for name in sys.modules if "test_module_" in name or name == "test_module"] + for module_name in modules_to_remove: + del sys.modules[module_name] + + def test_import_file_with_matching_classes(self, tmp_path: Path): + """Test that file with matching classes is imported.""" + test_file_path = tmp_path / "test_module_with_class.py" + test_file_path.write_text(""" +class StructuredContent: + pass + +class MyContent(StructuredContent): + value = "imported" +""") + module = import_module_from_file_if_has_classes( + str(test_file_path), + base_class_names=["StructuredContent"], + ) + assert module is not None + assert hasattr(module, "MyContent") + assert module.MyContent.value == "imported" + + def test_skip_file_without_matching_classes(self, tmp_path: Path): + """Test that file without matching classes is not imported.""" + test_file_path = tmp_path / "test_module_no_match.py" + # Add code that would execute and cause side effects + test_file_path.write_text(""" +print("This should not execute!") + +class UnrelatedClass: + pass + +def some_function(): + pass +""") + module = import_module_from_file_if_has_classes( + str(test_file_path), + 
base_class_names=["StructuredContent"], + ) + assert module is None + # Verify the module was NOT loaded into sys.modules + assert not any("test_module_no_match" in name for name in sys.modules) + + def test_import_all_files_with_classes_when_no_filter(self, tmp_path: Path): + """Test that any file with classes is imported when no filter is provided.""" + test_file_path = tmp_path / "test_module_any_class.py" + test_file_path.write_text(""" +class AnyClass: + value = "any_class" +""") + module = import_module_from_file_if_has_classes(str(test_file_path)) + assert module is not None + assert hasattr(module, "AnyClass") + assert module.AnyClass.value == "any_class" + + def test_skip_file_with_no_classes_when_no_filter(self, tmp_path: Path): + """Test that file with no classes is skipped even without filter.""" + test_file_path = tmp_path / "test_module_no_classes.py" + test_file_path.write_text(""" +def some_function(): + pass + +variable = 42 +""") + module = import_module_from_file_if_has_classes(str(test_file_path)) + assert module is None From 94a4b2cac98262519f0600486a7e5c083a3a281c Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 01:48:58 +0200 Subject: [PATCH 045/115] Avoid executing modules if they don't have decorated pipe_funcs --- pipelex/builder/builder.py | 2 + pipelex/builder/concept/concept_spec.py | 2 + pipelex/libraries/library_manager.py | 10 +- pipelex/pipelex.py | 48 ++++++--- pipelex/tools/func_registry.py | 67 ++++++++++++- pipelex/tools/func_registry_utils.py | 86 ++++++++++++++-- pipelex/tools/typing/module_inspector.py | 98 +++++++++++++++++++ .../test_pipelines/test_file_func_registry.py | 2 + 8 files changed, 290 insertions(+), 25 deletions(-) diff --git a/pipelex/builder/builder.py b/pipelex/builder/builder.py index 406f29299..fc5a4e4ad 100644 --- a/pipelex/builder/builder.py +++ b/pipelex/builder/builder.py @@ -42,6 +42,7 @@ ) from pipelex.hub import get_library_manager from pipelex.pipe_run.dry_run import dry_run_pipes 
+from pipelex.tools.func_registry import pipe_func from pipelex.tools.typing.pydantic_utils import format_pydantic_validation_error if TYPE_CHECKING: @@ -189,6 +190,7 @@ def _convert_pipe_spec(pipe_spec: PipeSpecUnion) -> PipeSpecUnion: return cast("PipeSpecUnion", pipe_class.model_validate(pipe_spec.model_dump(serialize_as_any=True))) +@pipe_func() async def assemble_pipelex_bundle_spec(working_memory: WorkingMemory) -> PipelexBundleSpec: """Construct a PipelexBundleSpec from working memory containing concept and pipe blueprints. diff --git a/pipelex/builder/concept/concept_spec.py b/pipelex/builder/concept/concept_spec.py index f418fa125..38abe9072 100644 --- a/pipelex/builder/concept/concept_spec.py +++ b/pipelex/builder/concept/concept_spec.py @@ -16,6 +16,7 @@ from pipelex.core.domains.domain_blueprint import DomainBlueprint from pipelex.core.memory.working_memory import WorkingMemory from pipelex.core.stuffs.structured_content import StructuredContent +from pipelex.tools.func_registry import pipe_func from pipelex.tools.misc.string_utils import is_pascal_case, normalize_to_ascii, snake_to_pascal_case from pipelex.types import Self, StrEnum @@ -250,6 +251,7 @@ def to_blueprint(self) -> ConceptBlueprint: return ConceptBlueprint(description=self.description, structure=converted_structure, refines=self.refines) +@pipe_func() async def create_concept_spec(working_memory: WorkingMemory) -> ConceptSpec: concept_spec_draft = working_memory.get_stuff_as(name="concept_spec_draft", content_type=ConceptSpecDraft) concept_spec_structures_stuff = working_memory.get_stuff_as_list(name="concept_spec_structures", item_type=ConceptStructureSpec) diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index b028c8df7..8cde74323 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -34,6 +34,7 @@ from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from 
pipelex.tools.class_registry_utils import ClassRegistryUtils from pipelex.tools.config.manager import config_manager +from pipelex.tools.func_registry import pipe_func from pipelex.tools.func_registry_utils import FuncRegistryUtils from pipelex.tools.misc.file_utils import find_files_in_dir from pipelex.types import StrEnum @@ -272,9 +273,14 @@ def load_libraries( # Only import files that contain StructuredContent subclasses (uses AST pre-check) ClassRegistryUtils.import_modules_in_folder( folder_path=str(library_dir), - base_class_names=["StructuredContent"], + base_class_names=[StructuredContent.__name__], + ) + # Only import files that contain @pipe_func decorated functions (uses AST pre-check) + FuncRegistryUtils.register_funcs_in_folder( + folder_path=str(library_dir), + decorator_names=[pipe_func.__name__], + require_decorator=True, ) - FuncRegistryUtils.register_funcs_in_folder(folder_path=str(library_dir)) # Auto-discover and register all StructuredContent classes from sys.modules num_registered = ClassRegistryUtils.auto_register_all_subclasses(base_class=StructuredContent) diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index 8ec30b83b..d3e728983 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -88,7 +88,9 @@ def __init__( try: self.pipelex_hub.setup_config(config_cls=config_cls or PipelexConfig) except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="config", validation_error=validation_error) + validation_error_msg = report_validation_error( + category="config", validation_error=validation_error + ) msg = f"Could not setup config because of: {validation_error_msg}" raise PipelexConfigError(msg) from validation_error @@ -115,7 +117,9 @@ def __init__( self.reporting_delegate: ReportingProtocol if get_config().pipelex.feature_config.is_reporting_enabled: - self.reporting_delegate = reporting_delegate or ReportingManager(reporting_config=get_config().pipelex.reporting_config) + 
self.reporting_delegate = reporting_delegate or ReportingManager( + reporting_config=get_config().pipelex.reporting_config + ) else: self.reporting_delegate = ReportingNoOp() self.pipelex_hub.set_report_delegate(self.reporting_delegate) @@ -140,7 +144,9 @@ def __init__( if pipeline_tracker: self.pipeline_tracker = pipeline_tracker elif get_config().pipelex.feature_config.is_pipeline_tracking_enabled: - self.pipeline_tracker = PipelineTracker(tracker_config=get_config().pipelex.tracker_config) + self.pipeline_tracker = PipelineTracker( + tracker_config=get_config().pipelex.tracker_config + ) else: self.pipeline_tracker = PipelineTrackerNoOp() self.pipelex_hub.set_pipeline_tracker(pipeline_tracker=self.pipeline_tracker) @@ -164,7 +170,9 @@ def _get_config_not_found_error_msg(component_name: str) -> str: return f"Config files are missing for the {component_name}. Run `pipelex init config` to generate the missing files." @staticmethod - def _get_validation_error_msg(component_name: str, validation_exc: Exception) -> str: + def _get_validation_error_msg( + component_name: str, validation_exc: Exception + ) -> str: """Generate error message for invalid config files.""" msg = "" cause_exc = validation_exc.__cause__ @@ -209,10 +217,14 @@ def setup( msg = self._get_config_not_found_error_msg("model deck") raise PipelexSetupError(msg) from deck_not_found_exc except RoutingProfileValidationError as routing_validation_exc: - msg = self._get_validation_error_msg("routing profile library", routing_validation_exc) + msg = self._get_validation_error_msg( + "routing profile library", routing_validation_exc + ) raise PipelexSetupError(msg) from routing_validation_exc except InferenceBackendLibraryValidationError as backend_validation_exc: - msg = self._get_validation_error_msg("inference backend library", backend_validation_exc) + msg = self._get_validation_error_msg( + "inference backend library", backend_validation_exc + ) raise PipelexSetupError(msg) from backend_validation_exc 
except ModelDeckValidationError as deck_validation_exc: msg = self._get_validation_error_msg("model deck", deck_validation_exc) @@ -251,27 +263,37 @@ def setup( observer_provider = observer_provider or LocalObserver() self.pipelex_hub.set_observer_provider(observer_provider=observer_provider) - self.pipelex_hub.set_pipe_router(pipe_router or PipeRouter(observer_provider=observer_provider)) + self.pipelex_hub.set_pipe_router( + pipe_router or PipeRouter(observer_provider=observer_provider) + ) # pipeline self.pipeline_tracker.setup() self.pipeline_manager.setup() - log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup done for {get_config().project_name}") + log.debug( + f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup done for {get_config().project_name}" + ) def setup_libraries(self): self.library_manager.setup() self.library_manager.load_libraries() - log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for {get_config().project_name}") + log.debug( + f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for {get_config().project_name}" + ) def validate_libraries(self): try: self.library_manager.validate_libraries() except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) + validation_error_msg = report_validation_error( + category="plx", validation_error=validation_error + ) msg = f"Could not validate libraries because of: {validation_error_msg}" raise PipelexSetupError(msg) from validation_error - log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}") + log.debug( + f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}" + ) def teardown(self): # pipelex @@ -290,7 +312,9 @@ def teardown(self): self.class_registry.teardown() func_registry.teardown() - log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} teardown done for 
{get_config().project_name} (except config & logs)") + log.debug( + f"{PACKAGE_NAME} version {PACKAGE_VERSION} teardown done for {get_config().project_name} (except config & logs)" + ) self.pipelex_hub.reset_config() # Clear the singleton instance from metaclass if self.__class__ in MetaSingleton.instances: diff --git a/pipelex/tools/func_registry.py b/pipelex/tools/func_registry.py index 7b7532417..5f97b9904 100644 --- a/pipelex/tools/func_registry.py +++ b/pipelex/tools/func_registry.py @@ -13,11 +13,51 @@ T = TypeVar("T") FuncRegistryDict = dict[str, Callable[..., Any]] +# Attribute name used by the decorator to mark functions for registration +PIPE_FUNC_MARKER = "_is_pipe_func" + class FuncRegistryError(ToolException): pass +def pipe_func(name: str | None = None) -> Callable[[T], T]: + """Decorator to mark a function for automatic registration in the func_registry. + + This decorator marks functions to be discovered and registered for use in PipeFunc operators. + Functions marked with this decorator must follow the PipeFunc signature: + - Accept exactly one parameter named "working_memory" of type WorkingMemory + - Return a StuffContent or subclass + + Args: + name: Optional custom name for registration. 
If not provided, uses function's __name__ + + Returns: + The decorated function unchanged, but marked for registration + + Example: + @pipe_func() + async def my_custom_function(working_memory: WorkingMemory) -> TextContent: + result = working_memory.get_stuff("input") + return TextContent(text=f"Processed: {result}") + + @pipe_func(name="custom_name") + async def another_function(working_memory: WorkingMemory) -> MyContent: + return MyContent(data="example") + + """ + + def decorator(func: T) -> T: + # Mark the function with the attribute + setattr(func, PIPE_FUNC_MARKER, True) + # Store custom name if provided + if name is not None: + func._pipe_func_name = name # type: ignore[attr-defined] # noqa: SLF001 + return func + + return decorator + + class FuncRegistry(RootModel[FuncRegistryDict]): root: FuncRegistryDict = Field(default_factory=dict) _logger: logging.Logger = PrivateAttr(logging.getLogger(FUNC_REGISTRY_LOGGER_CHANNEL_NAME)) @@ -113,15 +153,40 @@ def has_function(self, name: str) -> bool: """Checks if a function is in the registry by its name.""" return name in self.root - def is_eligible_function(self, func: Any) -> bool: + def is_marked_pipe_func(self, func: Any) -> bool: + """Checks if a function is marked with the @pipe_func decorator. 
+ + Args: + func: The function to check + + Returns: + True if the function has the pipe_func marker attribute + + """ + return hasattr(func, PIPE_FUNC_MARKER) and getattr(func, PIPE_FUNC_MARKER) is True + + def is_eligible_function(self, func: Any, require_decorator: bool = False) -> bool: """Checks if a function matches the criteria for PipeFunc registration: - Must be callable - Exactly 1 parameter named "working_memory" with type WorkingMemory - Return type that is a subclass of StuffContent + - Optionally must be marked with @pipe_func decorator if require_decorator=True + + Args: + func: The function to check + require_decorator: If True, only functions marked with @pipe_func are eligible + + Returns: + True if the function meets all eligibility criteria + """ if not callable(func): return False + # If decorator is required, check for it first (fast check) + if require_decorator and not self.is_marked_pipe_func(func): + return False + the_function = cast("Callable[..., Any]", func) try: diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index ca9fd7ada..fa1a0bf11 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -6,7 +6,11 @@ from pipelex import log from pipelex.tools.func_registry import func_registry from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir -from pipelex.tools.typing.module_inspector import ModuleFileError, import_module_from_file +from pipelex.tools.typing.module_inspector import ( + ModuleFileError, + import_module_from_file, + import_module_from_file_if_has_decorated_functions, +) class FuncRegistryUtils: @@ -15,18 +19,30 @@ def register_funcs_in_folder( cls, folder_path: str, is_recursive: bool = True, + decorator_names: list[str] | None = None, + require_decorator: bool = False, ) -> None: """Discovers and attempts to register all functions in Python files within a folder. 
Only functions that meet the eligibility criteria will be registered: - Must be an async function - Exactly 1 parameter named "working_memory" with type WorkingMemory - Return type that is a subclass of StuffContent + - Optionally must be marked with a decorator (if decorator_names provided) + + If decorator_names is provided, uses AST parsing to first check if files + contain decorated functions before importing them. This avoids executing + module-level code in files that don't contain the functions you're looking for. The function name is used as the registry key. Args: folder_path: Path to folder containing Python files is_recursive: Whether to search recursively in subdirectories + decorator_names: Optional list of decorator names (e.g. ["pipe_func"]). + If provided, only imports files that contain functions with these decorators. + If None, imports all Python files. + require_decorator: If True, only functions with decorators in decorator_names are registered. + Only used if decorator_names is provided. """ python_files = cls._find_files_in_dir( @@ -36,21 +52,54 @@ def register_funcs_in_folder( ) for python_file in python_files: - cls._register_funcs_in_file(file_path=str(python_file)) + cls._register_funcs_in_file( + file_path=str(python_file), + decorator_names=decorator_names, + require_decorator=require_decorator, + ) @classmethod - def _register_funcs_in_file(cls, file_path: str) -> None: - """Processes a Python file to find and register eligible functions.""" + def _register_funcs_in_file( + cls, + file_path: str, + decorator_names: list[str] | None = None, + require_decorator: bool = False, + ) -> None: + """Processes a Python file to find and register eligible functions. 
+ + Args: + file_path: Path to the Python file + decorator_names: Optional list of decorator names to filter by + require_decorator: If True, only functions with the specified decorators are registered + + """ try: - module = import_module_from_file(file_path) + # Import the module (potentially with AST pre-check if decorator_names provided) + if decorator_names is not None: + module = import_module_from_file_if_has_decorated_functions( + file_path, + decorator_names=decorator_names, + ) + # If no decorated functions found, module will be None + if module is None: + return + else: + module = import_module_from_file(file_path) # Find functions that match criteria - functions_to_register = cls._find_functions_in_module(module) + functions_to_register = cls._find_functions_in_module( + module, + require_decorator=require_decorator, + ) for func in functions_to_register: + # Check for custom name from decorator + custom_name = getattr(func, "_pipe_func_name", None) + func_name = custom_name if custom_name is not None else func.__name__ + func_registry.register_function( func=func, - name=func.__name__, + name=func_name, should_warn_if_already_registered=True, ) except ModuleFileError: @@ -66,8 +115,21 @@ def _register_funcs_in_file(cls, file_path: str) -> None: log.warning(f"Syntax error in {file_path}: {exc}") @classmethod - def _find_functions_in_module(cls, module: Any) -> list[Callable[..., Any]]: - """Finds all functions in a module (eligibility will be checked during registration).""" + def _find_functions_in_module( + cls, + module: Any, + require_decorator: bool = False, + ) -> list[Callable[..., Any]]: + """Finds all functions in a module (eligibility will be checked during registration). 
+ + Args: + module: The module to search for functions + require_decorator: If True, only functions marked with @pipe_func are included + + Returns: + List of functions found in the module + + """ functions: list[Callable[..., Any]] = [] module_name = module.__name__ @@ -77,7 +139,11 @@ def _find_functions_in_module(cls, module: Any) -> list[Callable[..., Any]]: if obj.__module__ != module_name: continue - # Add all functions - eligibility will be checked by func_registry.register_function + # If decorator is required, check for it + if require_decorator and not func_registry.is_marked_pipe_func(obj): + continue + + # Add function - full eligibility will be checked by func_registry.register_function functions.append(obj) return functions diff --git a/pipelex/tools/typing/module_inspector.py b/pipelex/tools/typing/module_inspector.py index cae600ac4..13d63e959 100644 --- a/pipelex/tools/typing/module_inspector.py +++ b/pipelex/tools/typing/module_inspector.py @@ -164,6 +164,104 @@ def find_class_names_in_file(file_path: str, base_class_names: list[str] | None return class_names +def find_decorated_function_names_in_file( + file_path: str, + decorator_names: list[str], +) -> list[str]: + """Find function names decorated with specific decorators without executing the file. + + This uses AST parsing to find functions with specific decorators. + + Args: + file_path: Path to the Python file to analyze + decorator_names: List of decorator names to look for (e.g. 
["pipe_func", "register_func"]) + + Returns: + List of function names that have the specified decorators + + Raises: + ModuleFileError: If the file cannot be read or parsed + + """ + # Validate that the file is a Python file + if not file_path.endswith(".py"): + msg = f"File {file_path} is not a Python file (must end with .py)" + raise ModuleFileError(msg) + + # Validate that the path exists and is a file + path = Path(file_path) + if not path.exists() or not path.is_file(): + msg = f"Path {file_path} does not exist or is not a file" + raise ModuleFileError(msg) + + try: + # Read and parse the file + with open(file_path, encoding="utf-8") as f: + source = f.read() + tree = ast.parse(source, filename=file_path) + except Exception as e: + msg = f"Failed to parse {file_path}: {e}" + raise ModuleFileError(msg) from e + + function_names: list[str] = [] + + # Walk through the AST to find function definitions with decorators + for node in ast.walk(tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + # Check if function has any of the specified decorators + for decorator in node.decorator_list: + decorator_name = None + + # Handle simple decorator names like @pipe_func + if isinstance(decorator, ast.Name): + decorator_name = decorator.id + # Handle decorator calls like @pipe_func() or @pipe_func(name="foo") + elif isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Name): + decorator_name = decorator.func.id + # Handle qualified names like @registry.pipe_func() + elif isinstance(decorator, ast.Attribute): + decorator_name = decorator.attr + + if decorator_name in decorator_names: + function_names.append(node.name) + break # Found a matching decorator, no need to check others + + return function_names + + +def import_module_from_file_if_has_decorated_functions( + file_path: str, + decorator_names: list[str], +) -> Any | None: + """Import a module only if it contains functions with specific decorators. 
+ + This function uses AST parsing to check if the file contains functions decorated + with specific decorators before importing, avoiding execution of modules that don't + have the functions you're looking for. + + Args: + file_path: Path to the Python file to potentially import + decorator_names: List of decorator names to look for (e.g. ["pipe_func"]) + + Returns: + The imported module if it contains decorated functions, None otherwise + + Raises: + ModuleFileError: If the file is not a Python file or cannot be loaded + + """ + # First, use AST to check if file has decorated functions + function_names = find_decorated_function_names_in_file(file_path, decorator_names) + + # If no decorated functions found, skip import + if not function_names: + return None + + # File has decorated functions, import it + return import_module_from_file(file_path) + + def import_module_from_file_if_has_classes( file_path: str, base_class_names: list[str] | None = None, diff --git a/tests/test_pipelines/test_file_func_registry.py b/tests/test_pipelines/test_file_func_registry.py index 714678249..72f801770 100644 --- a/tests/test_pipelines/test_file_func_registry.py +++ b/tests/test_pipelines/test_file_func_registry.py @@ -3,6 +3,7 @@ from pipelex.core.memory.working_memory import WorkingMemory from pipelex.core.stuffs.list_content import ListContent from pipelex.core.stuffs.structured_content import StructuredContent +from pipelex.tools.func_registry import pipe_func class FilePath(StructuredContent): @@ -18,6 +19,7 @@ class CodebaseFileContent(StructuredContent): file_content: str = Field(description="Content of the codebase file") +@pipe_func() def read_file_content(working_memory: WorkingMemory) -> ListContent[CodebaseFileContent]: """Read the content of related codebase files. 
From 372b2eccbe02e1cc5681952f6203f385df5eac2e Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 02:23:28 +0200 Subject: [PATCH 046/115] Polish --- pipelex/cogt/exceptions.py | 8 +- pipelex/cogt/models/model_deck.py | 61 +++++++++++---- .../plugins/anthropic/anthropic_llm_worker.py | 75 ++++++++++++++----- pyproject.toml | 2 +- uv.lock | 2 +- 5 files changed, 114 insertions(+), 34 deletions(-) diff --git a/pipelex/cogt/exceptions.py b/pipelex/cogt/exceptions.py index 3897bc43e..689cc1421 100644 --- a/pipelex/cogt/exceptions.py +++ b/pipelex/cogt/exceptions.py @@ -58,6 +58,10 @@ class ModelDeckValidatonError(CogtError): pass +class ModelNotFoundError(CogtError): + pass + + class LLMHandleNotFoundError(CogtError): pass @@ -129,7 +133,9 @@ class ImgGenGeneratedTypeError(ImgGenGenerationError): class MissingDependencyError(CogtError): """Raised when a required dependency is not installed.""" - def __init__(self, dependency_name: str, extra_name: str, message: str | None = None): + def __init__( + self, dependency_name: str, extra_name: str, message: str | None = None + ): self.dependency_name = dependency_name self.extra_name = extra_name error_msg = f"Required dependency '{dependency_name}' is not installed." 
diff --git a/pipelex/cogt/models/model_deck.py b/pipelex/cogt/models/model_deck.py index 9b7108f35..03656bf53 100644 --- a/pipelex/cogt/models/model_deck.py +++ b/pipelex/cogt/models/model_deck.py @@ -10,10 +10,16 @@ LLMHandleNotFoundError, LLMSettingsValidationError, ModelDeckValidatonError, + ModelNotFoundError, ) from pipelex.cogt.extract.extract_setting import ExtractModelChoice, ExtractSetting from pipelex.cogt.img_gen.img_gen_setting import ImgGenModelChoice, ImgGenSetting -from pipelex.cogt.llm.llm_setting import LLMModelChoice, LLMSetting, LLMSettingChoices, LLMSettingChoicesDefaults +from pipelex.cogt.llm.llm_setting import ( + LLMModelChoice, + LLMSetting, + LLMSettingChoices, + LLMSettingChoicesDefaults, +) from pipelex.cogt.model_backends.model_constraints import ModelConstraints from pipelex.cogt.model_backends.model_spec import InferenceModelSpec from pipelex.tools.config.config_model import ConfigModel @@ -69,7 +75,11 @@ class ModelDeck(ConfigModel): img_gen_presets: dict[str, ImgGenSetting] = Field(default_factory=dict) img_gen_choice_default: ImgGenModelChoice - def check_llm_setting(self, llm_setting_or_preset_id: LLMModelChoice, is_disabled_allowed: bool = False): + def check_llm_setting( + self, + llm_setting_or_preset_id: LLMModelChoice, + is_disabled_allowed: bool = False, + ): if isinstance(llm_setting_or_preset_id, LLMSetting): return preset_id: str = llm_setting_or_preset_id @@ -116,9 +126,13 @@ def get_img_gen_setting(self, img_gen_choice: ImgGenModelChoice) -> ImgGenSettin @classmethod def final_validate(cls, deck: Self): for llm_preset_id, llm_setting in deck.llm_presets.items(): - inference_model = deck.get_required_inference_model(model_handle=llm_setting.model) + inference_model = deck.get_required_inference_model( + model_handle=llm_setting.model + ) try: - cls._validate_llm_setting(llm_setting=llm_setting, inference_model=inference_model) + cls._validate_llm_setting( + llm_setting=llm_setting, inference_model=inference_model + ) 
except ConfigValidationError as exc: msg = f"LLM preset '{llm_preset_id}' is invalid: {exc}" raise ModelDeckValidatonError(msg) from exc @@ -128,15 +142,22 @@ def final_validate(cls, deck: Self): ############################################################ @classmethod - def _validate_llm_setting(cls, llm_setting: LLMSetting, inference_model: InferenceModelSpec): - if inference_model.max_tokens is not None and (llm_setting_max_tokens := llm_setting.max_tokens): + def _validate_llm_setting( + cls, llm_setting: LLMSetting, inference_model: InferenceModelSpec + ): + if inference_model.max_tokens is not None and ( + llm_setting_max_tokens := llm_setting.max_tokens + ): if llm_setting_max_tokens > inference_model.max_tokens: msg = ( f"LLM setting '{llm_setting.model}' has a max_tokens of {llm_setting_max_tokens}, " f"which is greater than the model's max_tokens of {inference_model.max_tokens}" ) raise LLMSettingsValidationError(msg) - if ModelConstraints.TEMPERATURE_MUST_BE_1 in inference_model.constraints and llm_setting.temperature != 1: + if ( + ModelConstraints.TEMPERATURE_MUST_BE_1 in inference_model.constraints + and llm_setting.temperature != 1 + ): msg = ( f"LLM setting '{llm_setting.model}' has a temperature of {llm_setting.temperature}, " f"which is not allowed by the model's constraints: it must be 1" @@ -145,7 +166,9 @@ def _validate_llm_setting(cls, llm_setting: LLMSetting, inference_model: Inferen @field_validator("llm_choice_defaults", mode="after") @classmethod - def validate_llm_choice_defaults(cls, llm_choice_defaults: LLMSettingChoices) -> LLMSettingChoices: + def validate_llm_choice_defaults( + cls, llm_choice_defaults: LLMSettingChoices + ) -> LLMSettingChoices: if llm_choice_defaults.for_text is None: msg = "llm_choice_defaults.for_text cannot be None" raise ConfigValidationError(msg) @@ -156,7 +179,9 @@ def validate_llm_choice_defaults(cls, llm_choice_defaults: LLMSettingChoices) -> @field_validator("llm_choice_overrides", mode="after") 
@classmethod - def validate_llm_choice_overrides(cls, value: LLMSettingChoices) -> LLMSettingChoices: + def validate_llm_choice_overrides( + cls, value: LLMSettingChoices + ) -> LLMSettingChoices: if value.for_text == LLM_PRESET_DISABLED: value.for_text = None if value.for_object == LLM_PRESET_DISABLED: @@ -179,7 +204,9 @@ def _validate_llm_choices(self, llm_choices: LLMSettingChoices): for llm_setting in llm_choices.list_choices(): self.check_llm_setting(llm_setting_or_preset_id=llm_setting) - def get_optional_inference_model(self, model_handle: str) -> InferenceModelSpec | None: + def get_optional_inference_model( + self, model_handle: str + ) -> InferenceModelSpec | None: if inference_model := self.inference_models.get(model_handle): return inference_model if redirection := self.aliases.get(model_handle): @@ -189,9 +216,13 @@ def get_optional_inference_model(self, model_handle: str) -> InferenceModelSpec else: alias_list = redirection for alias in alias_list: - if inference_model := self.get_optional_inference_model(model_handle=alias): + if inference_model := self.get_optional_inference_model( + model_handle=alias + ): return inference_model - log.warning(f"Skipping model handle '{model_handle}' because it's not found in deck") + log.warning( + f"Skipping model handle '{model_handle}' because it's not found in deck" + ) return None def is_handle_defined(self, model_handle: str) -> bool: @@ -201,7 +232,9 @@ def get_required_inference_model(self, model_handle: str) -> InferenceModelSpec: inference_model = self.get_optional_inference_model(model_handle=model_handle) if inference_model is None: msg = f"Model handle '{model_handle}' not found in deck" - raise LLMHandleNotFoundError(msg) + raise ModelNotFoundError(msg) if model_handle not in self.inference_models: - log.dev(f"Model handle '{model_handle}' is an alias which resolves to '{inference_model.name}'") + log.dev( + f"Model handle '{model_handle}' is an alias which resolves to '{inference_model.name}'" + ) 
return inference_model diff --git a/pipelex/plugins/anthropic/anthropic_llm_worker.py b/pipelex/plugins/anthropic/anthropic_llm_worker.py index 88beb29b0..a685922ea 100644 --- a/pipelex/plugins/anthropic/anthropic_llm_worker.py +++ b/pipelex/plugins/anthropic/anthropic_llm_worker.py @@ -7,13 +7,22 @@ from pipelex import log from pipelex.cogt.exceptions import LLMCompletionError, SdkTypeError from pipelex.cogt.llm.llm_job import LLMJob -from pipelex.cogt.llm.llm_utils import dump_error, dump_kwargs, dump_response_from_structured_gen +from pipelex.cogt.llm.llm_utils import ( + dump_error, + dump_kwargs, + dump_response_from_structured_gen, +) from pipelex.cogt.llm.llm_worker_internal_abstract import LLMWorkerInternalAbstract from pipelex.cogt.llm.structured_output import StructureMethod from pipelex.cogt.model_backends.model_spec import InferenceModelSpec from pipelex.config import get_config -from pipelex.plugins.anthropic.anthropic_exceptions import AnthropicWorkerConfigurationError -from pipelex.plugins.anthropic.anthropic_factory import AnthropicFactory, AnthropicSdkVariant +from pipelex.plugins.anthropic.anthropic_exceptions import ( + AnthropicWorkerConfigurationError, +) +from pipelex.plugins.anthropic.anthropic_factory import ( + AnthropicFactory, + AnthropicSdkVariant, +) from pipelex.reporting.reporting_protocol import ReportingProtocol from pipelex.tools.typing.pydantic_utils import BaseModelTypeVar from pipelex.types import StrEnum @@ -60,8 +69,12 @@ def __init__( # Verify if the sdk_instance is compatible with the current LLM platform if isinstance(sdk_instance, (AsyncAnthropic, AsyncAnthropicBedrock)): - if (inference_model.sdk == AnthropicSdkVariant.ANTHROPIC and not (isinstance(sdk_instance, AsyncAnthropic))) or ( - inference_model.sdk == AnthropicSdkVariant.BEDROCK_ANTHROPIC and not (isinstance(sdk_instance, AsyncAnthropicBedrock)) + if ( + inference_model.sdk == AnthropicSdkVariant.ANTHROPIC + and not (isinstance(sdk_instance, AsyncAnthropic)) + ) 
or ( + inference_model.sdk == AnthropicSdkVariant.BEDROCK_ANTHROPIC + and not (isinstance(sdk_instance, AsyncAnthropicBedrock)) ): msg = f"Provided sdk_instance does not match LLMEngine platform:{sdk_instance}" raise SdkTypeError(msg) @@ -72,18 +85,29 @@ def __init__( self.anthropic_async_client = sdk_instance if structure_method: instructor_mode = structure_method.as_instructor_mode() - log.debug(f"Anthropic structure mode: {structure_method} --> {instructor_mode}") - self.instructor_for_objects = instructor.from_anthropic(client=sdk_instance, mode=instructor_mode) + log.debug( + f"Anthropic structure mode: {structure_method} --> {instructor_mode}" + ) + self.instructor_for_objects = instructor.from_anthropic( + client=sdk_instance, mode=instructor_mode + ) else: self.instructor_for_objects = instructor.from_anthropic(client=sdk_instance) instructor_config = get_config().cogt.llm_config.instructor_config if instructor_config.is_dump_kwargs_enabled: - self.instructor_for_objects.on(hook_name="completion:kwargs", handler=dump_kwargs) + self.instructor_for_objects.on( + hook_name="completion:kwargs", handler=dump_kwargs + ) if instructor_config.is_dump_response_enabled: - self.instructor_for_objects.on(hook_name="completion:response", handler=dump_response_from_structured_gen) + self.instructor_for_objects.on( + hook_name="completion:response", + handler=dump_response_from_structured_gen, + ) if instructor_config.is_dump_error_enabled: - self.instructor_for_objects.on(hook_name="completion:error", handler=dump_error) + self.instructor_for_objects.on( + hook_name="completion:error", handler=dump_error + ) ######################################################### # Instance methods @@ -93,9 +117,15 @@ def __init__( def _adapt_max_tokens(self, max_tokens: int | None) -> int: max_tokens = max_tokens or self.default_max_tokens - if (claude_4_tokens_limit := self.extra_config.get(AnthropicExtraField.CLAUDE_4_TOKENS_LIMIT)) and max_tokens > claude_4_tokens_limit: + if ( + 
claude_4_tokens_limit := self.extra_config.get( + AnthropicExtraField.CLAUDE_4_TOKENS_LIMIT + ) + ) and max_tokens > claude_4_tokens_limit: max_tokens = claude_4_tokens_limit - log.warning(f"Max tokens is greater than the claude 4 reduced tokens limit, reducing to {max_tokens}") + log.warning( + f"Max tokens is greater than the claude 4 reduced tokens limit, reducing to {max_tokens}" + ) if not max_tokens: msg = f"Max tokens is None for model {self.inference_model.desc}" raise AnthropicWorkerConfigurationError(msg) @@ -128,8 +158,12 @@ async def _gen_text( raise LLMCompletionError(msg) full_reply_content = single_content_block.text - if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and (usage := response.usage): - llm_tokens_usage.nb_tokens_by_category = AnthropicFactory.make_nb_tokens_by_category(usage=usage) + if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and ( + usage := response.usage + ): + llm_tokens_usage.nb_tokens_by_category = ( + AnthropicFactory.make_nb_tokens_by_category(usage=usage) + ) return full_reply_content @@ -141,7 +175,10 @@ async def _gen_object( ) -> BaseModelTypeVar: messages = await AnthropicFactory.make_simple_messages(llm_job=llm_job) max_tokens = self._adapt_max_tokens(max_tokens=llm_job.job_params.max_tokens) - result_object, completion = await self.instructor_for_objects.chat.completions.create_with_completion( + ( + result_object, + completion, + ) = await self.instructor_for_objects.chat.completions.create_with_completion( messages=messages, response_model=schema, max_retries=llm_job.job_config.max_retries, @@ -149,7 +186,11 @@ async def _gen_object( temperature=llm_job.job_params.temperature, max_tokens=max_tokens, ) - if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and (usage := completion.usage): - llm_tokens_usage.nb_tokens_by_category = AnthropicFactory.make_nb_tokens_by_category(usage=usage) + if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and ( + usage := completion.usage + 
): + llm_tokens_usage.nb_tokens_by_category = ( + AnthropicFactory.make_nb_tokens_by_category(usage=usage) + ) return result_object diff --git a/pyproject.toml b/pyproject.toml index f80a4a58c..a1eb30e91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ Documentation = "https://docs.pipelex.com/" Changelog = "https://docs.pipelex.com/changelog/" [project.optional-dependencies] -anthropic = ["anthropic>=0.49.0"] +anthropic = ["anthropic>=0.60.0"] bedrock = ["boto3>=1.34.131", "aioboto3>=13.4.0"] fal = ["fal-client>=0.4.1"] google = ["google-auth-oauthlib>=1.2.1"] diff --git a/uv.lock b/uv.lock index 8836ae26f..9cbc04160 100644 --- a/uv.lock +++ b/uv.lock @@ -2237,7 +2237,7 @@ mistralai = [ requires-dist = [ { name = "aioboto3", marker = "extra == 'bedrock'", specifier = ">=13.4.0" }, { name = "aiofiles", specifier = ">=23.2.1" }, - { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.49.0" }, + { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.60.0" }, { name = "backports-strenum", marker = "python_full_version < '3.11'", specifier = ">=1.3.0" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34.131" }, { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, From 12dbc44a80c00332fac3f4fcc87102eec7d8b2a4 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 11:46:45 +0200 Subject: [PATCH 047/115] Better handle image prompts --- pipelex/cogt/exceptions.py | 4 +- pipelex/cogt/extract/extract_output.py | 4 +- pipelex/cogt/image/prompt_image.py | 9 +++- pipelex/cogt/image/prompt_image_factory.py | 31 ++++++------ pipelex/cogt/models/model_deck.py | 45 ++++------------- pipelex/core/stuffs/image_content.py | 4 +- pipelex/kit/configs/pipelex.toml | 26 ++++++---- .../llm/llm_prompt_blueprint.py | 18 ++----- pipelex/pipelex.py | 48 +++++------------- .../plugins/anthropic/anthropic_llm_worker.py | 50 +++++-------------- .../plugins/openai/openai_img_gen_worker.py 
| 4 +- pipelex/tools/misc/base_64_utils.py | 25 ++++++---- .../pipelex/cogt/test_llm_vision.py | 2 +- .../pipelex/plugins/test_openai_image_gen.py | 4 +- .../pipelex/tools/misc/test_base_64_utils.py | 6 +-- 15 files changed, 106 insertions(+), 174 deletions(-) diff --git a/pipelex/cogt/exceptions.py b/pipelex/cogt/exceptions.py index 689cc1421..5549e594b 100644 --- a/pipelex/cogt/exceptions.py +++ b/pipelex/cogt/exceptions.py @@ -133,9 +133,7 @@ class ImgGenGeneratedTypeError(ImgGenGenerationError): class MissingDependencyError(CogtError): """Raised when a required dependency is not installed.""" - def __init__( - self, dependency_name: str, extra_name: str, message: str | None = None - ): + def __init__(self, dependency_name: str, extra_name: str, message: str | None = None): self.dependency_name = dependency_name self.extra_name = extra_name error_msg = f"Required dependency '{dependency_name}' is not installed." diff --git a/pipelex/cogt/extract/extract_output.py b/pipelex/cogt/extract/extract_output.py index de6ce3fd9..11daeafa6 100644 --- a/pipelex/cogt/extract/extract_output.py +++ b/pipelex/cogt/extract/extract_output.py @@ -1,7 +1,7 @@ from pydantic import Field from pipelex import log -from pipelex.tools.misc.base_64_utils import save_base64_to_binary_file +from pipelex.tools.misc.base_64_utils import save_base_64_str_to_binary_file from pipelex.tools.misc.file_utils import ensure_directory_exists, save_text_to_path from pipelex.tools.typing.pydantic_utils import CustomBaseModel, empty_list_factory_of @@ -17,7 +17,7 @@ def save_to_directory(self, directory: str): if base_64 := self.base_64: filename = self.image_id file_path = f"{directory}/{filename}" - save_base64_to_binary_file(b64=base_64, file_path=file_path) + save_base_64_str_to_binary_file(base_64_str=base_64, file_path=file_path) class ExtractedImageFromPage(ExtractedImage): diff --git a/pipelex/cogt/image/prompt_image.py b/pipelex/cogt/image/prompt_image.py index 433712d3b..e7d56cef9 100644 --- 
a/pipelex/cogt/image/prompt_image.py +++ b/pipelex/cogt/image/prompt_image.py @@ -6,7 +6,13 @@ from typing_extensions import override from pipelex.tools.misc.attribute_utils import AttributePolisher -from pipelex.tools.misc.filetype_utils import FileType, detect_file_type_from_base64, detect_file_type_from_bytes, detect_file_type_from_path +from pipelex.tools.misc.file_utils import save_bytes_to_binary_file +from pipelex.tools.misc.filetype_utils import ( + FileType, + detect_file_type_from_base64, + detect_file_type_from_bytes, + detect_file_type_from_path, +) from pipelex.tools.typing.pydantic_utils import CustomBaseModel @@ -53,6 +59,7 @@ class PromptImageBase64(PromptImage): base_64: bytes def get_file_type(self) -> FileType: + save_bytes_to_binary_file("test.png", base64.b64decode(self.base_64)) return detect_file_type_from_base64(self.base_64) def get_mime_type(self) -> str: diff --git a/pipelex/cogt/image/prompt_image_factory.py b/pipelex/cogt/image/prompt_image_factory.py index b0b608042..a6f0ebb4e 100644 --- a/pipelex/cogt/image/prompt_image_factory.py +++ b/pipelex/cogt/image/prompt_image_factory.py @@ -1,12 +1,18 @@ from pipelex.cogt.exceptions import PromptImageFactoryError -from pipelex.cogt.image.prompt_image import PromptImage, PromptImageBase64, PromptImageBinary, PromptImagePath, PromptImageUrl +from pipelex.cogt.image.prompt_image import ( + PromptImage, + PromptImageBase64, + PromptImageBinary, + PromptImagePath, + PromptImageUrl, +) from pipelex.tools.misc.base_64_utils import ( encode_to_base64_async, load_binary_as_base64_async, load_binary_async, + strip_base64_str_if_needed, ) from pipelex.tools.misc.file_fetch_utils import fetch_file_from_url_httpx_async -from pipelex.tools.misc.path_utils import clarify_path_or_url class PromptImageFactory: @@ -16,28 +22,21 @@ def make_prompt_image( file_path: str | None = None, url: str | None = None, base_64: bytes | None = None, + base_64_str: str | None = None, ) -> PromptImage: - if file_path: + if 
base_64: + return PromptImageBase64(base_64=base_64) + elif base_64_str: + stripped_base_64_str = strip_base64_str_if_needed(base_64_str) + return PromptImageBase64(base_64=stripped_base_64_str.encode()) + elif file_path: return PromptImagePath(file_path=file_path) elif url: return PromptImageUrl(url=url) - elif base_64: - return PromptImageBase64(base_64=base_64) else: msg = "PromptImageFactory requires one of file_path, url, or image_bytes" raise PromptImageFactoryError(msg) - @classmethod - def make_prompt_image_from_uri( - cls, - uri: str, - ) -> PromptImage: - file_path, url = clarify_path_or_url(path_or_uri=uri) - return PromptImageFactory.make_prompt_image( - file_path=file_path, - url=url, - ) - @classmethod async def make_promptimagebase64_from_url_async( cls, diff --git a/pipelex/cogt/models/model_deck.py b/pipelex/cogt/models/model_deck.py index 03656bf53..51d952b84 100644 --- a/pipelex/cogt/models/model_deck.py +++ b/pipelex/cogt/models/model_deck.py @@ -126,13 +126,9 @@ def get_img_gen_setting(self, img_gen_choice: ImgGenModelChoice) -> ImgGenSettin @classmethod def final_validate(cls, deck: Self): for llm_preset_id, llm_setting in deck.llm_presets.items(): - inference_model = deck.get_required_inference_model( - model_handle=llm_setting.model - ) + inference_model = deck.get_required_inference_model(model_handle=llm_setting.model) try: - cls._validate_llm_setting( - llm_setting=llm_setting, inference_model=inference_model - ) + cls._validate_llm_setting(llm_setting=llm_setting, inference_model=inference_model) except ConfigValidationError as exc: msg = f"LLM preset '{llm_preset_id}' is invalid: {exc}" raise ModelDeckValidatonError(msg) from exc @@ -142,22 +138,15 @@ def final_validate(cls, deck: Self): ############################################################ @classmethod - def _validate_llm_setting( - cls, llm_setting: LLMSetting, inference_model: InferenceModelSpec - ): - if inference_model.max_tokens is not None and ( - llm_setting_max_tokens := 
llm_setting.max_tokens - ): + def _validate_llm_setting(cls, llm_setting: LLMSetting, inference_model: InferenceModelSpec): + if inference_model.max_tokens is not None and (llm_setting_max_tokens := llm_setting.max_tokens): if llm_setting_max_tokens > inference_model.max_tokens: msg = ( f"LLM setting '{llm_setting.model}' has a max_tokens of {llm_setting_max_tokens}, " f"which is greater than the model's max_tokens of {inference_model.max_tokens}" ) raise LLMSettingsValidationError(msg) - if ( - ModelConstraints.TEMPERATURE_MUST_BE_1 in inference_model.constraints - and llm_setting.temperature != 1 - ): + if ModelConstraints.TEMPERATURE_MUST_BE_1 in inference_model.constraints and llm_setting.temperature != 1: msg = ( f"LLM setting '{llm_setting.model}' has a temperature of {llm_setting.temperature}, " f"which is not allowed by the model's constraints: it must be 1" @@ -166,9 +155,7 @@ def _validate_llm_setting( @field_validator("llm_choice_defaults", mode="after") @classmethod - def validate_llm_choice_defaults( - cls, llm_choice_defaults: LLMSettingChoices - ) -> LLMSettingChoices: + def validate_llm_choice_defaults(cls, llm_choice_defaults: LLMSettingChoices) -> LLMSettingChoices: if llm_choice_defaults.for_text is None: msg = "llm_choice_defaults.for_text cannot be None" raise ConfigValidationError(msg) @@ -179,9 +166,7 @@ def validate_llm_choice_defaults( @field_validator("llm_choice_overrides", mode="after") @classmethod - def validate_llm_choice_overrides( - cls, value: LLMSettingChoices - ) -> LLMSettingChoices: + def validate_llm_choice_overrides(cls, value: LLMSettingChoices) -> LLMSettingChoices: if value.for_text == LLM_PRESET_DISABLED: value.for_text = None if value.for_object == LLM_PRESET_DISABLED: @@ -204,9 +189,7 @@ def _validate_llm_choices(self, llm_choices: LLMSettingChoices): for llm_setting in llm_choices.list_choices(): self.check_llm_setting(llm_setting_or_preset_id=llm_setting) - def get_optional_inference_model( - self, model_handle: str - 
) -> InferenceModelSpec | None: + def get_optional_inference_model(self, model_handle: str) -> InferenceModelSpec | None: if inference_model := self.inference_models.get(model_handle): return inference_model if redirection := self.aliases.get(model_handle): @@ -216,13 +199,9 @@ def get_optional_inference_model( else: alias_list = redirection for alias in alias_list: - if inference_model := self.get_optional_inference_model( - model_handle=alias - ): + if inference_model := self.get_optional_inference_model(model_handle=alias): return inference_model - log.warning( - f"Skipping model handle '{model_handle}' because it's not found in deck" - ) + log.warning(f"Skipping model handle '{model_handle}' because it's not found in deck") return None def is_handle_defined(self, model_handle: str) -> bool: @@ -234,7 +213,5 @@ def get_required_inference_model(self, model_handle: str) -> InferenceModelSpec: msg = f"Model handle '{model_handle}' not found in deck" raise ModelNotFoundError(msg) if model_handle not in self.inference_models: - log.dev( - f"Model handle '{model_handle}' is an alias which resolves to '{inference_model.name}'" - ) + log.dev(f"Model handle '{model_handle}' is an alias which resolves to '{inference_model.name}'") return inference_model diff --git a/pipelex/core/stuffs/image_content.py b/pipelex/core/stuffs/image_content.py index 288f187ba..a4c930635 100644 --- a/pipelex/core/stuffs/image_content.py +++ b/pipelex/core/stuffs/image_content.py @@ -8,7 +8,7 @@ from pipelex.cogt.extract.extract_output import ExtractedImage from pipelex.core.stuffs.stuff_content import StuffContent -from pipelex.tools.misc.base_64_utils import save_base64_to_binary_file +from pipelex.tools.misc.base_64_utils import save_base_64_str_to_binary_file from pipelex.tools.misc.file_utils import ensure_directory_exists, get_incremental_file_path, save_text_to_path from pipelex.tools.misc.filetype_utils import detect_file_type_from_base64 from pipelex.tools.misc.path_utils import 
InterpretedPathOrUrl, interpret_path_or_url @@ -83,7 +83,7 @@ def save_to_directory(self, directory: str, base_name: str | None = None, extens extension=extension, avoid_suffix_if_possible=True, ) - save_base64_to_binary_file(b64=base_64, file_path=file_path) + save_base_64_str_to_binary_file(base_64_str=base_64, file_path=file_path) if caption := self.caption: caption_file_path = get_incremental_file_path( diff --git a/pipelex/kit/configs/pipelex.toml b/pipelex/kit/configs/pipelex.toml index 0f6eaf0a1..70170c31b 100644 --- a/pipelex/kit/configs/pipelex.toml +++ b/pipelex/kit/configs/pipelex.toml @@ -1,17 +1,21 @@ -[pipelex] -[pipelex.observer_config] -observer_dir = "results/observer" -[pipelex.aws_config] -api_key_method = "env" -# The possible values are "env" and "secret_provider". -# "env" means means that the env var are stored in your .env file. -# "secret_provider" means that the env var are stored in your Secret Manager (See the doc for injecting a secret provider). +[pipelex.log_config] +default_log_level = "INFO" + +[pipelex.log_config.package_log_levels] +pipelex = "INFO" -[cogt] +[cogt.llm_config] +is_dump_text_prompts_enabled = true +# is_dump_response_text_enabled = true -[cogt.extract_config] -page_output_text_file_name = "page_text.md" +[cogt.llm_config.instructor_config] +is_dump_kwargs_enabled = true +# is_dump_response_enabled = true +# is_dump_error_enabled = true + +[pipelex.observer_config] +observer_dir = "results/observer" [pipelex.feature_config] # WIP/Experimental feature flags diff --git a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py index 1416f606a..a1b281cbe 100644 --- a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py +++ b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py @@ -76,12 +76,8 @@ async def make_llm_prompt( # Try to get as a single ImageContent first try: prompt_image_content = context_provider.get_typed_object_or_attribute(name=user_image_name, 
wanted_type=ImageContent) - if prompt_image_content is not None: # An ImageContent can be optional - if base_64 := prompt_image_content.base_64: - user_image = PromptImageFactory.make_prompt_image(base_64=base_64) - else: - image_uri = prompt_image_content.url - user_image = PromptImageFactory.make_prompt_image_from_uri(uri=image_uri) + if isinstance(prompt_image_content, ImageContent): + user_image = PromptImageFactory.make_prompt_image(url=prompt_image_content.url, base_64_str=prompt_image_content.base_64) prompt_user_images[user_image_name] = user_image except ContextProviderException: # If single image failed, try to get as a collection (list or tuple) @@ -91,12 +87,7 @@ async def make_llm_prompt( if isinstance(image_collection, (list, tuple)): for image_item in image_collection: # type: ignore[assignment] if isinstance(image_item, ImageContent): - item_base_64 = image_item.base_64 - if item_base_64: - user_image = PromptImageFactory.make_prompt_image(base_64=item_base_64) # type: ignore[arg-type] - else: - image_uri = image_item.url - user_image = PromptImageFactory.make_prompt_image_from_uri(uri=image_uri) + user_image = PromptImageFactory.make_prompt_image(url=image_item.url, base_64_str=image_item.base_64) prompt_user_images[user_image_name] = user_image else: msg = ( @@ -114,7 +105,8 @@ async def make_llm_prompt( if prompt_user_images: if not extra_params: extra_params = {} - for image_index, image_name in enumerate(prompt_user_images.keys()): + image_names = list(prompt_user_images.keys()) + for image_index, image_name in enumerate(image_names): # Replacing image variable '{image_name}' with numbered tag '[Image {image_index + 1}]' extra_params[image_name] = f"[Image {image_index + 1}]" user_text: str | None = None diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index d3e728983..8ec30b83b 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -88,9 +88,7 @@ def __init__( try: self.pipelex_hub.setup_config(config_cls=config_cls or PipelexConfig) 
except ValidationError as validation_error: - validation_error_msg = report_validation_error( - category="config", validation_error=validation_error - ) + validation_error_msg = report_validation_error(category="config", validation_error=validation_error) msg = f"Could not setup config because of: {validation_error_msg}" raise PipelexConfigError(msg) from validation_error @@ -117,9 +115,7 @@ def __init__( self.reporting_delegate: ReportingProtocol if get_config().pipelex.feature_config.is_reporting_enabled: - self.reporting_delegate = reporting_delegate or ReportingManager( - reporting_config=get_config().pipelex.reporting_config - ) + self.reporting_delegate = reporting_delegate or ReportingManager(reporting_config=get_config().pipelex.reporting_config) else: self.reporting_delegate = ReportingNoOp() self.pipelex_hub.set_report_delegate(self.reporting_delegate) @@ -144,9 +140,7 @@ def __init__( if pipeline_tracker: self.pipeline_tracker = pipeline_tracker elif get_config().pipelex.feature_config.is_pipeline_tracking_enabled: - self.pipeline_tracker = PipelineTracker( - tracker_config=get_config().pipelex.tracker_config - ) + self.pipeline_tracker = PipelineTracker(tracker_config=get_config().pipelex.tracker_config) else: self.pipeline_tracker = PipelineTrackerNoOp() self.pipelex_hub.set_pipeline_tracker(pipeline_tracker=self.pipeline_tracker) @@ -170,9 +164,7 @@ def _get_config_not_found_error_msg(component_name: str) -> str: return f"Config files are missing for the {component_name}. Run `pipelex init config` to generate the missing files." 
@staticmethod - def _get_validation_error_msg( - component_name: str, validation_exc: Exception - ) -> str: + def _get_validation_error_msg(component_name: str, validation_exc: Exception) -> str: """Generate error message for invalid config files.""" msg = "" cause_exc = validation_exc.__cause__ @@ -217,14 +209,10 @@ def setup( msg = self._get_config_not_found_error_msg("model deck") raise PipelexSetupError(msg) from deck_not_found_exc except RoutingProfileValidationError as routing_validation_exc: - msg = self._get_validation_error_msg( - "routing profile library", routing_validation_exc - ) + msg = self._get_validation_error_msg("routing profile library", routing_validation_exc) raise PipelexSetupError(msg) from routing_validation_exc except InferenceBackendLibraryValidationError as backend_validation_exc: - msg = self._get_validation_error_msg( - "inference backend library", backend_validation_exc - ) + msg = self._get_validation_error_msg("inference backend library", backend_validation_exc) raise PipelexSetupError(msg) from backend_validation_exc except ModelDeckValidationError as deck_validation_exc: msg = self._get_validation_error_msg("model deck", deck_validation_exc) @@ -263,37 +251,27 @@ def setup( observer_provider = observer_provider or LocalObserver() self.pipelex_hub.set_observer_provider(observer_provider=observer_provider) - self.pipelex_hub.set_pipe_router( - pipe_router or PipeRouter(observer_provider=observer_provider) - ) + self.pipelex_hub.set_pipe_router(pipe_router or PipeRouter(observer_provider=observer_provider)) # pipeline self.pipeline_tracker.setup() self.pipeline_manager.setup() - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup done for {get_config().project_name}" - ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup done for {get_config().project_name}") def setup_libraries(self): self.library_manager.setup() self.library_manager.load_libraries() - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup 
libraries done for {get_config().project_name}" - ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for {get_config().project_name}") def validate_libraries(self): try: self.library_manager.validate_libraries() except ValidationError as validation_error: - validation_error_msg = report_validation_error( - category="plx", validation_error=validation_error - ) + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) msg = f"Could not validate libraries because of: {validation_error_msg}" raise PipelexSetupError(msg) from validation_error - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}" - ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}") def teardown(self): # pipelex @@ -312,9 +290,7 @@ def teardown(self): self.class_registry.teardown() func_registry.teardown() - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} teardown done for {get_config().project_name} (except config & logs)" - ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} teardown done for {get_config().project_name} (except config & logs)") self.pipelex_hub.reset_config() # Clear the singleton instance from metaclass if self.__class__ in MetaSingleton.instances: diff --git a/pipelex/plugins/anthropic/anthropic_llm_worker.py b/pipelex/plugins/anthropic/anthropic_llm_worker.py index a685922ea..760bda121 100644 --- a/pipelex/plugins/anthropic/anthropic_llm_worker.py +++ b/pipelex/plugins/anthropic/anthropic_llm_worker.py @@ -69,12 +69,8 @@ def __init__( # Verify if the sdk_instance is compatible with the current LLM platform if isinstance(sdk_instance, (AsyncAnthropic, AsyncAnthropicBedrock)): - if ( - inference_model.sdk == AnthropicSdkVariant.ANTHROPIC - and not (isinstance(sdk_instance, AsyncAnthropic)) - ) or ( - inference_model.sdk == AnthropicSdkVariant.BEDROCK_ANTHROPIC - and not 
(isinstance(sdk_instance, AsyncAnthropicBedrock)) + if (inference_model.sdk == AnthropicSdkVariant.ANTHROPIC and not (isinstance(sdk_instance, AsyncAnthropic))) or ( + inference_model.sdk == AnthropicSdkVariant.BEDROCK_ANTHROPIC and not (isinstance(sdk_instance, AsyncAnthropicBedrock)) ): msg = f"Provided sdk_instance does not match LLMEngine platform:{sdk_instance}" raise SdkTypeError(msg) @@ -85,29 +81,21 @@ def __init__( self.anthropic_async_client = sdk_instance if structure_method: instructor_mode = structure_method.as_instructor_mode() - log.debug( - f"Anthropic structure mode: {structure_method} --> {instructor_mode}" - ) - self.instructor_for_objects = instructor.from_anthropic( - client=sdk_instance, mode=instructor_mode - ) + log.debug(f"Anthropic structure mode: {structure_method} --> {instructor_mode}") + self.instructor_for_objects = instructor.from_anthropic(client=sdk_instance, mode=instructor_mode) else: self.instructor_for_objects = instructor.from_anthropic(client=sdk_instance) instructor_config = get_config().cogt.llm_config.instructor_config if instructor_config.is_dump_kwargs_enabled: - self.instructor_for_objects.on( - hook_name="completion:kwargs", handler=dump_kwargs - ) + self.instructor_for_objects.on(hook_name="completion:kwargs", handler=dump_kwargs) if instructor_config.is_dump_response_enabled: self.instructor_for_objects.on( hook_name="completion:response", handler=dump_response_from_structured_gen, ) if instructor_config.is_dump_error_enabled: - self.instructor_for_objects.on( - hook_name="completion:error", handler=dump_error - ) + self.instructor_for_objects.on(hook_name="completion:error", handler=dump_error) ######################################################### # Instance methods @@ -117,15 +105,9 @@ def __init__( def _adapt_max_tokens(self, max_tokens: int | None) -> int: max_tokens = max_tokens or self.default_max_tokens - if ( - claude_4_tokens_limit := self.extra_config.get( - AnthropicExtraField.CLAUDE_4_TOKENS_LIMIT - ) 
- ) and max_tokens > claude_4_tokens_limit: + if (claude_4_tokens_limit := self.extra_config.get(AnthropicExtraField.CLAUDE_4_TOKENS_LIMIT)) and max_tokens > claude_4_tokens_limit: max_tokens = claude_4_tokens_limit - log.warning( - f"Max tokens is greater than the claude 4 reduced tokens limit, reducing to {max_tokens}" - ) + log.warning(f"Max tokens is greater than the claude 4 reduced tokens limit, reducing to {max_tokens}") if not max_tokens: msg = f"Max tokens is None for model {self.inference_model.desc}" raise AnthropicWorkerConfigurationError(msg) @@ -158,12 +140,8 @@ async def _gen_text( raise LLMCompletionError(msg) full_reply_content = single_content_block.text - if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and ( - usage := response.usage - ): - llm_tokens_usage.nb_tokens_by_category = ( - AnthropicFactory.make_nb_tokens_by_category(usage=usage) - ) + if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and (usage := response.usage): + llm_tokens_usage.nb_tokens_by_category = AnthropicFactory.make_nb_tokens_by_category(usage=usage) return full_reply_content @@ -186,11 +164,7 @@ async def _gen_object( temperature=llm_job.job_params.temperature, max_tokens=max_tokens, ) - if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and ( - usage := completion.usage - ): - llm_tokens_usage.nb_tokens_by_category = ( - AnthropicFactory.make_nb_tokens_by_category(usage=usage) - ) + if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and (usage := completion.usage): + llm_tokens_usage.nb_tokens_by_category = AnthropicFactory.make_nb_tokens_by_category(usage=usage) return result_object diff --git a/pipelex/plugins/openai/openai_img_gen_worker.py b/pipelex/plugins/openai/openai_img_gen_worker.py index c87d949e6..66c7f627d 100644 --- a/pipelex/plugins/openai/openai_img_gen_worker.py +++ b/pipelex/plugins/openai/openai_img_gen_worker.py @@ -13,7 +13,7 @@ from pipelex.cogt.model_backends.model_spec import InferenceModelSpec from 
pipelex.plugins.openai.openai_img_gen_factory import OpenAIImgGenFactory from pipelex.reporting.reporting_protocol import ReportingProtocol -from pipelex.tools.misc.base_64_utils import save_base64_to_binary_file +from pipelex.tools.misc.base_64_utils import save_base_64_str_to_binary_file from pipelex.tools.misc.file_utils import ensure_path TEMP_OUTPUTS_DIR = "temp/img_gen_by_gpt_image" @@ -80,7 +80,7 @@ async def _gen_image_list( folder_path = TEMP_OUTPUTS_DIR ensure_path(folder_path) img_path = f"{folder_path}/{image_id}_{image_index}.png" - save_base64_to_binary_file(b64=image_base64, file_path=img_path) + save_base_64_str_to_binary_file(base_64_str=image_base64, file_path=img_path) log.debug(f"Saved image to {img_path}") generated_image_list.append( GeneratedImage( diff --git a/pipelex/tools/misc/base_64_utils.py b/pipelex/tools/misc/base_64_utils.py index 14cda8867..05e5d39f6 100644 --- a/pipelex/tools/misc/base_64_utils.py +++ b/pipelex/tools/misc/base_64_utils.py @@ -31,19 +31,24 @@ async def encode_to_base64_async(data_bytes: bytes) -> bytes: return await asyncio.to_thread(base64.b64encode, data_bytes) -def save_base64_to_binary_file( - b64: str, - file_path: str, -): - # Ensure we're getting clean base64 data without any prefixes - base64_str = b64 - # Remove potential data URL prefix if present +def strip_base64_str_if_needed(base64_str: str) -> str: if "," in base64_str: - base64_str = base64_str.split(",", 1)[1] + return base64_str.split(",", 1)[1] if "data:" in base64_str and ";base64," in base64_str: - base64_str = base64_str.split(";base64,", 1)[1] + return base64_str.split(";base64,", 1)[1] + return base64_str + + +# def prefixed_base64_str_from_base64_bytes(b64_bytes: bytes) -> str: + + +def save_base_64_str_to_binary_file( + base_64_str: str, + file_path: str, +): + stripped_base_64_str = strip_base64_str_if_needed(base_64_str) # Decode base64 - byte_data = base64.b64decode(base64_str) + byte_data = base64.b64decode(stripped_base_64_str) 
save_bytes_to_binary_file(file_path=file_path, byte_data=byte_data) diff --git a/tests/integration/pipelex/cogt/test_llm_vision.py b/tests/integration/pipelex/cogt/test_llm_vision.py index d4c8ea51c..80923acff 100644 --- a/tests/integration/pipelex/cogt/test_llm_vision.py +++ b/tests/integration/pipelex/cogt/test_llm_vision.py @@ -18,7 +18,7 @@ class TestLLMVision: @pytest.mark.parametrize(("topic", "image_uri"), LLMVisionTestCases.IMAGE_URLS) async def test_gen_text_from_vision_by_url(self, llm_handle_for_vision: str, topic: str, image_uri: str): - prompt_image = PromptImageFactory.make_prompt_image_from_uri(uri=image_uri) + prompt_image = PromptImageFactory.make_prompt_image(url=image_uri) llm_worker = get_llm_worker(llm_handle=llm_handle_for_vision) log.info(f"Using llm_worker: {llm_worker.desc}") llm_job = LLMJobFactory.make_llm_job( diff --git a/tests/integration/pipelex/plugins/test_openai_image_gen.py b/tests/integration/pipelex/plugins/test_openai_image_gen.py index 86a41fb1d..7043b3c86 100644 --- a/tests/integration/pipelex/plugins/test_openai_image_gen.py +++ b/tests/integration/pipelex/plugins/test_openai_image_gen.py @@ -4,7 +4,7 @@ from pipelex.hub import get_models_manager from pipelex.plugins.openai.openai_factory import OpenAIFactory from pipelex.plugins.plugin_sdk_registry import Plugin -from pipelex.tools.misc.base_64_utils import save_base64_to_binary_file +from pipelex.tools.misc.base_64_utils import save_base_64_str_to_binary_file from pipelex.tools.misc.file_utils import ensure_path, get_incremental_file_path from tests.conftest import TEST_OUTPUTS_DIR from tests.integration.pipelex.test_data import ImageGenTestCases @@ -51,4 +51,4 @@ async def test_gpt_image_generation(self, topic: str, image_desc: str): extension="png", avoid_suffix_if_possible=True, ) - save_base64_to_binary_file(b64=image_base64, file_path=img_path) + save_base_64_str_to_binary_file(base_64_str=image_base64, file_path=img_path) diff --git 
a/tests/unit/pipelex/tools/misc/test_base_64_utils.py b/tests/unit/pipelex/tools/misc/test_base_64_utils.py index b63b9092e..779b3b5d0 100644 --- a/tests/unit/pipelex/tools/misc/test_base_64_utils.py +++ b/tests/unit/pipelex/tools/misc/test_base_64_utils.py @@ -8,7 +8,7 @@ encode_to_base64_async, load_binary_as_base64, load_binary_as_base64_async, - save_base64_to_binary_file, + save_base_64_str_to_binary_file, ) from tests.cases import FileHelperTestCases @@ -53,7 +53,7 @@ def test_save_base64_to_binary_file_plain(self, tmp_path: Path) -> None: b64 = base64.b64encode(data).decode() out_file = tmp_path / "out.bin" - save_base64_to_binary_file(b64=b64, file_path=str(out_file)) + save_base_64_str_to_binary_file(base_64_str=b64, file_path=str(out_file)) with open(out_file, "rb") as f: assert f.read() == data @@ -64,7 +64,7 @@ def test_save_base64_to_binary_file_data_url(self, tmp_path: Path) -> None: data_url = f"data:application/octet-stream;base64,{b64}" out_file = tmp_path / "out.bin" - save_base64_to_binary_file(b64=data_url, file_path=str(out_file)) + save_base_64_str_to_binary_file(base_64_str=data_url, file_path=str(out_file)) with open(out_file, "rb") as f: assert f.read() == data From 173c3fc60602849ef5cb4897312edd6c8014f1c9 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 14:20:51 +0200 Subject: [PATCH 048/115] WIP fixing prompt image --- pipelex/core/memory/working_memory.py | 4 +- pipelex/core/stuffs/image_content.py | 26 ----- .../llm/llm_prompt_blueprint.py | 3 +- pipelex/tools/exceptions.py | 4 + pipelex/tools/misc/dict_utils.py | 64 +++++++++++ .../pipelex/pipes/test_image_inputs.py | 2 +- .../misc_tests/test_image_inputs.plx | 4 +- .../pipelex/tools/misc/test_dict_utils.py | 108 +++++++++++++++++- 8 files changed, 182 insertions(+), 33 deletions(-) diff --git a/pipelex/core/memory/working_memory.py b/pipelex/core/memory/working_memory.py index 241063ccd..ceec2b920 100644 --- a/pipelex/core/memory/working_memory.py +++ 
b/pipelex/core/memory/working_memory.py @@ -197,8 +197,8 @@ def generate_jinja2_context(self) -> dict[str, Any]: # TODO: Add unit tests for this method artefact_dict: StuffArtefactDict = {} for name, stuff in self.root.items(): - a = stuff.make_artefact() - artefact_dict[name] = a + artefact = stuff.make_artefact() + artefact_dict[name] = artefact for alias, target in self.aliases.items(): artefact_dict[alias] = artefact_dict[target] return artefact_dict diff --git a/pipelex/core/stuffs/image_content.py b/pipelex/core/stuffs/image_content.py index a4c930635..3ec663a62 100644 --- a/pipelex/core/stuffs/image_content.py +++ b/pipelex/core/stuffs/image_content.py @@ -101,29 +101,3 @@ def save_to_directory(self, directory: str, base_name: str | None = None, extens avoid_suffix_if_possible=True, ) save_text_to_path(text=source_prompt, path=source_prompt_file_path) - - -class PDFContent(StuffContent): - url: str - - @property - @override - def short_desc(self) -> str: - url_desc = interpret_path_or_url(path_or_uri=self.url).desc - return f"{url_desc} of a PDF document" - - @override - def rendered_plain(self) -> str: - return self.url - - @override - def rendered_html(self) -> str: - doc = Doc() - doc.stag("a", href=self.url, klass="msg-pdf") - doc.text(self.url) - - return doc.getvalue() - - @override - def rendered_markdown(self, level: int = 1, is_pretty: bool = False) -> str: - return f"[{self.url}]({self.url})" diff --git a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py index a1b281cbe..6e88367f9 100644 --- a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py +++ b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py @@ -13,6 +13,7 @@ from pipelex.hub import get_content_generator from pipelex.tools.jinja2.jinja2_required_variables import detect_jinja2_required_variables from pipelex.tools.misc.context_provider_abstract import ContextProviderAbstract, ContextProviderException +from pipelex.tools.misc.dict_utils 
import substitute_nested_in_context if TYPE_CHECKING: from pipelex.cogt.image.prompt_image import PromptImage @@ -157,7 +158,7 @@ async def _unravel_text( context: dict[str, Any] = context_provider.generate_jinja2_context() if extra_params: - context.update(**extra_params) + context = substitute_nested_in_context(context=context, extra_params=extra_params) if jinja2_blueprint.extra_context: context.update(**jinja2_blueprint.extra_context) diff --git a/pipelex/tools/exceptions.py b/pipelex/tools/exceptions.py index 20eef6a10..bd3547502 100644 --- a/pipelex/tools/exceptions.py +++ b/pipelex/tools/exceptions.py @@ -14,6 +14,10 @@ class ToolException(RootException): pass +class NestedKeyConflictError(ToolException): + """Raised when attempting to create nested keys under a non-dict value.""" + + class CredentialsError(RootException): pass diff --git a/pipelex/tools/misc/dict_utils.py b/pipelex/tools/misc/dict_utils.py index fe8c88a83..98ca2dcee 100644 --- a/pipelex/tools/misc/dict_utils.py +++ b/pipelex/tools/misc/dict_utils.py @@ -3,6 +3,9 @@ from collections.abc import Callable from typing import Any, TypeVar, cast +from pipelex import log +from pipelex.tools.exceptions import NestedKeyConflictError + K = TypeVar("K") V = TypeVar("V") @@ -93,3 +96,64 @@ def apply_to_strings_in_list(data: list[Any], transform_func: Callable[[str], st else: result.append(item) return result + + +def substitute_nested_in_context(context: dict[str, Any], extra_params: dict[str, Any] | None = None) -> dict[str, Any]: + """Substitute nested values in context dict using dotted key notation. + + This function processes keys from extra_params that contain dots (e.g., "foo.bar.blip") + and creates nested dictionary structures in the context dict. Keys without dots are + added directly to the context. 
+ + Args: + context: The context dictionary to mutate + extra_params: Dictionary with potentially dotted keys to process + + Returns: + The mutated context dictionary + + Raises: + NestedKeyConflictError: When attempting to create nested keys under a non-dict value + + Example: + >>> context = {} + >>> extra_params = {"foo.bar.blip": "hello"} + >>> substitute_nested_in_context(context, extra_params) + >>> context + {'foo': {'bar': {'blip': 'hello'}}} + + """ + if not extra_params: + return context + + original_context = context.copy() + + for key, value in extra_params.items(): + if "." not in key: + # Simple key without dots - add directly to context + context[key] = value + else: + # Dotted key - create nested structure + segments = key.split(".") + current = context + + # Navigate/create nested dicts for all segments except the last + for segment in segments[:-1]: + if segment not in current: + # Create new nested dict + current[segment] = {} + elif not (hasattr(current[segment], "__getitem__") and hasattr(current[segment], "__setitem__")): + # Conflict: trying to nest under a non-dict-like value + # Must support both __getitem__ and __setitem__ to be dict-like (e.g., dict, StuffArtefact) + error_message = f"Cannot set nested key '{key}': '{segment}' is not a dict-like object" + log.error(original_context, title="original_context") + log.error(extra_params, title="extra_params") + raise NestedKeyConflictError(error_message) + # Navigate into the nested dict + current = current[segment] + + # Set the final value + last_segment = segments[-1] + current[last_segment] = value + + return context diff --git a/tests/integration/pipelex/pipes/test_image_inputs.py b/tests/integration/pipelex/pipes/test_image_inputs.py index 569e3eeb9..4a76dc361 100644 --- a/tests/integration/pipelex/pipes/test_image_inputs.py +++ b/tests/integration/pipelex/pipes/test_image_inputs.py @@ -62,7 +62,7 @@ async def test_describe_page(self, request: FixtureRequest, pipe_run_mode: PipeR """ # 
Create the page content image_content = ImageContent(url=ImageTestCases.IMAGE_FILE_PATH_PNG) - text_and_images = TextAndImagesContent(text=TextContent(text="This is the description of the page blablabla"), images=[]) + text_and_images = TextAndImagesContent(text=TextContent(text="It was designed by Slartibartfast, a famous designer"), images=[]) page_content = PageContent(text_and_images=text_and_images, page_view=image_content) # Create stuff from page content diff --git a/tests/test_pipelines/misc_tests/test_image_inputs.plx b/tests/test_pipelines/misc_tests/test_image_inputs.plx index 78f454bf2..07a881919 100644 --- a/tests/test_pipelines/misc_tests/test_image_inputs.plx +++ b/tests/test_pipelines/misc_tests/test_image_inputs.plx @@ -27,11 +27,11 @@ system_prompt = """ You are an expert at describing page contents. """ prompt = """ -Extract the date and title of the article. +Extract the date and title. @page.page_view -Also, add this as the description of the article: +Also, add this to the description: $page.text_and_images.text.text """ diff --git a/tests/unit/pipelex/tools/misc/test_dict_utils.py b/tests/unit/pipelex/tools/misc/test_dict_utils.py index 4cc21158c..ea1271ab7 100644 --- a/tests/unit/pipelex/tools/misc/test_dict_utils.py +++ b/tests/unit/pipelex/tools/misc/test_dict_utils.py @@ -1,7 +1,10 @@ from typing import Any +import pytest + from pipelex.core.concepts.concept_native import NativeConceptCode -from pipelex.tools.misc.dict_utils import apply_to_strings_in_list, apply_to_strings_recursive, insert_before +from pipelex.tools.exceptions import NestedKeyConflictError +from pipelex.tools.misc.dict_utils import apply_to_strings_in_list, apply_to_strings_recursive, insert_before, substitute_nested_in_context class TestDictUtils: @@ -243,3 +246,106 @@ def transform(s: str) -> str: assert result[2][0] == "list in list list" assert result[2][1][0]["very_deep"] == "very deep value very_deep" assert result[2][1][1] == "deep list item deep_list" + + def 
test_substitute_nested_in_context_basic(self) -> None: + """Test basic nested substitution with dotted keys.""" + context: dict[str, Any] = {} + extra_params = {"foo.bar": "hello"} + + result = substitute_nested_in_context(context, extra_params) + + assert result is context # Should mutate original + assert context["foo"]["bar"] == "hello" + + def test_substitute_nested_in_context_deep_nesting(self) -> None: + """Test deep nesting with multiple levels.""" + context: dict[str, Any] = {} + extra_params = {"a.b.c.d": "deep_value"} + + substitute_nested_in_context(context, extra_params) + + assert context["a"]["b"]["c"]["d"] == "deep_value" + + def test_substitute_nested_in_context_multiple_keys(self) -> None: + """Test multiple nested keys, some sharing prefixes.""" + context: dict[str, Any] = {} + extra_params = { + "foo.bar.nested": "value3", + "foo.baz": "value2", + "other.key": "value4", + } + + substitute_nested_in_context(context, extra_params) + + assert context["foo"]["bar"]["nested"] == "value3" + assert context["foo"]["baz"] == "value2" + assert context["other"]["key"] == "value4" + + def test_substitute_nested_in_context_mixed_keys(self) -> None: + """Test mixed keys with and without dots.""" + context: dict[str, Any] = {} + extra_params = { + "simple": "simple_value", + "nested.key": "nested_value", + "another": 42, + } + + substitute_nested_in_context(context, extra_params) + + assert context["simple"] == "simple_value" + assert context["nested"]["key"] == "nested_value" + assert context["another"] == 42 + + def test_substitute_nested_in_context_existing_nested(self) -> None: + """Test extending existing nested structures.""" + context: dict[str, Any] = {"foo": {"existing": "old_value"}} + extra_params = {"foo.bar": "new_value"} + + substitute_nested_in_context(context, extra_params) + + assert context["foo"]["existing"] == "old_value" + assert context["foo"]["bar"] == "new_value" + + def test_substitute_nested_in_context_conflict(self) -> None: + """Test 
error when trying to nest under a non-dict-like value.""" + context: dict[str, Any] = {"foo": "string_value"} + extra_params = {"foo.bar": "new_value"} + + with pytest.raises(NestedKeyConflictError) as exc_info: + substitute_nested_in_context(context, extra_params) + + assert "foo.bar" in exc_info.value.message + assert "foo" in exc_info.value.message + assert "dict-like" in exc_info.value.message + + def test_substitute_nested_in_context_none_extra_params(self) -> None: + """Test function works when extra_params is None.""" + context: dict[str, Any] = {"existing": "value"} + + result = substitute_nested_in_context(context, None) + + assert result is context + assert context == {"existing": "value"} + + def test_substitute_nested_in_context_empty_extra_params(self) -> None: + """Test function works with empty extra_params.""" + context: dict[str, Any] = {"existing": "value"} + extra_params: dict[str, Any] = {} + + result = substitute_nested_in_context(context, extra_params) + + assert result is context + assert context == {"existing": "value"} + + def test_substitute_nested_in_context_mutation(self) -> None: + """Test that the original context dict is mutated.""" + context: dict[str, Any] = {"existing": "value"} + original_id = id(context) + extra_params = {"foo.bar": "hello"} + + result = substitute_nested_in_context(context, extra_params) + + assert id(result) == original_id + assert result is context + assert context["foo"]["bar"] == "hello" + assert context["existing"] == "value" From c634bbce36fec505c801634a5865b34567ef2148 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 16:40:53 +0200 Subject: [PATCH 049/115] Fix image url / base 64 issues --- .gitignore | 3 - .../content_generator_dry.py | 4 +- pipelex/cogt/exceptions.py | 8 ++ pipelex/cogt/image/prompt_image.py | 8 ++ pipelex/cogt/models/model_deck.py | 2 +- pipelex/core/interpreter.py | 14 -- pipelex/core/stuffs/image_content.py | 101 ++++++++------- pipelex/core/stuffs/page_content.py 
| 11 +- .../core/stuffs/text_and_images_content.py | 15 +-- pipelex/language/plx_factory.py | 12 -- pipelex/tools/misc/base_64_utils.py | 5 +- pyproject.toml | 1 - .../pipelex/pipes/test_image_inputs.py | 3 +- .../pipelex/cogt/models/test_model_deck.py | 16 ++- uv.lock | 121 ------------------ 15 files changed, 102 insertions(+), 222 deletions(-) diff --git a/.gitignore b/.gitignore index 7b442388d..00a8bc1ba 100644 --- a/.gitignore +++ b/.gitignore @@ -43,9 +43,6 @@ Thumbs.db reports/ results/ -# Pipelex libraries, duplicated from pipelex/libraries/ by `pipelex init-libraries` -/pipelex_libraries - # personnal pipelex config file that overrides the default one pipelex_super.toml diff --git a/pipelex/cogt/content_generation/content_generator_dry.py b/pipelex/cogt/content_generation/content_generator_dry.py index 9aa8359c4..f1ed386b7 100644 --- a/pipelex/cogt/content_generation/content_generator_dry.py +++ b/pipelex/cogt/content_generation/content_generator_dry.py @@ -21,6 +21,8 @@ from pipelex.tools.jinja2.jinja2_parsing import check_jinja2_parsing from pipelex.tools.typing.pydantic_utils import BaseModelTypeVar +DRY_BASE_64_IMAGE = "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC" + class ContentGeneratorDry(ContentGeneratorProtocol): """This class is used to generate mock content for testing purposes. 
@@ -222,7 +224,7 @@ async def make_extract_pages( extracted_images=[], page_view=ExtractedImageFromPage( image_id=f"page_view_{page_index}", - base_64="", + base_64=DRY_BASE_64_IMAGE, caption="DRY RUN: OCR text", ), ) diff --git a/pipelex/cogt/exceptions.py b/pipelex/cogt/exceptions.py index 5549e594b..980f91db4 100644 --- a/pipelex/cogt/exceptions.py +++ b/pipelex/cogt/exceptions.py @@ -10,6 +10,10 @@ class LLMConfigError(CogtError): pass +class ImageContentError(CogtError): + pass + + class InferenceManagerWorkerSetupError(CogtError, FatalError): pass @@ -110,6 +114,10 @@ class PromptImageFactoryError(CogtError): pass +class PromptImageDefinitionError(CogtError): + pass + + class PromptImageFormatError(CogtError): pass diff --git a/pipelex/cogt/image/prompt_image.py b/pipelex/cogt/image/prompt_image.py index e7d56cef9..8f39486bd 100644 --- a/pipelex/cogt/image/prompt_image.py +++ b/pipelex/cogt/image/prompt_image.py @@ -45,6 +45,14 @@ def __str__(self) -> str: class PromptImageUrl(PromptImage): url: str + # @field_validator("url") + # @classmethod + # def validate_url(cls, value: str) -> str: + # if not (value.startswith(("http", "file", "data:image/"))): + # msg = f"Image url must start with http, file, or data:image/, here it starts with '{value[:100]}'" + # raise PromptImageDefinitionError(msg) + # return value + @override def __str__(self) -> str: truncated_url = AttributePolisher.get_truncated_value(name="url", value=self.url) diff --git a/pipelex/cogt/models/model_deck.py b/pipelex/cogt/models/model_deck.py index 51d952b84..495c6ff4c 100644 --- a/pipelex/cogt/models/model_deck.py +++ b/pipelex/cogt/models/model_deck.py @@ -201,7 +201,7 @@ def get_optional_inference_model(self, model_handle: str) -> InferenceModelSpec for alias in alias_list: if inference_model := self.get_optional_inference_model(model_handle=alias): return inference_model - log.warning(f"Skipping model handle '{model_handle}' because it's not found in deck") + log.warning(f"Skipping model 
handle '{model_handle}' because it's not found in deck, it could be an external plugin.") return None def is_handle_defined(self, model_handle: str) -> bool: diff --git a/pipelex/core/interpreter.py b/pipelex/core/interpreter.py index 146bfad42..d85693850 100644 --- a/pipelex/core/interpreter.py +++ b/pipelex/core/interpreter.py @@ -20,20 +20,6 @@ class PipelexInterpreter(BaseModel): file_path: Path | None = None file_content: str | None = None - # @staticmethod - # def escape_plx_string(value: str | None) -> str: - # """Escape a string for plx serialization.""" - # if value is None: - # return "" - # # Escape backslashes first (must be done first) - # value = value.replace("\\", "\\\\") - # # Escape quotes - # value = value.replace('"', '\\"') - # # Replace actual newlines with escaped newlines - # value = value.replace("\n", "\\n") - # value = value.replace("\r", "\\r") - # return value.replace("\t", "\\t") - @model_validator(mode="after") def check_file_path_or_file_content(self) -> Self: """Need to check if there is at least one of file_path or file_content""" diff --git a/pipelex/core/stuffs/image_content.py b/pipelex/core/stuffs/image_content.py index 3ec663a62..3adae2c6d 100644 --- a/pipelex/core/stuffs/image_content.py +++ b/pipelex/core/stuffs/image_content.py @@ -6,12 +6,11 @@ from typing_extensions import override from yattag import Doc +from pipelex.cogt.exceptions import ImageContentError from pipelex.cogt.extract.extract_output import ExtractedImage from pipelex.core.stuffs.stuff_content import StuffContent -from pipelex.tools.misc.base_64_utils import save_base_64_str_to_binary_file -from pipelex.tools.misc.file_utils import ensure_directory_exists, get_incremental_file_path, save_text_to_path -from pipelex.tools.misc.filetype_utils import detect_file_type_from_base64 -from pipelex.tools.misc.path_utils import InterpretedPathOrUrl, interpret_path_or_url +from pipelex.tools.misc.base_64_utils import prefixed_base64_str_from_base64_bytes +from 
pipelex.tools.misc.path_utils import interpret_path_or_url from pipelex.types import Self @@ -29,7 +28,7 @@ def short_desc(self) -> str: @override def rendered_plain(self) -> str: - return self.url + return self.url[:500] @override def rendered_html(self) -> str: @@ -40,7 +39,7 @@ def rendered_html(self) -> str: @override def rendered_markdown(self, level: int = 1, is_pretty: bool = False) -> str: - return f"![{self.url}]({self.url})" + return f"![{self.url[:100]}]({self.url})" @override def rendered_json(self) -> str: @@ -48,56 +47,62 @@ def rendered_json(self) -> str: @classmethod def make_from_extracted_image(cls, extracted_image: ExtractedImage) -> Self: - return cls( - url=extracted_image.image_id, - base_64=extracted_image.base_64, - caption=extracted_image.caption, - ) + if base_64 := extracted_image.base_64: + prefixed_base64_str = prefixed_base64_str_from_base64_bytes(b64_bytes=base_64.encode()) + return cls( + url=prefixed_base64_str, + base_64=extracted_image.base_64, + caption=extracted_image.caption, + ) + else: + msg = f"Base 64 is required for image content: {extracted_image}" + raise ImageContentError(msg) @classmethod def make_from_image(cls, image: Image.Image) -> Self: buffer = BytesIO() image.save(buffer, format="PNG") base_64 = base64.b64encode(buffer.getvalue()).decode("utf-8") + prefixed_base64_str = prefixed_base64_str_from_base64_bytes(b64_bytes=base_64.encode()) return cls( - url=f"data:image/png;base64,{base_64}", + url=prefixed_base64_str, base_64=base_64, ) - def save_to_directory(self, directory: str, base_name: str | None = None, extension: str | None = None): - ensure_directory_exists(directory) - base_name = base_name or "img" - if (base_64 := self.base_64) and not extension: - match interpret_path_or_url(path_or_uri=self.url): - case InterpretedPathOrUrl.FILE_NAME: - parts = self.url.rsplit(".", 1) - base_name = parts[0] - extension = parts[1] - case InterpretedPathOrUrl.FILE_PATH | InterpretedPathOrUrl.FILE_URI | 
InterpretedPathOrUrl.URL | InterpretedPathOrUrl.BASE_64: - file_type = detect_file_type_from_base64(b64=base_64) - base_name = base_name or "img" - extension = file_type.extension - file_path = get_incremental_file_path( - base_path=directory, - base_name=base_name, - extension=extension, - avoid_suffix_if_possible=True, - ) - save_base_64_str_to_binary_file(base_64_str=base_64, file_path=file_path) + # def save_to_directory(self, directory: str, base_name: str | None = None, extension: str | None = None): + # ensure_directory_exists(directory) + # base_name = base_name or "img" + # if (base_64 := self.base_64) and not extension: + # match interpret_path_or_url(path_or_uri=self.url): + # case InterpretedPathOrUrl.FILE_NAME: + # parts = self.url.rsplit(".", 1) + # base_name = parts[0] + # extension = parts[1] + # case InterpretedPathOrUrl.FILE_PATH | InterpretedPathOrUrl.FILE_URI | InterpretedPathOrUrl.URL | InterpretedPathOrUrl.BASE_64: + # file_type = detect_file_type_from_base64(b64=base_64) + # base_name = base_name or "img" + # extension = file_type.extension + # file_path = get_incremental_file_path( + # base_path=directory, + # base_name=base_name, + # extension=extension, + # avoid_suffix_if_possible=True, + # ) + # save_base_64_str_to_binary_file(base_64_str=base_64, file_path=file_path) - if caption := self.caption: - caption_file_path = get_incremental_file_path( - base_path=directory, - base_name=f"{base_name}_caption", - extension="txt", - avoid_suffix_if_possible=True, - ) - save_text_to_path(text=caption, path=caption_file_path) - if source_prompt := self.source_prompt: - source_prompt_file_path = get_incremental_file_path( - base_path=directory, - base_name=f"{base_name}_source_prompt", - extension="txt", - avoid_suffix_if_possible=True, - ) - save_text_to_path(text=source_prompt, path=source_prompt_file_path) + # if caption := self.caption: + # caption_file_path = get_incremental_file_path( + # base_path=directory, + # 
base_name=f"{base_name}_caption", + # extension="txt", + # avoid_suffix_if_possible=True, + # ) + # save_text_to_path(text=caption, path=caption_file_path) + # if source_prompt := self.source_prompt: + # source_prompt_file_path = get_incremental_file_path( + # base_path=directory, + # base_name=f"{base_name}_source_prompt", + # extension="txt", + # avoid_suffix_if_possible=True, + # ) + # save_text_to_path(text=source_prompt, path=source_prompt_file_path) diff --git a/pipelex/core/stuffs/page_content.py b/pipelex/core/stuffs/page_content.py index c8957007d..4e32bc2d5 100644 --- a/pipelex/core/stuffs/page_content.py +++ b/pipelex/core/stuffs/page_content.py @@ -1,15 +1,14 @@ from pipelex.core.stuffs.image_content import ImageContent from pipelex.core.stuffs.structured_content import StructuredContent from pipelex.core.stuffs.text_and_images_content import TextAndImagesContent -from pipelex.tools.misc.file_utils import ensure_directory_exists class PageContent(StructuredContent): text_and_images: TextAndImagesContent page_view: ImageContent | None = None - def save_to_directory(self, directory: str): - ensure_directory_exists(directory) - self.text_and_images.save_to_directory(directory=directory) - if page_view := self.page_view: - page_view.save_to_directory(directory=directory, base_name="page_view") + # def save_to_directory(self, directory: str): + # ensure_directory_exists(directory) + # self.text_and_images.save_to_directory(directory=directory) + # if page_view := self.page_view: + # page_view.save_to_directory(directory=directory, base_name="page_view") diff --git a/pipelex/core/stuffs/text_and_images_content.py b/pipelex/core/stuffs/text_and_images_content.py index 03eab91a1..f1f93cb5b 100644 --- a/pipelex/core/stuffs/text_and_images_content.py +++ b/pipelex/core/stuffs/text_and_images_content.py @@ -3,7 +3,6 @@ from pipelex.core.stuffs.image_content import ImageContent from pipelex.core.stuffs.stuff_content import StuffContent from 
pipelex.core.stuffs.text_content import TextContent -from pipelex.tools.misc.file_utils import ensure_directory_exists class TextAndImagesContent(StuffContent): @@ -33,10 +32,10 @@ def rendered_html(self) -> str: rendered = "" return rendered - def save_to_directory(self, directory: str): - ensure_directory_exists(directory) - if text_content := self.text: - text_content.save_to_directory(directory=directory) - if images := self.images: - for image_content in images: - image_content.save_to_directory(directory=directory) + # def save_to_directory(self, directory: str): + # ensure_directory_exists(directory) + # if text_content := self.text: + # text_content.save_to_directory(directory=directory) + # if images := self.images: + # for image_content in images: + # image_content.save_to_directory(directory=directory) diff --git a/pipelex/language/plx_factory.py b/pipelex/language/plx_factory.py index 02b34518b..6494a979d 100644 --- a/pipelex/language/plx_factory.py +++ b/pipelex/language/plx_factory.py @@ -329,18 +329,6 @@ def dict_to_plx_styled_toml(cls, data: Mapping[str, Any]) -> str: return cls.add_spaces_to_inline_tables(toml_output) return toml_output - # @classmethod - # def _remove_pipe_category_from_pipes(cls, data: dict[str, Any]) -> dict[str, Any]: - # """Remove the technical pipe_category field from all pipe definitions.""" - # if "pipe" in data and isinstance(data["pipe"], dict): - # pipe_section = cast("dict[str, Any]", data["pipe"]) - # for pipe_def in pipe_section.values(): - # if isinstance(pipe_def, dict): - # pipe_def_dict = cast("dict[str, Any]", pipe_def) - # if "pipe_category" in pipe_def_dict: - # del pipe_def_dict["pipe_category"] - # return data - @classmethod def make_plx_content(cls, blueprint: PipelexBundleBlueprint) -> str: blueprint_dict = blueprint.model_dump(serialize_as_any=True) diff --git a/pipelex/tools/misc/base_64_utils.py b/pipelex/tools/misc/base_64_utils.py index 05e5d39f6..b4469271b 100644 --- 
a/pipelex/tools/misc/base_64_utils.py +++ b/pipelex/tools/misc/base_64_utils.py @@ -4,6 +4,7 @@ import aiofiles from pipelex.tools.misc.file_utils import save_bytes_to_binary_file +from pipelex.tools.misc.filetype_utils import detect_file_type_from_base64 def load_binary_as_base64(path: str) -> bytes: @@ -39,7 +40,9 @@ def strip_base64_str_if_needed(base64_str: str) -> str: return base64_str -# def prefixed_base64_str_from_base64_bytes(b64_bytes: bytes) -> str: +def prefixed_base64_str_from_base64_bytes(b64_bytes: bytes) -> str: + file_type = detect_file_type_from_base64(b64_bytes) + return f"data:{file_type.mime};base64,{base64.b64encode(b64_bytes).decode('utf-8')}" def save_base_64_str_to_binary_file( diff --git a/pyproject.toml b/pyproject.toml index a1eb30e91..4b1b6c5a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,6 @@ dependencies = [ "json2html>=1.3.0", "kajson==0.3.1", "markdown>=3.6", - "markdown-parser-py==1.0.1", "networkx>=3.4.2", "openai>=1.60.1", "openpyxl>=3.1.5", diff --git a/tests/integration/pipelex/pipes/test_image_inputs.py b/tests/integration/pipelex/pipes/test_image_inputs.py index 4a76dc361..657f0b9a1 100644 --- a/tests/integration/pipelex/pipes/test_image_inputs.py +++ b/tests/integration/pipelex/pipes/test_image_inputs.py @@ -61,7 +61,8 @@ async def test_describe_page(self, request: FixtureRequest, pipe_run_mode: PipeR But also accepts basic objects """ # Create the page content - image_content = ImageContent(url=ImageTestCases.IMAGE_FILE_PATH_PNG) + # image_content = ImageContent(url=ImageTestCases.IMAGE_FILE_PATH_PNG) + image_content = ImageContent(url=f"file://{ImageTestCases.IMAGE_FILE_PATH_PNG}") text_and_images = TextAndImagesContent(text=TextContent(text="It was designed by Slartibartfast, a famous designer"), images=[]) page_content = PageContent(text_and_images=text_and_images, page_view=image_content) diff --git a/tests/unit/pipelex/cogt/models/test_model_deck.py b/tests/unit/pipelex/cogt/models/test_model_deck.py 
index 85b159f18..06d044333 100644 --- a/tests/unit/pipelex/cogt/models/test_model_deck.py +++ b/tests/unit/pipelex/cogt/models/test_model_deck.py @@ -61,7 +61,9 @@ def test_direct_model_lookup_not_found(self, mocker: MockerFixture): # Assert assert result is None - mock_log.warning.assert_called_once_with("Skipping model handle 'nonexistent-model' because it's not found in deck") + mock_log.warning.assert_called_once_with( + "Skipping model handle 'nonexistent-model' because it's not found in deck, it could be an external plugin." + ) def test_simple_string_alias_resolution_success(self, mocker: MockerFixture): # Arrange @@ -88,7 +90,7 @@ def test_simple_string_alias_resolution_not_found(self, mocker: MockerFixture): assert result is None mock_log.debug.assert_called_once_with("Redirection for 'best-gpt': nonexistent-model") # The final warning is about the original alias, not the target - mock_log.warning.assert_called_with("Skipping model handle 'best-gpt' because it's not found in deck") + mock_log.warning.assert_called_with("Skipping model handle 'best-gpt' because it's not found in deck, it could be an external plugin.") def test_list_alias_resolution_first_success(self, mocker: MockerFixture): # Arrange @@ -176,7 +178,9 @@ def test_empty_alias_list(self, mocker: MockerFixture): assert result is None # Empty list evaluates to False, so no debug log is called mock_log.debug.assert_not_called() - mock_log.warning.assert_called_once_with("Skipping model handle 'empty-alias' because it's not found in deck") + mock_log.warning.assert_called_once_with( + "Skipping model handle 'empty-alias' because it's not found in deck, it could be an external plugin." 
+ ) def test_circular_alias_prevention(self, mocker: MockerFixture): # Note: The current implementation doesn't have explicit circular reference detection, @@ -253,7 +257,9 @@ def test_edge_case_llm_handles(self, llm_handle: str, mocker: MockerFixture): # Assert assert result is None - mock_log.warning.assert_called_once_with(f"Skipping model handle '{llm_handle}' because it's not found in deck") + mock_log.warning.assert_called_once_with( + f"Skipping model handle '{llm_handle}' because it's not found in deck, it could be an external plugin." + ) def test_logging_behavior(self, mocker: MockerFixture): # Arrange @@ -268,4 +274,4 @@ def test_logging_behavior(self, mocker: MockerFixture): mock_log.debug.assert_called_with("Redirection for 'test-alias': target-model") # The final warning is about the original alias, not the target - mock_log.warning.assert_called_with("Skipping model handle 'test-alias' because it's not found in deck") + mock_log.warning.assert_called_with("Skipping model handle 'test-alias' because it's not found in deck, it could be an external plugin.") diff --git a/uv.lock b/uv.lock index 9cbc04160..bf4c32296 100644 --- a/uv.lock +++ b/uv.lock @@ -1292,18 +1292,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] -[[package]] -name = "markdown-parser-py" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "regex" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/39/29/d680c74bf64dc510ffd83c0321db569cf17f4a752cb086a47e18401bc6a1/markdown_parser_py-1.0.1.tar.gz", hash = "sha256:ed05765c8ba9d9459280aba7a6af6314fdadeb814e0258b17809da29dd4f957e", size = 6718, upload-time = "2025-09-26T15:32:41.831Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/9d/ec/23a5d686200344695258d15fa12be9e3e87004dfb80d62b0f1515546278f/markdown_parser_py-1.0.1-py3-none-any.whl", hash = "sha256:687792834352afc1fe9dc37c2ad20f7182612159e5c5222a34261093686e1d3a", size = 7032, upload-time = "2025-09-26T15:32:40.46Z" }, -] - [[package]] name = "markupsafe" version = "3.0.3" @@ -2163,7 +2151,6 @@ dependencies = [ { name = "json2html" }, { name = "kajson" }, { name = "markdown" }, - { name = "markdown-parser-py" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "openai" }, @@ -2253,7 +2240,6 @@ requires-dist = [ { name = "json2html", specifier = ">=1.3.0" }, { name = "kajson", specifier = "==0.3.1" }, { name = "markdown", specifier = ">=3.6" }, - { name = "markdown-parser-py", specifier = "==1.0.1" }, { name = "mistralai", marker = "extra == 'mistralai'", specifier = "==1.5.2" }, { name = "mkdocs", marker = "extra == 'docs'", specifier = "==1.6.1" }, { name = "mkdocs-glightbox", marker = "extra == 'docs'", specifier = "==0.4.0" }, @@ -2892,113 +2878,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/11/432f32f8097b03e3cd5fe57e88efb685d964e2e5178a48ed61e841f7fdce/pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04", size = 4722, upload-time = "2025-05-13T15:23:59.629Z" }, ] -[[package]] -name = "regex" -version = "2025.9.18" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/49/d3/eaa0d28aba6ad1827ad1e716d9a93e1ba963ada61887498297d3da715133/regex-2025.9.18.tar.gz", hash = "sha256:c5ba23274c61c6fef447ba6a39333297d0c247f53059dba0bca415cac511edc4", size = 400917, upload-time = "2025-09-19T00:38:35.79Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/7e/d8/7e06171db8e55f917c5b8e89319cea2d86982e3fc46b677f40358223dece/regex-2025.9.18-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:12296202480c201c98a84aecc4d210592b2f55e200a1d193235c4db92b9f6788", size = 484829, upload-time = "2025-09-19T00:35:05.215Z" }, - { url = "https://files.pythonhosted.org/packages/8d/70/bf91bb39e5bedf75ce730ffbaa82ca585584d13335306d637458946b8b9f/regex-2025.9.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:220381f1464a581f2ea988f2220cf2a67927adcef107d47d6897ba5a2f6d51a4", size = 288993, upload-time = "2025-09-19T00:35:08.154Z" }, - { url = "https://files.pythonhosted.org/packages/fe/89/69f79b28365eda2c46e64c39d617d5f65a2aa451a4c94de7d9b34c2dc80f/regex-2025.9.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:87f681bfca84ebd265278b5daa1dcb57f4db315da3b5d044add7c30c10442e61", size = 286624, upload-time = "2025-09-19T00:35:09.717Z" }, - { url = "https://files.pythonhosted.org/packages/44/31/81e62955726c3a14fcc1049a80bc716765af6c055706869de5e880ddc783/regex-2025.9.18-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34d674cbba70c9398074c8a1fcc1a79739d65d1105de2a3c695e2b05ea728251", size = 780473, upload-time = "2025-09-19T00:35:11.013Z" }, - { url = "https://files.pythonhosted.org/packages/fb/23/07072b7e191fbb6e213dc03b2f5b96f06d3c12d7deaded84679482926fc7/regex-2025.9.18-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:385c9b769655cb65ea40b6eea6ff763cbb6d69b3ffef0b0db8208e1833d4e746", size = 849290, upload-time = "2025-09-19T00:35:12.348Z" }, - { url = "https://files.pythonhosted.org/packages/b3/f0/aec7f6a01f2a112210424d77c6401b9015675fb887ced7e18926df4ae51e/regex-2025.9.18-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8900b3208e022570ae34328712bef6696de0804c122933414014bae791437ab2", size = 897335, upload-time = 
"2025-09-19T00:35:14.058Z" }, - { url = "https://files.pythonhosted.org/packages/cc/90/2e5f9da89d260de7d0417ead91a1bc897f19f0af05f4f9323313b76c47f2/regex-2025.9.18-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c204e93bf32cd7a77151d44b05eb36f469d0898e3fba141c026a26b79d9914a0", size = 789946, upload-time = "2025-09-19T00:35:15.403Z" }, - { url = "https://files.pythonhosted.org/packages/2b/d5/1c712c7362f2563d389be66bae131c8bab121a3fabfa04b0b5bfc9e73c51/regex-2025.9.18-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3acc471d1dd7e5ff82e6cacb3b286750decd949ecd4ae258696d04f019817ef8", size = 780787, upload-time = "2025-09-19T00:35:17.061Z" }, - { url = "https://files.pythonhosted.org/packages/4f/92/c54cdb4aa41009632e69817a5aa452673507f07e341076735a2f6c46a37c/regex-2025.9.18-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6479d5555122433728760e5f29edb4c2b79655a8deb681a141beb5c8a025baea", size = 773632, upload-time = "2025-09-19T00:35:18.57Z" }, - { url = "https://files.pythonhosted.org/packages/db/99/75c996dc6a2231a8652d7ad0bfbeaf8a8c77612d335580f520f3ec40e30b/regex-2025.9.18-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:431bd2a8726b000eb6f12429c9b438a24062a535d06783a93d2bcbad3698f8a8", size = 844104, upload-time = "2025-09-19T00:35:20.259Z" }, - { url = "https://files.pythonhosted.org/packages/1c/f7/25aba34cc130cb6844047dbfe9716c9b8f9629fee8b8bec331aa9241b97b/regex-2025.9.18-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0cc3521060162d02bd36927e20690129200e5ac9d2c6d32b70368870b122db25", size = 834794, upload-time = "2025-09-19T00:35:22.002Z" }, - { url = "https://files.pythonhosted.org/packages/51/eb/64e671beafa0ae29712268421597596d781704973551312b2425831d4037/regex-2025.9.18-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a021217b01be2d51632ce056d7a837d3fa37c543ede36e39d14063176a26ae29", size = 778535, upload-time = "2025-09-19T00:35:23.298Z" }, - { url = 
"https://files.pythonhosted.org/packages/26/33/c0ebc0b07bd0bf88f716cca240546b26235a07710ea58e271cfe390ae273/regex-2025.9.18-cp310-cp310-win32.whl", hash = "sha256:4a12a06c268a629cb67cc1d009b7bb0be43e289d00d5111f86a2efd3b1949444", size = 264115, upload-time = "2025-09-19T00:35:25.206Z" }, - { url = "https://files.pythonhosted.org/packages/59/39/aeb11a4ae68faaec2498512cadae09f2d8a91f1f65730fe62b9bffeea150/regex-2025.9.18-cp310-cp310-win_amd64.whl", hash = "sha256:47acd811589301298c49db2c56bde4f9308d6396da92daf99cba781fa74aa450", size = 276143, upload-time = "2025-09-19T00:35:26.785Z" }, - { url = "https://files.pythonhosted.org/packages/29/04/37f2d3fc334a1031fc2767c9d89cec13c2e72207c7e7f6feae8a47f4e149/regex-2025.9.18-cp310-cp310-win_arm64.whl", hash = "sha256:16bd2944e77522275e5ee36f867e19995bcaa533dcb516753a26726ac7285442", size = 268473, upload-time = "2025-09-19T00:35:28.39Z" }, - { url = "https://files.pythonhosted.org/packages/58/61/80eda662fc4eb32bfedc331f42390974c9e89c7eac1b79cd9eea4d7c458c/regex-2025.9.18-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:51076980cd08cd13c88eb7365427ae27f0d94e7cebe9ceb2bb9ffdae8fc4d82a", size = 484832, upload-time = "2025-09-19T00:35:30.011Z" }, - { url = "https://files.pythonhosted.org/packages/a6/d9/33833d9abddf3f07ad48504ddb53fe3b22f353214bbb878a72eee1e3ddbf/regex-2025.9.18-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:828446870bd7dee4e0cbeed767f07961aa07f0ea3129f38b3ccecebc9742e0b8", size = 288994, upload-time = "2025-09-19T00:35:31.733Z" }, - { url = "https://files.pythonhosted.org/packages/2a/b3/526ee96b0d70ea81980cbc20c3496fa582f775a52e001e2743cc33b2fa75/regex-2025.9.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c28821d5637866479ec4cc23b8c990f5bc6dd24e5e4384ba4a11d38a526e1414", size = 286619, upload-time = "2025-09-19T00:35:33.221Z" }, - { url = 
"https://files.pythonhosted.org/packages/65/4f/c2c096b02a351b33442aed5895cdd8bf87d372498d2100927c5a053d7ba3/regex-2025.9.18-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:726177ade8e481db669e76bf99de0b278783be8acd11cef71165327abd1f170a", size = 792454, upload-time = "2025-09-19T00:35:35.361Z" }, - { url = "https://files.pythonhosted.org/packages/24/15/b562c9d6e47c403c4b5deb744f8b4bf6e40684cf866c7b077960a925bdff/regex-2025.9.18-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f5cca697da89b9f8ea44115ce3130f6c54c22f541943ac8e9900461edc2b8bd4", size = 858723, upload-time = "2025-09-19T00:35:36.949Z" }, - { url = "https://files.pythonhosted.org/packages/f2/01/dba305409849e85b8a1a681eac4c03ed327d8de37895ddf9dc137f59c140/regex-2025.9.18-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dfbde38f38004703c35666a1e1c088b778e35d55348da2b7b278914491698d6a", size = 905899, upload-time = "2025-09-19T00:35:38.723Z" }, - { url = "https://files.pythonhosted.org/packages/fe/d0/c51d1e6a80eab11ef96a4cbad17fc0310cf68994fb01a7283276b7e5bbd6/regex-2025.9.18-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f2f422214a03fab16bfa495cfec72bee4aaa5731843b771860a471282f1bf74f", size = 798981, upload-time = "2025-09-19T00:35:40.416Z" }, - { url = "https://files.pythonhosted.org/packages/c4/5e/72db90970887bbe02296612bd61b0fa31e6d88aa24f6a4853db3e96c575e/regex-2025.9.18-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a295916890f4df0902e4286bc7223ee7f9e925daa6dcdec4192364255b70561a", size = 781900, upload-time = "2025-09-19T00:35:42.077Z" }, - { url = "https://files.pythonhosted.org/packages/50/ff/596be45eea8e9bc31677fde243fa2904d00aad1b32c31bce26c3dbba0b9e/regex-2025.9.18-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5db95ff632dbabc8c38c4e82bf545ab78d902e81160e6e455598014f0abe66b9", 
size = 852952, upload-time = "2025-09-19T00:35:43.751Z" }, - { url = "https://files.pythonhosted.org/packages/e5/1b/2dfa348fa551e900ed3f5f63f74185b6a08e8a76bc62bc9c106f4f92668b/regex-2025.9.18-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fb967eb441b0f15ae610b7069bdb760b929f267efbf522e814bbbfffdf125ce2", size = 844355, upload-time = "2025-09-19T00:35:45.309Z" }, - { url = "https://files.pythonhosted.org/packages/f4/bf/aefb1def27fe33b8cbbb19c75c13aefccfbef1c6686f8e7f7095705969c7/regex-2025.9.18-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f04d2f20da4053d96c08f7fde6e1419b7ec9dbcee89c96e3d731fca77f411b95", size = 787254, upload-time = "2025-09-19T00:35:46.904Z" }, - { url = "https://files.pythonhosted.org/packages/e3/4e/8ef042e7cf0dbbb401e784e896acfc1b367b95dfbfc9ada94c2ed55a081f/regex-2025.9.18-cp311-cp311-win32.whl", hash = "sha256:895197241fccf18c0cea7550c80e75f185b8bd55b6924fcae269a1a92c614a07", size = 264129, upload-time = "2025-09-19T00:35:48.597Z" }, - { url = "https://files.pythonhosted.org/packages/b4/7d/c4fcabf80dcdd6821c0578ad9b451f8640b9110fb3dcb74793dd077069ff/regex-2025.9.18-cp311-cp311-win_amd64.whl", hash = "sha256:7e2b414deae99166e22c005e154a5513ac31493db178d8aec92b3269c9cce8c9", size = 276160, upload-time = "2025-09-19T00:36:00.45Z" }, - { url = "https://files.pythonhosted.org/packages/64/f8/0e13c8ae4d6df9d128afaba138342d532283d53a4c1e7a8c93d6756c8f4a/regex-2025.9.18-cp311-cp311-win_arm64.whl", hash = "sha256:fb137ec7c5c54f34a25ff9b31f6b7b0c2757be80176435bf367111e3f71d72df", size = 268471, upload-time = "2025-09-19T00:36:02.149Z" }, - { url = "https://files.pythonhosted.org/packages/b0/99/05859d87a66ae7098222d65748f11ef7f2dff51bfd7482a4e2256c90d72b/regex-2025.9.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:436e1b31d7efd4dcd52091d076482031c611dde58bf9c46ca6d0a26e33053a7e", size = 486335, upload-time = "2025-09-19T00:36:03.661Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/7e/d43d4e8b978890932cf7b0957fce58c5b08c66f32698f695b0c2c24a48bf/regex-2025.9.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c190af81e5576b9c5fdc708f781a52ff20f8b96386c6e2e0557a78402b029f4a", size = 289720, upload-time = "2025-09-19T00:36:05.471Z" }, - { url = "https://files.pythonhosted.org/packages/bb/3b/ff80886089eb5dcf7e0d2040d9aaed539e25a94300403814bb24cc775058/regex-2025.9.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e4121f1ce2b2b5eec4b397cc1b277686e577e658d8f5870b7eb2d726bd2300ab", size = 287257, upload-time = "2025-09-19T00:36:07.072Z" }, - { url = "https://files.pythonhosted.org/packages/ee/66/243edf49dd8720cba8d5245dd4d6adcb03a1defab7238598c0c97cf549b8/regex-2025.9.18-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:300e25dbbf8299d87205e821a201057f2ef9aa3deb29caa01cd2cac669e508d5", size = 797463, upload-time = "2025-09-19T00:36:08.399Z" }, - { url = "https://files.pythonhosted.org/packages/df/71/c9d25a1142c70432e68bb03211d4a82299cd1c1fbc41db9409a394374ef5/regex-2025.9.18-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b47fcf9f5316c0bdaf449e879407e1b9937a23c3b369135ca94ebc8d74b1742", size = 862670, upload-time = "2025-09-19T00:36:10.101Z" }, - { url = "https://files.pythonhosted.org/packages/f8/8f/329b1efc3a64375a294e3a92d43372bf1a351aa418e83c21f2f01cf6ec41/regex-2025.9.18-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:57a161bd3acaa4b513220b49949b07e252165e6b6dc910ee7617a37ff4f5b425", size = 910881, upload-time = "2025-09-19T00:36:12.223Z" }, - { url = "https://files.pythonhosted.org/packages/35/9e/a91b50332a9750519320ed30ec378b74c996f6befe282cfa6bb6cea7e9fd/regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f130c3a7845ba42de42f380fff3c8aebe89a810747d91bcf56d40a069f15352", size = 
802011, upload-time = "2025-09-19T00:36:13.901Z" }, - { url = "https://files.pythonhosted.org/packages/a4/1d/6be3b8d7856b6e0d7ee7f942f437d0a76e0d5622983abbb6d21e21ab9a17/regex-2025.9.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f96fa342b6f54dcba928dd452e8d8cb9f0d63e711d1721cd765bb9f73bb048d", size = 786668, upload-time = "2025-09-19T00:36:15.391Z" }, - { url = "https://files.pythonhosted.org/packages/cb/ce/4a60e53df58bd157c5156a1736d3636f9910bdcc271d067b32b7fcd0c3a8/regex-2025.9.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f0d676522d68c207828dcd01fb6f214f63f238c283d9f01d85fc664c7c85b56", size = 856578, upload-time = "2025-09-19T00:36:16.845Z" }, - { url = "https://files.pythonhosted.org/packages/86/e8/162c91bfe7217253afccde112868afb239f94703de6580fb235058d506a6/regex-2025.9.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:40532bff8a1a0621e7903ae57fce88feb2e8a9a9116d341701302c9302aef06e", size = 849017, upload-time = "2025-09-19T00:36:18.597Z" }, - { url = "https://files.pythonhosted.org/packages/35/34/42b165bc45289646ea0959a1bc7531733e90b47c56a72067adfe6b3251f6/regex-2025.9.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:039f11b618ce8d71a1c364fdee37da1012f5a3e79b1b2819a9f389cd82fd6282", size = 788150, upload-time = "2025-09-19T00:36:20.464Z" }, - { url = "https://files.pythonhosted.org/packages/79/5d/cdd13b1f3c53afa7191593a7ad2ee24092a5a46417725ffff7f64be8342d/regex-2025.9.18-cp312-cp312-win32.whl", hash = "sha256:e1dd06f981eb226edf87c55d523131ade7285137fbde837c34dc9d1bf309f459", size = 264536, upload-time = "2025-09-19T00:36:21.922Z" }, - { url = "https://files.pythonhosted.org/packages/e0/f5/4a7770c9a522e7d2dc1fa3ffc83ab2ab33b0b22b447e62cffef186805302/regex-2025.9.18-cp312-cp312-win_amd64.whl", hash = "sha256:3d86b5247bf25fa3715e385aa9ff272c307e0636ce0c9595f64568b41f0a9c77", size = 275501, upload-time = "2025-09-19T00:36:23.4Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/05/9ce3e110e70d225ecbed455b966003a3afda5e58e8aec2964042363a18f4/regex-2025.9.18-cp312-cp312-win_arm64.whl", hash = "sha256:032720248cbeeae6444c269b78cb15664458b7bb9ed02401d3da59fe4d68c3a5", size = 268601, upload-time = "2025-09-19T00:36:25.092Z" }, - { url = "https://files.pythonhosted.org/packages/d2/c7/5c48206a60ce33711cf7dcaeaed10dd737733a3569dc7e1dce324dd48f30/regex-2025.9.18-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2a40f929cd907c7e8ac7566ac76225a77701a6221bca937bdb70d56cb61f57b2", size = 485955, upload-time = "2025-09-19T00:36:26.822Z" }, - { url = "https://files.pythonhosted.org/packages/e9/be/74fc6bb19a3c491ec1ace943e622b5a8539068771e8705e469b2da2306a7/regex-2025.9.18-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c90471671c2cdf914e58b6af62420ea9ecd06d1554d7474d50133ff26ae88feb", size = 289583, upload-time = "2025-09-19T00:36:28.577Z" }, - { url = "https://files.pythonhosted.org/packages/25/c4/9ceaa433cb5dc515765560f22a19578b95b92ff12526e5a259321c4fc1a0/regex-2025.9.18-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a351aff9e07a2dabb5022ead6380cff17a4f10e4feb15f9100ee56c4d6d06af", size = 287000, upload-time = "2025-09-19T00:36:30.161Z" }, - { url = "https://files.pythonhosted.org/packages/7d/e6/68bc9393cb4dc68018456568c048ac035854b042bc7c33cb9b99b0680afa/regex-2025.9.18-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc4b8e9d16e20ddfe16430c23468a8707ccad3365b06d4536142e71823f3ca29", size = 797535, upload-time = "2025-09-19T00:36:31.876Z" }, - { url = "https://files.pythonhosted.org/packages/6a/1c/ebae9032d34b78ecfe9bd4b5e6575b55351dc8513485bb92326613732b8c/regex-2025.9.18-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4b8cdbddf2db1c5e80338ba2daa3cfa3dec73a46fff2a7dda087c8efbf12d62f", size = 862603, upload-time = "2025-09-19T00:36:33.344Z" }, - { url = 
"https://files.pythonhosted.org/packages/3b/74/12332c54b3882557a4bcd2b99f8be581f5c6a43cf1660a85b460dd8ff468/regex-2025.9.18-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a276937d9d75085b2c91fb48244349c6954f05ee97bba0963ce24a9d915b8b68", size = 910829, upload-time = "2025-09-19T00:36:34.826Z" }, - { url = "https://files.pythonhosted.org/packages/86/70/ba42d5ed606ee275f2465bfc0e2208755b06cdabd0f4c7c4b614d51b57ab/regex-2025.9.18-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92a8e375ccdc1256401c90e9dc02b8642894443d549ff5e25e36d7cf8a80c783", size = 802059, upload-time = "2025-09-19T00:36:36.664Z" }, - { url = "https://files.pythonhosted.org/packages/da/c5/fcb017e56396a7f2f8357412638d7e2963440b131a3ca549be25774b3641/regex-2025.9.18-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0dc6893b1f502d73037cf807a321cdc9be29ef3d6219f7970f842475873712ac", size = 786781, upload-time = "2025-09-19T00:36:38.168Z" }, - { url = "https://files.pythonhosted.org/packages/c6/ee/21c4278b973f630adfb3bcb23d09d83625f3ab1ca6e40ebdffe69901c7a1/regex-2025.9.18-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a61e85bfc63d232ac14b015af1261f826260c8deb19401c0597dbb87a864361e", size = 856578, upload-time = "2025-09-19T00:36:40.129Z" }, - { url = "https://files.pythonhosted.org/packages/87/0b/de51550dc7274324435c8f1539373ac63019b0525ad720132866fff4a16a/regex-2025.9.18-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1ef86a9ebc53f379d921fb9a7e42b92059ad3ee800fcd9e0fe6181090e9f6c23", size = 849119, upload-time = "2025-09-19T00:36:41.651Z" }, - { url = "https://files.pythonhosted.org/packages/60/52/383d3044fc5154d9ffe4321696ee5b2ee4833a28c29b137c22c33f41885b/regex-2025.9.18-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d3bc882119764ba3a119fbf2bd4f1b47bc56c1da5d42df4ed54ae1e8e66fdf8f", size = 788219, upload-time = "2025-09-19T00:36:43.575Z" }, - { url = 
"https://files.pythonhosted.org/packages/20/bd/2614fc302671b7359972ea212f0e3a92df4414aaeacab054a8ce80a86073/regex-2025.9.18-cp313-cp313-win32.whl", hash = "sha256:3810a65675845c3bdfa58c3c7d88624356dd6ee2fc186628295e0969005f928d", size = 264517, upload-time = "2025-09-19T00:36:45.503Z" }, - { url = "https://files.pythonhosted.org/packages/07/0f/ab5c1581e6563a7bffdc1974fb2d25f05689b88e2d416525271f232b1946/regex-2025.9.18-cp313-cp313-win_amd64.whl", hash = "sha256:16eaf74b3c4180ede88f620f299e474913ab6924d5c4b89b3833bc2345d83b3d", size = 275481, upload-time = "2025-09-19T00:36:46.965Z" }, - { url = "https://files.pythonhosted.org/packages/49/22/ee47672bc7958f8c5667a587c2600a4fba8b6bab6e86bd6d3e2b5f7cac42/regex-2025.9.18-cp313-cp313-win_arm64.whl", hash = "sha256:4dc98ba7dd66bd1261927a9f49bd5ee2bcb3660f7962f1ec02617280fc00f5eb", size = 268598, upload-time = "2025-09-19T00:36:48.314Z" }, - { url = "https://files.pythonhosted.org/packages/e8/83/6887e16a187c6226cb85d8301e47d3b73ecc4505a3a13d8da2096b44fd76/regex-2025.9.18-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:fe5d50572bc885a0a799410a717c42b1a6b50e2f45872e2b40f4f288f9bce8a2", size = 489765, upload-time = "2025-09-19T00:36:49.996Z" }, - { url = "https://files.pythonhosted.org/packages/51/c5/e2f7325301ea2916ff301c8d963ba66b1b2c1b06694191df80a9c4fea5d0/regex-2025.9.18-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b9d9a2d6cda6621551ca8cf7a06f103adf72831153f3c0d982386110870c4d3", size = 291228, upload-time = "2025-09-19T00:36:51.654Z" }, - { url = "https://files.pythonhosted.org/packages/91/60/7d229d2bc6961289e864a3a3cfebf7d0d250e2e65323a8952cbb7e22d824/regex-2025.9.18-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:13202e4c4ac0ef9a317fff817674b293c8f7e8c68d3190377d8d8b749f566e12", size = 289270, upload-time = "2025-09-19T00:36:53.118Z" }, - { url = 
"https://files.pythonhosted.org/packages/3c/d7/b4f06868ee2958ff6430df89857fbf3d43014bbf35538b6ec96c2704e15d/regex-2025.9.18-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:874ff523b0fecffb090f80ae53dc93538f8db954c8bb5505f05b7787ab3402a0", size = 806326, upload-time = "2025-09-19T00:36:54.631Z" }, - { url = "https://files.pythonhosted.org/packages/d6/e4/bca99034a8f1b9b62ccf337402a8e5b959dd5ba0e5e5b2ead70273df3277/regex-2025.9.18-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d13ab0490128f2bb45d596f754148cd750411afc97e813e4b3a61cf278a23bb6", size = 871556, upload-time = "2025-09-19T00:36:56.208Z" }, - { url = "https://files.pythonhosted.org/packages/6d/df/e06ffaf078a162f6dd6b101a5ea9b44696dca860a48136b3ae4a9caf25e2/regex-2025.9.18-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:05440bc172bc4b4b37fb9667e796597419404dbba62e171e1f826d7d2a9ebcef", size = 913817, upload-time = "2025-09-19T00:36:57.807Z" }, - { url = "https://files.pythonhosted.org/packages/9e/05/25b05480b63292fd8e84800b1648e160ca778127b8d2367a0a258fa2e225/regex-2025.9.18-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5514b8e4031fdfaa3d27e92c75719cbe7f379e28cacd939807289bce76d0e35a", size = 811055, upload-time = "2025-09-19T00:36:59.762Z" }, - { url = "https://files.pythonhosted.org/packages/70/97/7bc7574655eb651ba3a916ed4b1be6798ae97af30104f655d8efd0cab24b/regex-2025.9.18-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:65d3c38c39efce73e0d9dc019697b39903ba25b1ad45ebbd730d2cf32741f40d", size = 794534, upload-time = "2025-09-19T00:37:01.405Z" }, - { url = "https://files.pythonhosted.org/packages/b4/c2/d5da49166a52dda879855ecdba0117f073583db2b39bb47ce9a3378a8e9e/regex-2025.9.18-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:ae77e447ebc144d5a26d50055c6ddba1d6ad4a865a560ec7200b8b06bc529368", size = 866684, upload-time = "2025-09-19T00:37:03.441Z" }, - { url = "https://files.pythonhosted.org/packages/bd/2d/0a5c4e6ec417de56b89ff4418ecc72f7e3feca806824c75ad0bbdae0516b/regex-2025.9.18-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e3ef8cf53dc8df49d7e28a356cf824e3623764e9833348b655cfed4524ab8a90", size = 853282, upload-time = "2025-09-19T00:37:04.985Z" }, - { url = "https://files.pythonhosted.org/packages/f4/8e/d656af63e31a86572ec829665d6fa06eae7e144771e0330650a8bb865635/regex-2025.9.18-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9feb29817df349c976da9a0debf775c5c33fc1c8ad7b9f025825da99374770b7", size = 797830, upload-time = "2025-09-19T00:37:06.697Z" }, - { url = "https://files.pythonhosted.org/packages/db/ce/06edc89df8f7b83ffd321b6071be4c54dc7332c0f77860edc40ce57d757b/regex-2025.9.18-cp313-cp313t-win32.whl", hash = "sha256:168be0d2f9b9d13076940b1ed774f98595b4e3c7fc54584bba81b3cc4181742e", size = 267281, upload-time = "2025-09-19T00:37:08.568Z" }, - { url = "https://files.pythonhosted.org/packages/83/9a/2b5d9c8b307a451fd17068719d971d3634ca29864b89ed5c18e499446d4a/regex-2025.9.18-cp313-cp313t-win_amd64.whl", hash = "sha256:d59ecf3bb549e491c8104fea7313f3563c7b048e01287db0a90485734a70a730", size = 278724, upload-time = "2025-09-19T00:37:10.023Z" }, - { url = "https://files.pythonhosted.org/packages/3d/70/177d31e8089a278a764f8ec9a3faac8d14a312d622a47385d4b43905806f/regex-2025.9.18-cp313-cp313t-win_arm64.whl", hash = "sha256:dbef80defe9fb21310948a2595420b36c6d641d9bea4c991175829b2cc4bc06a", size = 269771, upload-time = "2025-09-19T00:37:13.041Z" }, - { url = "https://files.pythonhosted.org/packages/44/b7/3b4663aa3b4af16819f2ab6a78c4111c7e9b066725d8107753c2257448a5/regex-2025.9.18-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c6db75b51acf277997f3adcd0ad89045d856190d13359f15ab5dda21581d9129", size = 486130, upload-time = "2025-09-19T00:37:14.527Z" }, - { url = 
"https://files.pythonhosted.org/packages/80/5b/4533f5d7ac9c6a02a4725fe8883de2aebc713e67e842c04cf02626afb747/regex-2025.9.18-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8f9698b6f6895d6db810e0bda5364f9ceb9e5b11328700a90cae573574f61eea", size = 289539, upload-time = "2025-09-19T00:37:16.356Z" }, - { url = "https://files.pythonhosted.org/packages/b8/8d/5ab6797c2750985f79e9995fad3254caa4520846580f266ae3b56d1cae58/regex-2025.9.18-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29cd86aa7cb13a37d0f0d7c21d8d949fe402ffa0ea697e635afedd97ab4b69f1", size = 287233, upload-time = "2025-09-19T00:37:18.025Z" }, - { url = "https://files.pythonhosted.org/packages/cb/1e/95afcb02ba8d3a64e6ffeb801718ce73471ad6440c55d993f65a4a5e7a92/regex-2025.9.18-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c9f285a071ee55cd9583ba24dde006e53e17780bb309baa8e4289cd472bcc47", size = 797876, upload-time = "2025-09-19T00:37:19.609Z" }, - { url = "https://files.pythonhosted.org/packages/c8/fb/720b1f49cec1f3b5a9fea5b34cd22b88b5ebccc8c1b5de9cc6f65eed165a/regex-2025.9.18-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5adf266f730431e3be9021d3e5b8d5ee65e563fec2883ea8093944d21863b379", size = 863385, upload-time = "2025-09-19T00:37:21.65Z" }, - { url = "https://files.pythonhosted.org/packages/a9/ca/e0d07ecf701e1616f015a720dc13b84c582024cbfbb3fc5394ae204adbd7/regex-2025.9.18-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1137cabc0f38807de79e28d3f6e3e3f2cc8cfb26bead754d02e6d1de5f679203", size = 910220, upload-time = "2025-09-19T00:37:23.723Z" }, - { url = "https://files.pythonhosted.org/packages/b6/45/bba86413b910b708eca705a5af62163d5d396d5f647ed9485580c7025209/regex-2025.9.18-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cc9e5525cada99699ca9223cce2d52e88c52a3d2a0e842bd53de5497c604164", size = 
801827, upload-time = "2025-09-19T00:37:25.684Z" }, - { url = "https://files.pythonhosted.org/packages/b8/a6/740fbd9fcac31a1305a8eed30b44bf0f7f1e042342be0a4722c0365ecfca/regex-2025.9.18-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bbb9246568f72dce29bcd433517c2be22c7791784b223a810225af3b50d1aafb", size = 786843, upload-time = "2025-09-19T00:37:27.62Z" }, - { url = "https://files.pythonhosted.org/packages/80/a7/0579e8560682645906da640c9055506465d809cb0f5415d9976f417209a6/regex-2025.9.18-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6a52219a93dd3d92c675383efff6ae18c982e2d7651c792b1e6d121055808743", size = 857430, upload-time = "2025-09-19T00:37:29.362Z" }, - { url = "https://files.pythonhosted.org/packages/8d/9b/4dc96b6c17b38900cc9fee254fc9271d0dde044e82c78c0811b58754fde5/regex-2025.9.18-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ae9b3840c5bd456780e3ddf2f737ab55a79b790f6409182012718a35c6d43282", size = 848612, upload-time = "2025-09-19T00:37:31.42Z" }, - { url = "https://files.pythonhosted.org/packages/b3/6a/6f659f99bebb1775e5ac81a3fb837b85897c1a4ef5acffd0ff8ffe7e67fb/regex-2025.9.18-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d488c236ac497c46a5ac2005a952c1a0e22a07be9f10c3e735bc7d1209a34773", size = 787967, upload-time = "2025-09-19T00:37:34.019Z" }, - { url = "https://files.pythonhosted.org/packages/61/35/9e35665f097c07cf384a6b90a1ac11b0b1693084a0b7a675b06f760496c6/regex-2025.9.18-cp314-cp314-win32.whl", hash = "sha256:0c3506682ea19beefe627a38872d8da65cc01ffa25ed3f2e422dffa1474f0788", size = 269847, upload-time = "2025-09-19T00:37:35.759Z" }, - { url = "https://files.pythonhosted.org/packages/af/64/27594dbe0f1590b82de2821ebfe9a359b44dcb9b65524876cd12fabc447b/regex-2025.9.18-cp314-cp314-win_amd64.whl", hash = "sha256:57929d0f92bebb2d1a83af372cd0ffba2263f13f376e19b1e4fa32aec4efddc3", size = 278755, upload-time = "2025-09-19T00:37:37.367Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/a3/0cd8d0d342886bd7d7f252d701b20ae1a3c72dc7f34ef4b2d17790280a09/regex-2025.9.18-cp314-cp314-win_arm64.whl", hash = "sha256:6a4b44df31d34fa51aa5c995d3aa3c999cec4d69b9bd414a8be51984d859f06d", size = 271873, upload-time = "2025-09-19T00:37:39.125Z" }, - { url = "https://files.pythonhosted.org/packages/99/cb/8a1ab05ecf404e18b54348e293d9b7a60ec2bd7aa59e637020c5eea852e8/regex-2025.9.18-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b176326bcd544b5e9b17d6943f807697c0cb7351f6cfb45bf5637c95ff7e6306", size = 489773, upload-time = "2025-09-19T00:37:40.968Z" }, - { url = "https://files.pythonhosted.org/packages/93/3b/6543c9b7f7e734d2404fa2863d0d710c907bef99d4598760ed4563d634c3/regex-2025.9.18-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:0ffd9e230b826b15b369391bec167baed57c7ce39efc35835448618860995946", size = 291221, upload-time = "2025-09-19T00:37:42.901Z" }, - { url = "https://files.pythonhosted.org/packages/cd/91/e9fdee6ad6bf708d98c5d17fded423dcb0661795a49cba1b4ffb8358377a/regex-2025.9.18-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ec46332c41add73f2b57e2f5b642f991f6b15e50e9f86285e08ffe3a512ac39f", size = 289268, upload-time = "2025-09-19T00:37:44.823Z" }, - { url = "https://files.pythonhosted.org/packages/94/a6/bc3e8a918abe4741dadeaeb6c508e3a4ea847ff36030d820d89858f96a6c/regex-2025.9.18-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b80fa342ed1ea095168a3f116637bd1030d39c9ff38dc04e54ef7c521e01fc95", size = 806659, upload-time = "2025-09-19T00:37:46.684Z" }, - { url = "https://files.pythonhosted.org/packages/2b/71/ea62dbeb55d9e6905c7b5a49f75615ea1373afcad95830047e4e310db979/regex-2025.9.18-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4d97071c0ba40f0cf2a93ed76e660654c399a0a04ab7d85472239460f3da84b", size = 871701, upload-time = "2025-09-19T00:37:48.882Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/90/fbe9dedb7dad24a3a4399c0bae64bfa932ec8922a0a9acf7bc88db30b161/regex-2025.9.18-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0ac936537ad87cef9e0e66c5144484206c1354224ee811ab1519a32373e411f3", size = 913742, upload-time = "2025-09-19T00:37:51.015Z" }, - { url = "https://files.pythonhosted.org/packages/f0/1c/47e4a8c0e73d41eb9eb9fdeba3b1b810110a5139a2526e82fd29c2d9f867/regex-2025.9.18-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dec57f96d4def58c422d212d414efe28218d58537b5445cf0c33afb1b4768571", size = 811117, upload-time = "2025-09-19T00:37:52.686Z" }, - { url = "https://files.pythonhosted.org/packages/2a/da/435f29fddfd015111523671e36d30af3342e8136a889159b05c1d9110480/regex-2025.9.18-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:48317233294648bf7cd068857f248e3a57222259a5304d32c7552e2284a1b2ad", size = 794647, upload-time = "2025-09-19T00:37:54.626Z" }, - { url = "https://files.pythonhosted.org/packages/23/66/df5e6dcca25c8bc57ce404eebc7342310a0d218db739d7882c9a2b5974a3/regex-2025.9.18-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:274687e62ea3cf54846a9b25fc48a04459de50af30a7bd0b61a9e38015983494", size = 866747, upload-time = "2025-09-19T00:37:56.367Z" }, - { url = "https://files.pythonhosted.org/packages/82/42/94392b39b531f2e469b2daa40acf454863733b674481fda17462a5ffadac/regex-2025.9.18-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a78722c86a3e7e6aadf9579e3b0ad78d955f2d1f1a8ca4f67d7ca258e8719d4b", size = 853434, upload-time = "2025-09-19T00:37:58.39Z" }, - { url = "https://files.pythonhosted.org/packages/a8/f8/dcc64c7f7bbe58842a8f89622b50c58c3598fbbf4aad0a488d6df2c699f1/regex-2025.9.18-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:06104cd203cdef3ade989a1c45b6215bf42f8b9dd705ecc220c173233f7cba41", size = 798024, upload-time = "2025-09-19T00:38:00.397Z" }, - { url = 
"https://files.pythonhosted.org/packages/20/8d/edf1c5d5aa98f99a692313db813ec487732946784f8f93145e0153d910e5/regex-2025.9.18-cp314-cp314t-win32.whl", hash = "sha256:2e1eddc06eeaffd249c0adb6fafc19e2118e6308c60df9db27919e96b5656096", size = 273029, upload-time = "2025-09-19T00:38:02.383Z" }, - { url = "https://files.pythonhosted.org/packages/a7/24/02d4e4f88466f17b145f7ea2b2c11af3a942db6222429c2c146accf16054/regex-2025.9.18-cp314-cp314t-win_amd64.whl", hash = "sha256:8620d247fb8c0683ade51217b459cb4a1081c0405a3072235ba43a40d355c09a", size = 282680, upload-time = "2025-09-19T00:38:04.102Z" }, - { url = "https://files.pythonhosted.org/packages/1f/a3/c64894858aaaa454caa7cc47e2f225b04d3ed08ad649eacf58d45817fad2/regex-2025.9.18-cp314-cp314t-win_arm64.whl", hash = "sha256:b7531a8ef61de2c647cdf68b3229b071e46ec326b3138b2180acb4275f470b01", size = 273034, upload-time = "2025-09-19T00:38:05.807Z" }, -] - [[package]] name = "requests" version = "2.32.5" From eff1a0f2be5cec73ce3b08e4b480fe7ba52f9348 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 19:00:18 +0200 Subject: [PATCH 050/115] Fix double base64 encoding bug on images --- pipelex/cogt/image/prompt_image.py | 2 -- pipelex/cogt/image/prompt_image_factory.py | 7 +++++-- pipelex/core/stuffs/image_content.py | 6 +++--- pipelex/tools/misc/base_64_utils.py | 17 +++++++++++++++-- pipelex/tools/misc/filetype_utils.py | 6 +++++- 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/pipelex/cogt/image/prompt_image.py b/pipelex/cogt/image/prompt_image.py index 8f39486bd..319b2eca4 100644 --- a/pipelex/cogt/image/prompt_image.py +++ b/pipelex/cogt/image/prompt_image.py @@ -6,7 +6,6 @@ from typing_extensions import override from pipelex.tools.misc.attribute_utils import AttributePolisher -from pipelex.tools.misc.file_utils import save_bytes_to_binary_file from pipelex.tools.misc.filetype_utils import ( FileType, detect_file_type_from_base64, @@ -67,7 +66,6 @@ class PromptImageBase64(PromptImage): base_64: 
bytes def get_file_type(self) -> FileType: - save_bytes_to_binary_file("test.png", base64.b64decode(self.base_64)) return detect_file_type_from_base64(self.base_64) def get_mime_type(self) -> str: diff --git a/pipelex/cogt/image/prompt_image_factory.py b/pipelex/cogt/image/prompt_image_factory.py index a6f0ebb4e..6033fab55 100644 --- a/pipelex/cogt/image/prompt_image_factory.py +++ b/pipelex/cogt/image/prompt_image_factory.py @@ -1,3 +1,4 @@ +from pipelex import log from pipelex.cogt.exceptions import PromptImageFactoryError from pipelex.cogt.image.prompt_image import ( PromptImage, @@ -10,7 +11,7 @@ encode_to_base64_async, load_binary_as_base64_async, load_binary_async, - strip_base64_str_if_needed, + strip_base_64_str_if_needed, ) from pipelex.tools.misc.file_fetch_utils import fetch_file_from_url_httpx_async @@ -25,9 +26,11 @@ def make_prompt_image( base_64_str: str | None = None, ) -> PromptImage: if base_64: + log.debug(f"Base 64: {base_64[:100]!r}") return PromptImageBase64(base_64=base_64) elif base_64_str: - stripped_base_64_str = strip_base64_str_if_needed(base_64_str) + stripped_base_64_str = strip_base_64_str_if_needed(base_64_str) + log.debug(f"Stripped base 64 str: {stripped_base_64_str[:100]}") return PromptImageBase64(base_64=stripped_base_64_str.encode()) elif file_path: return PromptImagePath(file_path=file_path) diff --git a/pipelex/core/stuffs/image_content.py b/pipelex/core/stuffs/image_content.py index 3adae2c6d..94f5872cb 100644 --- a/pipelex/core/stuffs/image_content.py +++ b/pipelex/core/stuffs/image_content.py @@ -9,7 +9,7 @@ from pipelex.cogt.exceptions import ImageContentError from pipelex.cogt.extract.extract_output import ExtractedImage from pipelex.core.stuffs.stuff_content import StuffContent -from pipelex.tools.misc.base_64_utils import prefixed_base64_str_from_base64_bytes +from pipelex.tools.misc.base_64_utils import prefixed_base64_str_from_base64_str from pipelex.tools.misc.path_utils import interpret_path_or_url from 
pipelex.types import Self @@ -48,7 +48,7 @@ def rendered_json(self) -> str: @classmethod def make_from_extracted_image(cls, extracted_image: ExtractedImage) -> Self: if base_64 := extracted_image.base_64: - prefixed_base64_str = prefixed_base64_str_from_base64_bytes(b64_bytes=base_64.encode()) + prefixed_base64_str = prefixed_base64_str_from_base64_str(b64_str=base_64) return cls( url=prefixed_base64_str, base_64=extracted_image.base_64, @@ -63,7 +63,7 @@ def make_from_image(cls, image: Image.Image) -> Self: buffer = BytesIO() image.save(buffer, format="PNG") base_64 = base64.b64encode(buffer.getvalue()).decode("utf-8") - prefixed_base64_str = prefixed_base64_str_from_base64_bytes(b64_bytes=base_64.encode()) + prefixed_base64_str = prefixed_base64_str_from_base64_str(b64_str=base_64) return cls( url=prefixed_base64_str, base_64=base_64, diff --git a/pipelex/tools/misc/base_64_utils.py b/pipelex/tools/misc/base_64_utils.py index b4469271b..fe7cab3ce 100644 --- a/pipelex/tools/misc/base_64_utils.py +++ b/pipelex/tools/misc/base_64_utils.py @@ -32,7 +32,7 @@ async def encode_to_base64_async(data_bytes: bytes) -> bytes: return await asyncio.to_thread(base64.b64encode, data_bytes) -def strip_base64_str_if_needed(base64_str: str) -> str: +def strip_base_64_str_if_needed(base64_str: str) -> str: if "," in base64_str: return base64_str.split(",", 1)[1] if "data:" in base64_str and ";base64," in base64_str: @@ -45,11 +45,24 @@ def prefixed_base64_str_from_base64_bytes(b64_bytes: bytes) -> str: return f"data:{file_type.mime};base64,{base64.b64encode(b64_bytes).decode('utf-8')}" +def prefixed_base64_str_from_base64_str(b64_str: str) -> str: + """Create a data URL from an already base64-encoded string. 
+ + Args: + b64_str: Base64-encoded string (without data URL prefix) + + Returns: + Data URL string: data:{mime};base64,{b64_str} + """ + file_type = detect_file_type_from_base64(b64_str) + return f"data:{file_type.mime};base64,{b64_str}" + + def save_base_64_str_to_binary_file( base_64_str: str, file_path: str, ): - stripped_base_64_str = strip_base64_str_if_needed(base_64_str) + stripped_base_64_str = strip_base_64_str_if_needed(base_64_str) # Decode base64 byte_data = base64.b64decode(stripped_base_64_str) diff --git a/pipelex/tools/misc/filetype_utils.py b/pipelex/tools/misc/filetype_utils.py index af274d770..1341c8f44 100644 --- a/pipelex/tools/misc/filetype_utils.py +++ b/pipelex/tools/misc/filetype_utils.py @@ -5,6 +5,7 @@ import filetype from pydantic import BaseModel +from pipelex import log from pipelex.tools.exceptions import ToolException @@ -76,16 +77,19 @@ def detect_file_type_from_base64(b64: str | bytes) -> FileType: """ # Normalise to bytes holding only the Base-64 alphabet if isinstance(b64, bytes): + log.debug(f"b64 is already bytes: {b64[:100]!r}") b64_bytes = b64 else: # str → handle optional data-URL header + log.debug(f"b64 is a string: {b64[:100]!r}") if b64.lstrip().startswith("data:") and "," in b64: b64 = b64.split(",", 1)[1] + log.debug(f"b64 after split: {b64[:100]!r}") b64_bytes = b64.encode("ascii") # Base-64 is pure ASCII try: raw = base64.b64decode(b64_bytes, validate=True) except binascii.Error as exc: # malformed Base-64 - msg = f"Could not identify file type of given bytes because input is not valid Base-64: {exc}" + msg = f"Could not identify file type of given bytes because input is not valid Base-64: {exc}\n{b64_bytes[:100]!r}" raise FileTypeException(msg) from exc return detect_file_type_from_bytes(buf=raw) From 7374f089d091a3b08ac6197851d7a993655ce58d Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 19:07:02 +0200 Subject: [PATCH 051/115] Cleanup and restore image features --- 
pipelex/cogt/image/prompt_image_factory.py | 3 - pipelex/core/stuffs/image_content.py | 78 ++++++++++--------- pipelex/core/stuffs/page_content.py | 11 +-- .../core/stuffs/text_and_images_content.py | 15 ++-- 4 files changed, 54 insertions(+), 53 deletions(-) diff --git a/pipelex/cogt/image/prompt_image_factory.py b/pipelex/cogt/image/prompt_image_factory.py index 6033fab55..1790087a1 100644 --- a/pipelex/cogt/image/prompt_image_factory.py +++ b/pipelex/cogt/image/prompt_image_factory.py @@ -1,4 +1,3 @@ -from pipelex import log from pipelex.cogt.exceptions import PromptImageFactoryError from pipelex.cogt.image.prompt_image import ( PromptImage, @@ -26,11 +25,9 @@ def make_prompt_image( base_64_str: str | None = None, ) -> PromptImage: if base_64: - log.debug(f"Base 64: {base_64[:100]!r}") return PromptImageBase64(base_64=base_64) elif base_64_str: stripped_base_64_str = strip_base_64_str_if_needed(base_64_str) - log.debug(f"Stripped base 64 str: {stripped_base_64_str[:100]}") return PromptImageBase64(base_64=stripped_base_64_str.encode()) elif file_path: return PromptImagePath(file_path=file_path) diff --git a/pipelex/core/stuffs/image_content.py b/pipelex/core/stuffs/image_content.py index 94f5872cb..2c08a98cd 100644 --- a/pipelex/core/stuffs/image_content.py +++ b/pipelex/core/stuffs/image_content.py @@ -9,8 +9,10 @@ from pipelex.cogt.exceptions import ImageContentError from pipelex.cogt.extract.extract_output import ExtractedImage from pipelex.core.stuffs.stuff_content import StuffContent -from pipelex.tools.misc.base_64_utils import prefixed_base64_str_from_base64_str -from pipelex.tools.misc.path_utils import interpret_path_or_url +from pipelex.tools.misc.base_64_utils import prefixed_base64_str_from_base64_str, save_base_64_str_to_binary_file +from pipelex.tools.misc.file_utils import ensure_directory_exists, get_incremental_file_path, save_text_to_path +from pipelex.tools.misc.filetype_utils import detect_file_type_from_base64 +from 
pipelex.tools.misc.path_utils import InterpretedPathOrUrl, interpret_path_or_url from pipelex.types import Self @@ -69,40 +71,40 @@ def make_from_image(cls, image: Image.Image) -> Self: base_64=base_64, ) - # def save_to_directory(self, directory: str, base_name: str | None = None, extension: str | None = None): - # ensure_directory_exists(directory) - # base_name = base_name or "img" - # if (base_64 := self.base_64) and not extension: - # match interpret_path_or_url(path_or_uri=self.url): - # case InterpretedPathOrUrl.FILE_NAME: - # parts = self.url.rsplit(".", 1) - # base_name = parts[0] - # extension = parts[1] - # case InterpretedPathOrUrl.FILE_PATH | InterpretedPathOrUrl.FILE_URI | InterpretedPathOrUrl.URL | InterpretedPathOrUrl.BASE_64: - # file_type = detect_file_type_from_base64(b64=base_64) - # base_name = base_name or "img" - # extension = file_type.extension - # file_path = get_incremental_file_path( - # base_path=directory, - # base_name=base_name, - # extension=extension, - # avoid_suffix_if_possible=True, - # ) - # save_base_64_str_to_binary_file(base_64_str=base_64, file_path=file_path) + def save_to_directory(self, directory: str, base_name: str | None = None, extension: str | None = None): + ensure_directory_exists(directory) + base_name = base_name or "img" + if (base_64 := self.base_64) and not extension: + match interpret_path_or_url(path_or_uri=self.url): + case InterpretedPathOrUrl.FILE_NAME: + parts = self.url.rsplit(".", 1) + base_name = parts[0] + extension = parts[1] + case InterpretedPathOrUrl.FILE_PATH | InterpretedPathOrUrl.FILE_URI | InterpretedPathOrUrl.URL | InterpretedPathOrUrl.BASE_64: + file_type = detect_file_type_from_base64(b64=base_64) + base_name = base_name or "img" + extension = file_type.extension + file_path = get_incremental_file_path( + base_path=directory, + base_name=base_name, + extension=extension, + avoid_suffix_if_possible=True, + ) + save_base_64_str_to_binary_file(base_64_str=base_64, file_path=file_path) - # if 
caption := self.caption: - # caption_file_path = get_incremental_file_path( - # base_path=directory, - # base_name=f"{base_name}_caption", - # extension="txt", - # avoid_suffix_if_possible=True, - # ) - # save_text_to_path(text=caption, path=caption_file_path) - # if source_prompt := self.source_prompt: - # source_prompt_file_path = get_incremental_file_path( - # base_path=directory, - # base_name=f"{base_name}_source_prompt", - # extension="txt", - # avoid_suffix_if_possible=True, - # ) - # save_text_to_path(text=source_prompt, path=source_prompt_file_path) + if caption := self.caption: + caption_file_path = get_incremental_file_path( + base_path=directory, + base_name=f"{base_name}_caption", + extension="txt", + avoid_suffix_if_possible=True, + ) + save_text_to_path(text=caption, path=caption_file_path) + if source_prompt := self.source_prompt: + source_prompt_file_path = get_incremental_file_path( + base_path=directory, + base_name=f"{base_name}_source_prompt", + extension="txt", + avoid_suffix_if_possible=True, + ) + save_text_to_path(text=source_prompt, path=source_prompt_file_path) diff --git a/pipelex/core/stuffs/page_content.py b/pipelex/core/stuffs/page_content.py index 4e32bc2d5..c8957007d 100644 --- a/pipelex/core/stuffs/page_content.py +++ b/pipelex/core/stuffs/page_content.py @@ -1,14 +1,15 @@ from pipelex.core.stuffs.image_content import ImageContent from pipelex.core.stuffs.structured_content import StructuredContent from pipelex.core.stuffs.text_and_images_content import TextAndImagesContent +from pipelex.tools.misc.file_utils import ensure_directory_exists class PageContent(StructuredContent): text_and_images: TextAndImagesContent page_view: ImageContent | None = None - # def save_to_directory(self, directory: str): - # ensure_directory_exists(directory) - # self.text_and_images.save_to_directory(directory=directory) - # if page_view := self.page_view: - # page_view.save_to_directory(directory=directory, base_name="page_view") + def 
save_to_directory(self, directory: str): + ensure_directory_exists(directory) + self.text_and_images.save_to_directory(directory=directory) + if page_view := self.page_view: + page_view.save_to_directory(directory=directory, base_name="page_view") diff --git a/pipelex/core/stuffs/text_and_images_content.py b/pipelex/core/stuffs/text_and_images_content.py index f1f93cb5b..03eab91a1 100644 --- a/pipelex/core/stuffs/text_and_images_content.py +++ b/pipelex/core/stuffs/text_and_images_content.py @@ -3,6 +3,7 @@ from pipelex.core.stuffs.image_content import ImageContent from pipelex.core.stuffs.stuff_content import StuffContent from pipelex.core.stuffs.text_content import TextContent +from pipelex.tools.misc.file_utils import ensure_directory_exists class TextAndImagesContent(StuffContent): @@ -32,10 +33,10 @@ def rendered_html(self) -> str: rendered = "" return rendered - # def save_to_directory(self, directory: str): - # ensure_directory_exists(directory) - # if text_content := self.text: - # text_content.save_to_directory(directory=directory) - # if images := self.images: - # for image_content in images: - # image_content.save_to_directory(directory=directory) + def save_to_directory(self, directory: str): + ensure_directory_exists(directory) + if text_content := self.text: + text_content.save_to_directory(directory=directory) + if images := self.images: + for image_content in images: + image_content.save_to_directory(directory=directory) From 82827a5a39d962ac77b2ca30e6b6b7e471609c7d Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 19:34:00 +0200 Subject: [PATCH 052/115] Formatting --- pipelex/cogt/exceptions.py | 4 +- pipelex/cogt/models/model_deck.py | 45 ++++------------- pipelex/pipelex.py | 48 +++++------------- .../plugins/anthropic/anthropic_llm_worker.py | 50 +++++-------------- 4 files changed, 36 insertions(+), 111 deletions(-) diff --git a/pipelex/cogt/exceptions.py b/pipelex/cogt/exceptions.py index 689cc1421..5549e594b 100644 --- 
a/pipelex/cogt/exceptions.py +++ b/pipelex/cogt/exceptions.py @@ -133,9 +133,7 @@ class ImgGenGeneratedTypeError(ImgGenGenerationError): class MissingDependencyError(CogtError): """Raised when a required dependency is not installed.""" - def __init__( - self, dependency_name: str, extra_name: str, message: str | None = None - ): + def __init__(self, dependency_name: str, extra_name: str, message: str | None = None): self.dependency_name = dependency_name self.extra_name = extra_name error_msg = f"Required dependency '{dependency_name}' is not installed." diff --git a/pipelex/cogt/models/model_deck.py b/pipelex/cogt/models/model_deck.py index 03656bf53..51d952b84 100644 --- a/pipelex/cogt/models/model_deck.py +++ b/pipelex/cogt/models/model_deck.py @@ -126,13 +126,9 @@ def get_img_gen_setting(self, img_gen_choice: ImgGenModelChoice) -> ImgGenSettin @classmethod def final_validate(cls, deck: Self): for llm_preset_id, llm_setting in deck.llm_presets.items(): - inference_model = deck.get_required_inference_model( - model_handle=llm_setting.model - ) + inference_model = deck.get_required_inference_model(model_handle=llm_setting.model) try: - cls._validate_llm_setting( - llm_setting=llm_setting, inference_model=inference_model - ) + cls._validate_llm_setting(llm_setting=llm_setting, inference_model=inference_model) except ConfigValidationError as exc: msg = f"LLM preset '{llm_preset_id}' is invalid: {exc}" raise ModelDeckValidatonError(msg) from exc @@ -142,22 +138,15 @@ def final_validate(cls, deck: Self): ############################################################ @classmethod - def _validate_llm_setting( - cls, llm_setting: LLMSetting, inference_model: InferenceModelSpec - ): - if inference_model.max_tokens is not None and ( - llm_setting_max_tokens := llm_setting.max_tokens - ): + def _validate_llm_setting(cls, llm_setting: LLMSetting, inference_model: InferenceModelSpec): + if inference_model.max_tokens is not None and (llm_setting_max_tokens := 
llm_setting.max_tokens): if llm_setting_max_tokens > inference_model.max_tokens: msg = ( f"LLM setting '{llm_setting.model}' has a max_tokens of {llm_setting_max_tokens}, " f"which is greater than the model's max_tokens of {inference_model.max_tokens}" ) raise LLMSettingsValidationError(msg) - if ( - ModelConstraints.TEMPERATURE_MUST_BE_1 in inference_model.constraints - and llm_setting.temperature != 1 - ): + if ModelConstraints.TEMPERATURE_MUST_BE_1 in inference_model.constraints and llm_setting.temperature != 1: msg = ( f"LLM setting '{llm_setting.model}' has a temperature of {llm_setting.temperature}, " f"which is not allowed by the model's constraints: it must be 1" @@ -166,9 +155,7 @@ def _validate_llm_setting( @field_validator("llm_choice_defaults", mode="after") @classmethod - def validate_llm_choice_defaults( - cls, llm_choice_defaults: LLMSettingChoices - ) -> LLMSettingChoices: + def validate_llm_choice_defaults(cls, llm_choice_defaults: LLMSettingChoices) -> LLMSettingChoices: if llm_choice_defaults.for_text is None: msg = "llm_choice_defaults.for_text cannot be None" raise ConfigValidationError(msg) @@ -179,9 +166,7 @@ def validate_llm_choice_defaults( @field_validator("llm_choice_overrides", mode="after") @classmethod - def validate_llm_choice_overrides( - cls, value: LLMSettingChoices - ) -> LLMSettingChoices: + def validate_llm_choice_overrides(cls, value: LLMSettingChoices) -> LLMSettingChoices: if value.for_text == LLM_PRESET_DISABLED: value.for_text = None if value.for_object == LLM_PRESET_DISABLED: @@ -204,9 +189,7 @@ def _validate_llm_choices(self, llm_choices: LLMSettingChoices): for llm_setting in llm_choices.list_choices(): self.check_llm_setting(llm_setting_or_preset_id=llm_setting) - def get_optional_inference_model( - self, model_handle: str - ) -> InferenceModelSpec | None: + def get_optional_inference_model(self, model_handle: str) -> InferenceModelSpec | None: if inference_model := self.inference_models.get(model_handle): return 
inference_model if redirection := self.aliases.get(model_handle): @@ -216,13 +199,9 @@ def get_optional_inference_model( else: alias_list = redirection for alias in alias_list: - if inference_model := self.get_optional_inference_model( - model_handle=alias - ): + if inference_model := self.get_optional_inference_model(model_handle=alias): return inference_model - log.warning( - f"Skipping model handle '{model_handle}' because it's not found in deck" - ) + log.warning(f"Skipping model handle '{model_handle}' because it's not found in deck") return None def is_handle_defined(self, model_handle: str) -> bool: @@ -234,7 +213,5 @@ def get_required_inference_model(self, model_handle: str) -> InferenceModelSpec: msg = f"Model handle '{model_handle}' not found in deck" raise ModelNotFoundError(msg) if model_handle not in self.inference_models: - log.dev( - f"Model handle '{model_handle}' is an alias which resolves to '{inference_model.name}'" - ) + log.dev(f"Model handle '{model_handle}' is an alias which resolves to '{inference_model.name}'") return inference_model diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index d3e728983..8ec30b83b 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -88,9 +88,7 @@ def __init__( try: self.pipelex_hub.setup_config(config_cls=config_cls or PipelexConfig) except ValidationError as validation_error: - validation_error_msg = report_validation_error( - category="config", validation_error=validation_error - ) + validation_error_msg = report_validation_error(category="config", validation_error=validation_error) msg = f"Could not setup config because of: {validation_error_msg}" raise PipelexConfigError(msg) from validation_error @@ -117,9 +115,7 @@ def __init__( self.reporting_delegate: ReportingProtocol if get_config().pipelex.feature_config.is_reporting_enabled: - self.reporting_delegate = reporting_delegate or ReportingManager( - reporting_config=get_config().pipelex.reporting_config - ) + self.reporting_delegate = 
reporting_delegate or ReportingManager(reporting_config=get_config().pipelex.reporting_config) else: self.reporting_delegate = ReportingNoOp() self.pipelex_hub.set_report_delegate(self.reporting_delegate) @@ -144,9 +140,7 @@ def __init__( if pipeline_tracker: self.pipeline_tracker = pipeline_tracker elif get_config().pipelex.feature_config.is_pipeline_tracking_enabled: - self.pipeline_tracker = PipelineTracker( - tracker_config=get_config().pipelex.tracker_config - ) + self.pipeline_tracker = PipelineTracker(tracker_config=get_config().pipelex.tracker_config) else: self.pipeline_tracker = PipelineTrackerNoOp() self.pipelex_hub.set_pipeline_tracker(pipeline_tracker=self.pipeline_tracker) @@ -170,9 +164,7 @@ def _get_config_not_found_error_msg(component_name: str) -> str: return f"Config files are missing for the {component_name}. Run `pipelex init config` to generate the missing files." @staticmethod - def _get_validation_error_msg( - component_name: str, validation_exc: Exception - ) -> str: + def _get_validation_error_msg(component_name: str, validation_exc: Exception) -> str: """Generate error message for invalid config files.""" msg = "" cause_exc = validation_exc.__cause__ @@ -217,14 +209,10 @@ def setup( msg = self._get_config_not_found_error_msg("model deck") raise PipelexSetupError(msg) from deck_not_found_exc except RoutingProfileValidationError as routing_validation_exc: - msg = self._get_validation_error_msg( - "routing profile library", routing_validation_exc - ) + msg = self._get_validation_error_msg("routing profile library", routing_validation_exc) raise PipelexSetupError(msg) from routing_validation_exc except InferenceBackendLibraryValidationError as backend_validation_exc: - msg = self._get_validation_error_msg( - "inference backend library", backend_validation_exc - ) + msg = self._get_validation_error_msg("inference backend library", backend_validation_exc) raise PipelexSetupError(msg) from backend_validation_exc except ModelDeckValidationError 
as deck_validation_exc: msg = self._get_validation_error_msg("model deck", deck_validation_exc) @@ -263,37 +251,27 @@ def setup( observer_provider = observer_provider or LocalObserver() self.pipelex_hub.set_observer_provider(observer_provider=observer_provider) - self.pipelex_hub.set_pipe_router( - pipe_router or PipeRouter(observer_provider=observer_provider) - ) + self.pipelex_hub.set_pipe_router(pipe_router or PipeRouter(observer_provider=observer_provider)) # pipeline self.pipeline_tracker.setup() self.pipeline_manager.setup() - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup done for {get_config().project_name}" - ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup done for {get_config().project_name}") def setup_libraries(self): self.library_manager.setup() self.library_manager.load_libraries() - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for {get_config().project_name}" - ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for {get_config().project_name}") def validate_libraries(self): try: self.library_manager.validate_libraries() except ValidationError as validation_error: - validation_error_msg = report_validation_error( - category="plx", validation_error=validation_error - ) + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) msg = f"Could not validate libraries because of: {validation_error_msg}" raise PipelexSetupError(msg) from validation_error - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}" - ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}") def teardown(self): # pipelex @@ -312,9 +290,7 @@ def teardown(self): self.class_registry.teardown() func_registry.teardown() - log.debug( - f"{PACKAGE_NAME} version {PACKAGE_VERSION} teardown done for {get_config().project_name} (except config & logs)" 
- ) + log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} teardown done for {get_config().project_name} (except config & logs)") self.pipelex_hub.reset_config() # Clear the singleton instance from metaclass if self.__class__ in MetaSingleton.instances: diff --git a/pipelex/plugins/anthropic/anthropic_llm_worker.py b/pipelex/plugins/anthropic/anthropic_llm_worker.py index a685922ea..760bda121 100644 --- a/pipelex/plugins/anthropic/anthropic_llm_worker.py +++ b/pipelex/plugins/anthropic/anthropic_llm_worker.py @@ -69,12 +69,8 @@ def __init__( # Verify if the sdk_instance is compatible with the current LLM platform if isinstance(sdk_instance, (AsyncAnthropic, AsyncAnthropicBedrock)): - if ( - inference_model.sdk == AnthropicSdkVariant.ANTHROPIC - and not (isinstance(sdk_instance, AsyncAnthropic)) - ) or ( - inference_model.sdk == AnthropicSdkVariant.BEDROCK_ANTHROPIC - and not (isinstance(sdk_instance, AsyncAnthropicBedrock)) + if (inference_model.sdk == AnthropicSdkVariant.ANTHROPIC and not (isinstance(sdk_instance, AsyncAnthropic))) or ( + inference_model.sdk == AnthropicSdkVariant.BEDROCK_ANTHROPIC and not (isinstance(sdk_instance, AsyncAnthropicBedrock)) ): msg = f"Provided sdk_instance does not match LLMEngine platform:{sdk_instance}" raise SdkTypeError(msg) @@ -85,29 +81,21 @@ def __init__( self.anthropic_async_client = sdk_instance if structure_method: instructor_mode = structure_method.as_instructor_mode() - log.debug( - f"Anthropic structure mode: {structure_method} --> {instructor_mode}" - ) - self.instructor_for_objects = instructor.from_anthropic( - client=sdk_instance, mode=instructor_mode - ) + log.debug(f"Anthropic structure mode: {structure_method} --> {instructor_mode}") + self.instructor_for_objects = instructor.from_anthropic(client=sdk_instance, mode=instructor_mode) else: self.instructor_for_objects = instructor.from_anthropic(client=sdk_instance) instructor_config = get_config().cogt.llm_config.instructor_config if 
instructor_config.is_dump_kwargs_enabled: - self.instructor_for_objects.on( - hook_name="completion:kwargs", handler=dump_kwargs - ) + self.instructor_for_objects.on(hook_name="completion:kwargs", handler=dump_kwargs) if instructor_config.is_dump_response_enabled: self.instructor_for_objects.on( hook_name="completion:response", handler=dump_response_from_structured_gen, ) if instructor_config.is_dump_error_enabled: - self.instructor_for_objects.on( - hook_name="completion:error", handler=dump_error - ) + self.instructor_for_objects.on(hook_name="completion:error", handler=dump_error) ######################################################### # Instance methods @@ -117,15 +105,9 @@ def __init__( def _adapt_max_tokens(self, max_tokens: int | None) -> int: max_tokens = max_tokens or self.default_max_tokens - if ( - claude_4_tokens_limit := self.extra_config.get( - AnthropicExtraField.CLAUDE_4_TOKENS_LIMIT - ) - ) and max_tokens > claude_4_tokens_limit: + if (claude_4_tokens_limit := self.extra_config.get(AnthropicExtraField.CLAUDE_4_TOKENS_LIMIT)) and max_tokens > claude_4_tokens_limit: max_tokens = claude_4_tokens_limit - log.warning( - f"Max tokens is greater than the claude 4 reduced tokens limit, reducing to {max_tokens}" - ) + log.warning(f"Max tokens is greater than the claude 4 reduced tokens limit, reducing to {max_tokens}") if not max_tokens: msg = f"Max tokens is None for model {self.inference_model.desc}" raise AnthropicWorkerConfigurationError(msg) @@ -158,12 +140,8 @@ async def _gen_text( raise LLMCompletionError(msg) full_reply_content = single_content_block.text - if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and ( - usage := response.usage - ): - llm_tokens_usage.nb_tokens_by_category = ( - AnthropicFactory.make_nb_tokens_by_category(usage=usage) - ) + if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and (usage := response.usage): + llm_tokens_usage.nb_tokens_by_category = 
AnthropicFactory.make_nb_tokens_by_category(usage=usage) return full_reply_content @@ -186,11 +164,7 @@ async def _gen_object( temperature=llm_job.job_params.temperature, max_tokens=max_tokens, ) - if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and ( - usage := completion.usage - ): - llm_tokens_usage.nb_tokens_by_category = ( - AnthropicFactory.make_nb_tokens_by_category(usage=usage) - ) + if (llm_tokens_usage := llm_job.job_report.llm_tokens_usage) and (usage := completion.usage): + llm_tokens_usage.nb_tokens_by_category = AnthropicFactory.make_nb_tokens_by_category(usage=usage) return result_object From c7d759d0230937426393932ee57d2019f004c246 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 21:16:27 +0200 Subject: [PATCH 053/115] kit migrations --- pipelex/cli/commands/kit_cmd.py | 41 +++++++++++++++---- pipelex/kit/cursor_export.py | 8 ++-- pipelex/kit/migrations_export.py | 30 ++++++++++++++ pipelex/kit/paths.py | 9 ++++ pipelex/kit/targets_update.py | 6 ++- .../pipelex/kit/test_cursor_export.py | 27 ++++++------ .../pipelex/kit/test_merged_rules.py | 13 ++---- .../pipelex/kit/test_targets_update.py | 13 ++---- 8 files changed, 102 insertions(+), 45 deletions(-) create mode 100644 pipelex/kit/migrations_export.py diff --git a/pipelex/cli/commands/kit_cmd.py b/pipelex/cli/commands/kit_cmd.py index 988bde29a..9c110ca45 100644 --- a/pipelex/cli/commands/kit_cmd.py +++ b/pipelex/cli/commands/kit_cmd.py @@ -8,14 +8,14 @@ from pipelex.exceptions import PipelexCLIError from pipelex.kit.cursor_export import export_cursor_rules from pipelex.kit.index_loader import load_index -from pipelex.kit.paths import get_agents_dir +from pipelex.kit.migrations_export import export_migration_instructions from pipelex.kit.targets_update import build_merged_rules, update_targets kit_app = typer.Typer(help="Manage kit assets: export Cursor rules and merge agent docs", no_args_is_help=True) -@kit_app.command("sync") -def sync( 
+@kit_app.command("rules") +def agent_rules( repo_root: Annotated[Path | None, typer.Option("--repo-root", dir_okay=True, writable=True, help="Repository root directory")] = None, cursor: Annotated[bool, typer.Option("--cursor/--no-cursor", help="Export Cursor rules to .cursor/rules")] = True, single_files: Annotated[bool, typer.Option("--single-files/--no-single-files", help="Update single-file agent documentation targets")] = True, @@ -34,16 +34,14 @@ def sync( repo_root = Path() idx = load_index() - agents_dir = get_agents_dir() if cursor: typer.echo("📤 Exporting Cursor rules...") - cursor_rules_dir = repo_root / ".cursor" / "rules" - export_cursor_rules(agents_dir, cursor_rules_dir, idx, dry_run=dry_run) + export_cursor_rules(repo_root, idx, dry_run=dry_run) if single_files: typer.echo("📝 Building merged agent documentation...") - merged_md = build_merged_rules(agents_dir, idx) + merged_md = build_merged_rules(idx) typer.echo("📝 Updating target files...") update_targets(repo_root, merged_md, idx.agent_rules.targets, dry_run=dry_run, diff=diff, backup=backup) @@ -53,5 +51,32 @@ def sync( typer.echo("✅ Kit sync completed successfully") except Exception as exc: - msg = f"Failed to sync kit assets: {exc}" + msg = f"Failed to sync kit assets for agent rules: {exc}" + raise PipelexCLIError(msg) from exc + + +@kit_app.command("migrations") +def migration_instructions( + repo_root: Annotated[Path | None, typer.Option("--repo-root", dir_okay=True, writable=True, help="Repository root directory")] = None, + dry_run: Annotated[bool, typer.Option("--dry-run", help="Show what would be done without making changes")] = False, +) -> None: + """Sync migration instructions from kit to .pipelex/migrations. + + This command copies migration documentation files from the pipelex.kit + package to the user's .pipelex/migrations directory. 
+ """ + try: + if repo_root is None: + repo_root = Path() + + typer.echo("📄 Syncing migration instructions...") + export_migration_instructions(repo_root, dry_run=dry_run) + + if dry_run: + typer.echo("✅ Dry run completed - no changes made") + else: + typer.echo(f"✅ Migration instructions synced to {repo_root / '.pipelex' / 'migrations'}") + + except Exception as exc: + msg = f"Failed to sync migration instructions: {exc}" raise PipelexCLIError(msg) from exc diff --git a/pipelex/kit/cursor_export.py b/pipelex/kit/cursor_export.py index ec1c7d260..c0490d48f 100644 --- a/pipelex/kit/cursor_export.py +++ b/pipelex/kit/cursor_export.py @@ -9,6 +9,7 @@ import yaml from pipelex.kit.index_models import KitIndex +from pipelex.kit.paths import get_agents_dir def _iter_agent_files(agents_dir: Traversable) -> Iterable[tuple[str, str]]: @@ -42,15 +43,16 @@ def _front_matter_for(name: str, idx: KitIndex) -> dict[str, Any]: return base -def export_cursor_rules(agents_dir: Traversable, out_dir: Path, idx: KitIndex, dry_run: bool = False) -> None: +def export_cursor_rules(repo_root: Path, idx: KitIndex, dry_run: bool = False) -> None: """Export agent markdown files to Cursor .mdc files with YAML front-matter. 
Args: - agents_dir: Traversable pointing to agents directory - out_dir: Output directory for .mdc files + repo_root: Repository root directory idx: Kit index configuration dry_run: If True, only print what would be done """ + agents_dir = get_agents_dir() + out_dir = repo_root / ".cursor" / "rules" out_dir.mkdir(parents=True, exist_ok=True) for fname, body in _iter_agent_files(agents_dir): diff --git a/pipelex/kit/migrations_export.py b/pipelex/kit/migrations_export.py new file mode 100644 index 000000000..7c4af2aae --- /dev/null +++ b/pipelex/kit/migrations_export.py @@ -0,0 +1,30 @@ +"""Export migration documentation from kit to user's .pipelex directory.""" + +from pathlib import Path + +import typer + +from pipelex.kit.paths import get_migrations_dir + + +def export_migration_instructions(repo_root: Path, dry_run: bool = False) -> None: + """Export migration documentation files to user's .pipelex/migrations directory. + + Args: + repo_root: Repository root directory + dry_run: If True, only print what would be done + """ + migrations_dir = get_migrations_dir() + out_dir = repo_root / ".pipelex" / "migrations" + out_dir.mkdir(parents=True, exist_ok=True) + + for child in migrations_dir.iterdir(): + if child.name.endswith(".md") and child.is_file(): + content = child.read_text(encoding="utf-8") + out_path = out_dir / child.name + + if dry_run: + typer.echo(f"[DRY] write {out_path}") + else: + out_path.write_text(content, encoding="utf-8") + typer.echo(f"✅ Copied {child.name}") diff --git a/pipelex/kit/paths.py b/pipelex/kit/paths.py index 6fda709b0..87c0499a7 100644 --- a/pipelex/kit/paths.py +++ b/pipelex/kit/paths.py @@ -29,3 +29,12 @@ def get_configs_dir() -> Traversable: Traversable object pointing to pipelex.kit/configs """ return get_kit_root() / "configs" + + +def get_migrations_dir() -> Traversable: + """Get the migrations directory within the kit package. 
+ + Returns: + Traversable object pointing to pipelex.kit/migrations + """ + return get_kit_root() / "migrations" diff --git a/pipelex/kit/targets_update.py b/pipelex/kit/targets_update.py index 05afddf25..7b1ed0b27 100644 --- a/pipelex/kit/targets_update.py +++ b/pipelex/kit/targets_update.py @@ -9,6 +9,7 @@ from pipelex.kit.index_models import KitIndex, Target from pipelex.kit.markers import find_span, replace_span, wrap +from pipelex.kit.paths import get_agents_dir def _read_agent_file(agents_dir: Traversable, name: str) -> str: @@ -48,17 +49,18 @@ def demote_match(match: re.Match[str]) -> str: return re.sub(pattern, demote_match, md_content, flags=re.MULTILINE) -def build_merged_rules(agents_dir: Traversable, idx: KitIndex, agent_set: str | None = None) -> str: +def build_merged_rules(idx: KitIndex, agent_set: str | None = None) -> str: """Build merged agent documentation from ordered files. Args: - agents_dir: Traversable pointing to agents directory idx: Kit index configuration agent_set: Name of the agent set to use (defaults to idx.agents.default_set) Returns: Merged markdown content with demoted headings """ + agents_dir = get_agents_dir() + if agent_set is None: agent_set = idx.agent_rules.default_set diff --git a/tests/integration/pipelex/kit/test_cursor_export.py b/tests/integration/pipelex/kit/test_cursor_export.py index 6cb380819..2a74c6243 100644 --- a/tests/integration/pipelex/kit/test_cursor_export.py +++ b/tests/integration/pipelex/kit/test_cursor_export.py @@ -2,7 +2,6 @@ from pipelex.kit.cursor_export import export_cursor_rules from pipelex.kit.index_loader import load_index -from pipelex.kit.paths import get_agents_dir class TestCursorExport: @@ -11,36 +10,36 @@ class TestCursorExport: def test_export_cursor_rules_dry_run(self, tmp_path: Path): """Test Cursor export in dry-run mode.""" idx = load_index() - agents_dir = get_agents_dir() - out_dir = tmp_path / "cursor_rules" + repo_root = tmp_path # Dry run should not create files - 
export_cursor_rules(agents_dir, out_dir, idx, dry_run=True) - assert not out_dir.exists() or len(list(out_dir.iterdir())) == 0 + export_cursor_rules(repo_root, idx, dry_run=True) + cursor_rules_dir = repo_root / ".cursor" / "rules" + assert not cursor_rules_dir.exists() or len(list(cursor_rules_dir.iterdir())) == 0 def test_export_cursor_rules_creates_mdc_files(self, tmp_path: Path): """Test that Cursor export creates .mdc files.""" idx = load_index() - agents_dir = get_agents_dir() - out_dir = tmp_path / "cursor_rules" + repo_root = tmp_path - export_cursor_rules(agents_dir, out_dir, idx, dry_run=False) + export_cursor_rules(repo_root, idx, dry_run=False) # Verify output directory exists and contains .mdc files - assert out_dir.exists() - mdc_files = list(out_dir.glob("*.mdc")) + cursor_rules_dir = repo_root / ".cursor" / "rules" + assert cursor_rules_dir.exists() + mdc_files = list(cursor_rules_dir.glob("*.mdc")) assert len(mdc_files) > 0, "Expected .mdc files to be created" def test_export_cursor_rules_have_front_matter(self, tmp_path: Path): """Test that exported .mdc files have YAML front-matter.""" idx = load_index() - agents_dir = get_agents_dir() - out_dir = tmp_path / "cursor_rules" + repo_root = tmp_path - export_cursor_rules(agents_dir, out_dir, idx, dry_run=False) + export_cursor_rules(repo_root, idx, dry_run=False) # Check first .mdc file for front-matter - mdc_files = list(out_dir.glob("*.mdc")) + cursor_rules_dir = repo_root / ".cursor" / "rules" + mdc_files = list(cursor_rules_dir.glob("*.mdc")) if mdc_files: content = mdc_files[0].read_text(encoding="utf-8") assert content.startswith("---\n"), "Expected YAML front-matter to start with ---" diff --git a/tests/integration/pipelex/kit/test_merged_rules.py b/tests/integration/pipelex/kit/test_merged_rules.py index de9aeba3e..fdad74a6b 100644 --- a/tests/integration/pipelex/kit/test_merged_rules.py +++ b/tests/integration/pipelex/kit/test_merged_rules.py @@ -3,7 +3,6 @@ import pytest from 
pipelex.kit.index_loader import load_index -from pipelex.kit.paths import get_agents_dir from pipelex.kit.targets_update import build_merged_rules @@ -13,9 +12,8 @@ class TestMergedRules: def test_build_merged_rules_default_set(self): """Test building merged rules with default set.""" idx = load_index() - agents_dir = get_agents_dir() - merged = build_merged_rules(agents_dir, idx) + merged = build_merged_rules(idx) assert merged is not None assert len(merged) > 0 assert merged.endswith("\n") @@ -23,27 +21,24 @@ def test_build_merged_rules_default_set(self): def test_build_merged_rules_specific_set(self): """Test building merged rules with specific set.""" idx = load_index() - agents_dir = get_agents_dir() # Test with 'all' set - merged = build_merged_rules(agents_dir, idx, agent_set="all") + merged = build_merged_rules(idx, agent_set="all") assert merged is not None assert len(merged) > 0 def test_build_merged_rules_invalid_set(self): """Test building merged rules with invalid set name.""" idx = load_index() - agents_dir = get_agents_dir() with pytest.raises(ValueError, match="Agent set 'nonexistent' not found"): - build_merged_rules(agents_dir, idx, agent_set="nonexistent") + build_merged_rules(idx, agent_set="nonexistent") def test_merged_rules_contain_demoted_headings(self): """Test that merged rules have demoted headings.""" idx = load_index() - agents_dir = get_agents_dir() - merged = build_merged_rules(agents_dir, idx) + merged = build_merged_rules(idx) # If demote is 1, check that we have ## headings (demoted from #) if idx.agent_rules.demote > 0: diff --git a/tests/integration/pipelex/kit/test_targets_update.py b/tests/integration/pipelex/kit/test_targets_update.py index 4b3dbec91..36a23917d 100644 --- a/tests/integration/pipelex/kit/test_targets_update.py +++ b/tests/integration/pipelex/kit/test_targets_update.py @@ -2,7 +2,6 @@ from pipelex.kit.index_loader import load_index from pipelex.kit.markers import find_span -from pipelex.kit.paths import 
get_agents_dir from pipelex.kit.targets_update import build_merged_rules, update_targets @@ -12,7 +11,6 @@ class TestTargetsUpdate: def test_update_targets_dry_run(self, tmp_path: Path): """Test updating targets in dry-run mode.""" idx = load_index() - agents_dir = get_agents_dir() # Create a temporary repo root with a target file repo_root = tmp_path / "repo" @@ -20,7 +18,7 @@ def test_update_targets_dry_run(self, tmp_path: Path): target_file = repo_root / "test_target.md" target_file.write_text("# Test\n\nOriginal content\n", encoding="utf-8") - merged_rules = build_merged_rules(agents_dir, idx) + merged_rules = build_merged_rules(idx) # Create a test target test_targets = {"test": idx.agent_rules.targets["agents"].model_copy(update={"path": "test_target.md"})} @@ -35,14 +33,13 @@ def test_update_targets_dry_run(self, tmp_path: Path): def test_update_targets_inserts_with_markers(self, tmp_path: Path): """Test that update_targets inserts content with markers.""" idx = load_index() - agents_dir = get_agents_dir() repo_root = tmp_path / "repo" repo_root.mkdir() target_file = repo_root / "test_target.md" target_file.write_text("# Test\n\nOriginal content\n", encoding="utf-8") - merged_rules = build_merged_rules(agents_dir, idx) + merged_rules = build_merged_rules(idx) test_targets = {"test": idx.agent_rules.targets["agents"].model_copy(update={"path": "test_target.md"})} @@ -62,7 +59,6 @@ def test_update_targets_inserts_with_markers(self, tmp_path: Path): def test_update_targets_replaces_existing_markers(self, tmp_path: Path): """Test that update_targets replaces content between existing markers.""" idx = load_index() - agents_dir = get_agents_dir() repo_root = tmp_path / "repo" repo_root.mkdir() @@ -74,7 +70,7 @@ def test_update_targets_replaces_existing_markers(self, tmp_path: Path): initial_content = f"# Test\n\n{marker_begin}\nOld content\n{marker_end}\n" target_file.write_text(initial_content, encoding="utf-8") - merged_rules = build_merged_rules(agents_dir, 
idx) + merged_rules = build_merged_rules(idx) test_targets = { "test": idx.agent_rules.targets["agents"].model_copy( @@ -100,7 +96,6 @@ def test_update_targets_replaces_existing_markers(self, tmp_path: Path): def test_update_targets_creates_backup(self, tmp_path: Path): """Test that update_targets creates backup files when requested.""" idx = load_index() - agents_dir = get_agents_dir() repo_root = tmp_path / "repo" repo_root.mkdir() @@ -108,7 +103,7 @@ def test_update_targets_creates_backup(self, tmp_path: Path): original_content = "# Test\n\nOriginal content\n" target_file.write_text(original_content, encoding="utf-8") - merged_rules = build_merged_rules(agents_dir, idx) + merged_rules = build_merged_rules(idx) test_targets = {"test": idx.agent_rules.targets["agents"].model_copy(update={"path": "test_target.md"})} From f64fda02f670635b2cb8321431bf6a9d87f09289 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 23:04:33 +0200 Subject: [PATCH 054/115] Remove untested code from migration guide --- .../inference/backends/pipelex_inference.toml | 7 + docs/pages/advanced-customization/index.md | 8 +- .../define_your_concepts.md | 8 +- .../kick-off-a-knowledge-pipeline-project.md | 74 ++- .../config-technical/library-config.md | 320 ++++++++--- docs/pages/installation/index.md | 38 +- docs/pages/quick-start/index.md | 13 +- docs/pages/tools/cli.md | 20 +- pipelex/cli/commands/init_cmd.py | 26 - pipelex/cogt/models/model_manager.py | 2 +- .../inference/backends/pipelex_inference.toml | 7 + .../kit/migrations/migrate_0.11.0_0.12.0.md | 501 +++++++++++++----- uv.lock | 6 +- 13 files changed, 730 insertions(+), 300 deletions(-) diff --git a/.pipelex/inference/backends/pipelex_inference.toml b/.pipelex/inference/backends/pipelex_inference.toml index d850714e8..eb82d8c71 100644 --- a/.pipelex/inference/backends/pipelex_inference.toml +++ b/.pipelex/inference/backends/pipelex_inference.toml @@ -101,6 +101,13 @@ inputs = ["text", "images"] outputs = ["text", 
"structured"] costs = { input = 0.10, output = 0.40 } +["gemini-2.5-pro"] +model_id = "gemini-2.5-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 1.25, output = 10.0 } + ["gemini-2.5-flash"] model_id = "gemini/gemini-2.5-flash" inputs = ["text", "images"] diff --git a/docs/pages/advanced-customization/index.md b/docs/pages/advanced-customization/index.md index ceb53e5d6..af83f63f8 100644 --- a/docs/pages/advanced-customization/index.md +++ b/docs/pages/advanced-customization/index.md @@ -13,15 +13,11 @@ There are two main ways to inject custom implementations: ```python from pipelex import Pipelex -pipelex = Pipelex( - config_dir_path="./pipelex_libraries", +pipelex = Pipelex.make( template_provider=MyTemplateProvider(), pipeline_tracker=MyPipelineTracker(), activity_manager=MyActivityManager(), - reporting_delegate=MyReportingDelegate() -) - -pipelex.setup( + reporting_delegate=MyReportingDelegate(), secrets_provider=MySecretsProvider(), content_generator=MyContentGenerator(), pipe_router=MyPipeRouter() diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md index 73408ce72..bd231a903 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md @@ -53,7 +53,7 @@ Those concepts will be Text-based by default. If you want to use sutrctured outp Group concepts that naturally belong together in the same domain. A domain acts as a namespace for a set of related concepts and pipes, helping you organize and reuse your pipeline components. You can learn more about them in [Kick off a Knowledge Pipeline Project](kick-off-a-knowledge-pipeline-project.md#what-are-domains). 
```plx -# pipelex_libraries/pipelines/finance.plx +# finance.plx domain = "finance" description = "Financial document processing" @@ -76,7 +76,7 @@ While text definitions help LLMs understand your concepts, Python models ensure For each concept that needs structured output, create a corresponding Python class: ```python -# pipelex_libraries/pipelines/finance.py +# finance.py from datetime import datetime from typing import List, Optional from pydantic import Field @@ -148,7 +148,7 @@ class Meeting(StructuredContent): The connection between PLX definitions and Python models happens automatically through naming: ```plx -# pipelex_libraries/pipelines/hr.plx +# hr.plx domain = "hr" [concept] @@ -159,7 +159,7 @@ Department = "An organizational unit within a company" # No Python model => tex ``` ```python -# pipelex_libraries/pipelines/hr.py +# hr.py from pipelex.core.stuffs.structured_content import StructuredContent from datetime import datetime from typing import List, Optional diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md index aad64e211..f5ddefddd 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md @@ -2,10 +2,10 @@ ## Creating Your First Pipeline -A pipeline in Pipelex is a collection of related concepts and pipes. Start by creating a PLX file in the `pipelines` directory: +A pipeline in Pipelex is a collection of related concepts and pipes. Start by creating a PLX file in your project: ```plx -# pipelex_libraries/pipelines/tutorial.plx +# tutorial.plx domain = "tutorial" description = "My first Pipelex library" @@ -99,8 +99,10 @@ A domain consists of: 2. **Organization** - One domain per topic/functionality. 
- - Match Python file names with domain names (`finance.plx` -> `finance.py`). + - Use `_struct.py` suffix for structure files (`finance.plx` -> `finance_struct.py`). - Keep related concepts within the same domain. + - Place your `.plx` files anywhere in your project - Pipelex automatically discovers them. + - Keep `.pipelex/` configuration directory at repository root. 3. **Documentation** - Always add a description to your domain. @@ -132,25 +134,63 @@ Consistent naming makes your pipeline code discoverable and maintainable: - For multi-word domains, use underscores: domain "customer_service" → `customer_service.plx` ### Python Model Files -- It is recommended to match the PLX filename exactly: `legal.plx` → `legal.py` -- But in any case, Pipelex will load models from all python modules in the `pipelines` directory or its subdirectories. +- It is recommended to name structure files with a `_struct.py` suffix: `legal.plx` → `legal_struct.py` +- Pipelex will automatically discover and load structure classes from all Python files in your project (excluding common directories like `.venv`, `.git`, etc.) ## Project Structure -Every Pipelex project follows a simple directory structure that keeps your knowledge pipelines organized and maintainable: +**Key principle:** Put `.plx` files where they belong in YOUR codebase. Pipelex automatically finds them. 
+### Recommended Patterns + +**Topic-Based (Best for organized codebases):** +``` +your-project/ +├── my_project/ # Your Python package +│ ├── finance/ +│ │ ├── models.py +│ │ ├── services.py +│ │ ├── invoices.plx # Pipeline with finance code +│ │ └── invoices_struct.py # Structure classes +│ └── legal/ +│ ├── models.py +│ ├── contracts.plx # Pipeline with legal code +│ └── contracts_struct.py +├── .pipelex/ # Config at repo root +│ ├── pipelex.toml +│ └── inference/ +└── requirements.txt +``` + +**Centralized (If you prefer grouping pipelines):** ``` your-project/ -├── pipelex_libraries/ # All your pipeline code lives here -│ ├── pipelines/ # Pipeline definitions and models -│ │ ├── __init__.py -│ │ ├── characters.plx # Domain definitions -│ │ └── characters.py # Python models for concepts -│ ├── templates/ # Reusable prompt templates -│ ├── llm_integrations/ # LLM provider configurations -│ └── llm_deck/ # LLM model presets -├── main.py # Your application code -└── requirements.txt # Python dependencies +├── my_project/ +│ ├── pipelines/ # All pipelines together +│ │ ├── finance.plx +│ │ ├── finance_struct.py +│ │ ├── legal.plx +│ │ └── legal_struct.py +│ └── core/ +│ └── (your code) +└── .pipelex/ ``` -The `pipelex_libraries/pipelines` directory is where Pipelex looks for your pipeline definitions. This standardized structure means you can share libraries between projects, version control them separately, and maintain clean separation between your pipeline logic and application code. 
+**Flat (Small projects):** +``` +your-project/ +├── my_project/ +│ ├── invoice_pipeline.plx +│ ├── invoice_struct.py +│ └── main.py +└── .pipelex/ +``` + +### Key Points + +- **Flexible placement**: `.plx` files work anywhere in your project +- **Automatic discovery**: Pipelex scans and finds them automatically +- **Configuration location**: `.pipelex/` stays at repository root +- **Naming convention**: Use `_struct.py` suffix for structure files +- **Excluded directories**: `.venv`, `.git`, `__pycache__`, `node_modules` are skipped +- **Best practice**: Keep related pipelines with their related code diff --git a/docs/pages/configuration/config-technical/library-config.md b/docs/pages/configuration/config-technical/library-config.md index 64aff7be5..3273cf5bf 100644 --- a/docs/pages/configuration/config-technical/library-config.md +++ b/docs/pages/configuration/config-technical/library-config.md @@ -1,107 +1,297 @@ -# Library Configuration +# Pipeline Discovery and Loading -The Library Configuration manages how Pipelex organizes, loads, and handles libraries in your project. Libraries in Pipelex include pipelines and templates. +Pipelex automatically discovers and loads pipeline files (`.plx`) and structure classes from your project. This page explains how the discovery system works and how to organize your pipelines effectively. -## Directory Structure +## How Pipeline Discovery Works -The library system uses two main root directories: +When you initialize Pipelex with `Pipelex.make()`, the system: -- Internal library root (`pipelex/libraries`): Contains the base libraries shipped with Pipelex -- Exported library root (`pipelex_libraries`): Contains your project's libraries, including copies of base libraries +1. **Scans your project directory** for all `.plx` files +2. **Discovers Python structure classes** that inherit from `StructuredContent` +3. **Loads pipeline definitions** including domains, concepts, and pipes +4. 
**Registers custom functions** decorated with `@pipe_func()` -### Standard Paths +All of this happens automatically - no configuration needed. + +## Excluded Directories + +To improve performance and avoid loading unnecessary files, Pipelex automatically excludes common directories from discovery: + +- `.venv` - Virtual environments +- `.git` - Git repository data +- `__pycache__` - Python bytecode cache +- `.pytest_cache` - Pytest cache +- `.mypy_cache` - Mypy type checker cache +- `.ruff_cache` - Ruff linter cache +- `node_modules` - Node.js dependencies +- `.env` - Environment files +- `results` - Common output directory + +Files in these directories will not be scanned, even if they contain `.plx` files or structure classes. + +## Project Organization + +**Golden rule:** Put `.plx` files where they make sense in YOUR project. Pipelex finds them automatically. + +### Common Patterns + +**1. Topic-Based (Recommended for structured projects)** + +Keep pipelines with related code: ``` -pipelex_libraries/ # Exported library root -├── pipelines/ # Pipeline definitions -│ ├── base_library/ # Base pipelines from Pipelex -│ └── your_pipelines/ # Your custom pipelines -└── templates/ # Template files +your_project/ +├── my_project/ +│ ├── finance/ +│ │ ├── models.py +│ │ ├── services.py +│ │ ├── invoices.plx # With finance code +│ │ └── invoices_struct.py +│ └── legal/ +│ ├── models.py +│ ├── contracts.plx # With legal code +│ └── contracts_struct.py +├── .pipelex/ +└── requirements.txt ``` -## Library Loading Process +**Benefits:** +- Related things stay together +- Easy to find pipeline for a specific module +- Natural code organization -1. **Domain Loading**: +**2. Centralized (For simpler discovery)** - - Loads domain definitions first - - Each domain must be defined exactly once - - Supports system prompts and structure templates +Group all pipelines in one place: -2. 
**Concept Loading**: +``` +your_project/ +├── my_project/ +│ ├── pipelines/ # All pipelines here +│ │ ├── finance.plx +│ │ ├── finance_struct.py +│ │ ├── legal.plx +│ │ └── legal_struct.py +│ └── core/ +└── .pipelex/ +``` - - Loads native concepts first - - Loads custom concepts from PLX files - - Validates concept definitions and relationships +**Benefits:** +- All pipelines in one location +- Simple structure for small projects -3. **Pipe Loading**: +## Alternative Structures - - Loads pipe definitions after concepts - - Validates pipe configurations - - Links pipes with their respective domains +Pipelex supports flexible organization. Here are other valid approaches: -### Library Initialization +### Feature-Based Organization + +``` +your_project/ +├── my_project/ +│ ├── features/ +│ │ ├── document_processing/ +│ │ │ ├── extract.plx +│ │ │ └── extract_struct.py +│ │ └── image_generation/ +│ │ ├── generate.plx +│ │ └── generate_struct.py +│ └── main.py +└── .pipelex/ +``` + +### Domain-Driven Organization -Use the CLI command to initialize libraries: -```bash -pipelex init libraries +``` +your_project/ +├── my_project/ +│ ├── finance/ +│ │ ├── pipelines/ +│ │ │ └── invoices.plx +│ │ └── invoice_struct.py +│ ├── legal/ +│ │ ├── pipelines/ +│ │ │ └── contracts.plx +│ │ └── contract_struct.py +│ └── main.py +└── .pipelex/ ``` -This will: +### Flat Organization (Small Projects) -1. Create the necessary directory structure -2. Copy base libraries to your project -3. Set up initial configuration files +``` +your_project/ +├── my_project/ +│ ├── invoice_processing.plx +│ ├── invoice_struct.py +│ └── main.py +└── .pipelex/ +``` -### Library Export Options +## Loading Process -When exporting libraries to your project: +Pipelex loads your pipelines in a specific order to ensure dependencies are resolved correctly: -- Use `overwrite=True` to force update existing files -- Maintain directory structure and initialization files +### 1. 
Domain Loading -## Validation +- Loads domain definitions from all `.plx` files +- Each domain must be defined exactly once +- Supports system prompts and structure templates per domain + +### 2. Concept Loading + +- Loads native concepts (Text, Image, PDF, etc.) +- Loads custom concepts from `.plx` files +- Validates concept definitions and relationships +- Links concepts to Python structure classes by name + +### 3. Structure Class Registration + +- Discovers all classes inheriting from `StructuredContent` +- Registers them in the class registry +- Makes them available for structured output generation + +### 4. Pipe Loading + +- Loads pipe definitions from `.plx` files +- Validates pipe configurations +- Links pipes with their respective domains +- Resolves input/output concept references + +### 5. Function Registration + +- Discovers functions decorated with `@pipe_func()` +- Registers them in the function registry +- Makes them available for `PipeFunc` operators -The library manager performs several validation steps: +## Custom Function Registration -1. **Concept Library Validation**: +For custom functions used in `PipeFunc` operators, add the `@pipe_func()` decorator: - - Checks concept relationships - - Validates concept definitions +```python +from pipelex.tools.func_registry import pipe_func +from pipelex.core.memory.working_memory import WorkingMemory +from pipelex.core.stuffs.text_content import TextContent -2. **Pipe Library Validation**: +@pipe_func() +async def my_custom_function(working_memory: WorkingMemory) -> TextContent: + """ + This function is automatically discovered and registered. 
+ """ + input_data = working_memory.get_stuff("input_name") + # Process data + return TextContent(text=f"Processed: {input_data.content.text}") - - Verifies pipe configurations - - Checks domain relationships +# Optional: specify a custom name +@pipe_func(name="custom_processor") +async def another_function(working_memory: WorkingMemory) -> TextContent: + # Implementation + pass +``` -3. **Domain Library Validation**: +## Validation - - Ensures domain completeness - - Validates domain relationships +After making changes to your pipelines, validate them: -## Error Handling +```bash +# Validate all pipelines +pipelex validate all -The library system includes specific error types: +# Validate a specific pipe +pipelex validate pipe YOUR_PIPE_CODE -- `LibraryError`: Base error for library issues -- `LibraryParsingError`: For PLX parsing issues -- `ConceptLibraryError`: For concept-related issues -- `PipeLibraryError`: For pipe-related issues +# Show all available pipes +pipelex show pipes + +# Show details of a specific pipe +pipelex show pipe YOUR_PIPE_CODE +``` ## Best Practices -1. **Organization**: + +### 1. Organization + +- Keep related concepts and pipes in the same `.plx` file +- Use meaningful domain names that reflect functionality +- Name structure files after their PLX file, with a `_struct.py` suffix (`finance.plx` → `finance_struct.py`) +- Group complex pipelines using subdirectories + +### 2. Structure Classes + +- Only create Python classes when you need structured output +- Name classes to match concept names exactly +- Use `_struct.py` suffix for files containing structure classes (e.g., `finance_struct.py`) +- Inherit from `StructuredContent` or its subclasses +- Place structure class files near their corresponding `.plx` files + +### 3. Custom Functions + +- Always use the `@pipe_func()` decorator +- Use descriptive function names +- Document function parameters and return types +- Keep functions focused and testable + +### 4.
Validation + +- Run `pipelex validate all` after making changes +- Check for domain consistency +- Verify concept relationships +- Test pipes individually before composing them + +## Troubleshooting + +### Pipelines Not Found + +**Problem:** Pipelex doesn't find your `.plx` files. + +**Solutions:** + +1. Ensure files have the `.plx` extension +2. Check that files are not in excluded directories +3. Verify file permissions allow reading +4. Run `pipelex show pipes` to see what was discovered + +### Structure Classes Not Registered + +**Problem:** Your Python classes aren't recognized. + +**Solutions:** + +1. Ensure classes inherit from `StructuredContent` +2. Check class names match concept names exactly +3. Verify files are not in excluded directories +4. Make sure class definitions are valid Python + +### Custom Functions Not Found + +**Problem:** `PipeFunc` can't find your function. + +**Solutions:** + +1. Add the `@pipe_func()` decorator +2. Ensure function signature matches requirements +3. Check function is `async` and accepts `working_memory` +4. Verify function is in a discoverable location + +### Validation Errors + +**Problem:** `pipelex validate all` shows errors. + +**Solutions:** - - Keep related concepts and pipes in the same PLX file - - Use meaningful domain names - - Structure complex libraries using subdirectories +1. Read error messages carefully - they indicate the problem +2. Check concept references are spelled correctly +3. Verify pipe configurations match expected format +4. Ensure all required fields are present -2. **Validation**: +## Migration from Old System - - Run `pipelex validate all` after making changes - - Check for domain consistency - - Verify concept relationships +If you're migrating from the old `pipelex_libraries` system, see the [Migration Guide](https://github.com/Pipelex/pipelex/blob/main/no_more_pipelex_libraries.md) for detailed instructions. -3. 
**Customization**: +Key changes: - - Keep custom pipelines separate from base library - - Document domain-specific configurations +- No `pipelex init libraries` command needed +- No `pipelex_libraries` directory required +- No `-c/--config-folder-path` flags needed +- Structure classes are auto-discovered +- Custom functions need `@pipe_func()` decorator diff --git a/docs/pages/installation/index.md b/docs/pages/installation/index.md index 906ae2301..f47f0e9d5 100644 --- a/docs/pages/installation/index.md +++ b/docs/pages/installation/index.md @@ -36,29 +36,37 @@ OPENAI_API_KEY=sk_... All the secret keys used by `pipelex` are specified in the `.env.example` file. However, by default, only the `OPENAI_API_KEY` is required. -- **Make sure you run the init commands:** +- **Initialize configuration:** -In order to set the pipelex configuration files, you need to run 2 commands using the CLI (we recommend to run it at the root of your project): +To set up the Pipelex configuration files, run this command at the root of your project: -- `pipelex init libraries`: This will create a `pipelex_libraries` folder, with the base llm configuration and the base pipelines. -This is the directory where you should add your pipelines. +- `pipelex init config`: This CLI command will create a `.pipelex/` directory with configuration files including `pipelex.toml`. This configuration file contains settings for feature flags, logging, cost reporting, and more. Learn more in our [Configuration documentation](../configuration/index.md) -The structure is like this: +- **Create your pipelines:** + +You can now create `.plx` pipeline files **anywhere** in your project. Pipelex automatically discovers them (excluding `.venv`, `.git`, `node_modules`, etc.). 
+ +**Keep pipelines with related code** - that's usually the best organization: ```bash -├── pipelex_libraries -│ ├── __init__.py -│ ├── pipelines/ # The pipelines and the structured output are stored here -│ │ ├── __init__.py -│ │ └── base_library/ # The base library with basic pipelines -│ ├── templates/ # Those are template prompt libraries -│ ├── llm_deck/ # A llm deck is a simple way to name a llm and its configuration. -│ └── llm_integrations/ # This directory regroups the configuration of the different models +your_project/ +├── my_project/ # Your Python package +│ ├── finance/ +│ │ ├── services.py +│ │ ├── invoices.plx # Pipeline with finance code +│ │ └── invoices_struct.py # Structure classes +│ └── legal/ +│ ├── services.py +│ ├── contracts.plx # Pipeline with legal code +│ └── contracts_struct.py +├── .pipelex/ # Config at repo root (created by init config) +│ └── pipelex.toml +└── requirements.txt ``` -Learn more about pipelex_libraries in our [Libraries documentation](../build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md) +Or centralize if you prefer: `my_project/pipelines/*.plx` -- `pipelex init config`: This cli command will create a `pipelex.toml` file at the root of the project, with basic configuration. This configuration file gathers all configuration for feature flags, logging, cost reporting, and so on... Learn more in our [Configuration documentation](../configuration/index.md) +Learn more about flexible organization in our [Project Structure documentation](../build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md) 💡 _Any troubles? Have a look at our [Cookbook](https://github.com/Pipelex/pipelex-cookbook)! diff --git a/docs/pages/quick-start/index.md b/docs/pages/quick-start/index.md index 062189cec..01a943cce 100644 --- a/docs/pages/quick-start/index.md +++ b/docs/pages/quick-start/index.md @@ -72,8 +72,7 @@ For illustration purposes, let's build **a character generator**. 
Each example r ### Write your first pipeline -First, create a `.plx` library file in the `pipelex_libraries/pipelines` directory to store your pipe definition. -Run `pipelex init libraries` to create this directory if it doesn't exist. For now, keep all your pipeline definitions inside that folder only. +Create a `.plx` file to store your pipe definition. You can place it anywhere in your project - we recommend creating a `pipelines` directory for organization. `character.plx` ```plx @@ -162,9 +161,9 @@ Let's say that we no longer want plain text as output but a rigorously structure ### Define the model -Using the [Pydantic BaseModel](https://docs.pydantic.dev/latest/) syntax, define your object structure as a Python class, in the `pipelex_libraries/pipelines` directory: +Using the [Pydantic BaseModel](https://docs.pydantic.dev/latest/) syntax, define your object structure as a Python class in your project: -`pipelex_libraries/pipelines/characters.py` +`characters.py` ```python from pipelex.core.stuffs.structured_content import StructuredContent @@ -183,7 +182,7 @@ It's time to specify that your output be a `Character` instance. Use the `output 💡 Here, the concept name matches the class name (ie. `Character`), the `Character` class will automatically be considered as the structure to output. -`pipelex_libraries/pipelines/characters.plx` +`characters.plx` ```plx domain = "characters" @@ -220,7 +219,7 @@ We want to extract structured information from the description field. 
Thus we ha ### Define the output structure ```python -# pipelex_libraries/pipelines/character_model.py +# character_model.py from pipelex.core.stuffs.structured_content import StructuredContent # input class @@ -275,7 +274,7 @@ from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline -from pipelex.libraries.pipelines.screenplay import Character, CharacterMetadata +from character_model import Character, CharacterMetadata async def process_existing_character(): diff --git a/docs/pages/tools/cli.md b/docs/pages/tools/cli.md index 15411884e..557131cfe 100644 --- a/docs/pages/tools/cli.md +++ b/docs/pages/tools/cli.md @@ -6,10 +6,9 @@ The Pipelex CLI provides a command-line interface for managing and interacting w ### Init group -Initialize project assets. +Initialize project configuration. ```bash -pipelex init libraries [DIRECTORY] [--overwrite/-o] pipelex init config [--reset/-r] ``` @@ -18,8 +17,8 @@ pipelex init config [--reset/-r] Validate configuration and pipelines. ```bash -pipelex validate all [-c/--config-folder-path PATH] -pipelex validate pipe PIPE_CODE [-c/--config-folder-path PATH] +pipelex validate all +pipelex validate pipe PIPE_CODE ``` ### Show group @@ -28,15 +27,16 @@ Inspect configuration and pipes. ```bash pipelex show config -pipelex show pipes [-c/--config-folder-path PATH] -pipelex show pipe PIPE_CODE [-c/--config-folder-path PATH] +pipelex show pipes +pipelex show pipe PIPE_CODE ``` ## Usage Tips 1. Always run `pipelex validate all` after making changes to your configuration or pipelines 2. Use `pipelex show config` to debug configuration issues -3. When initializing a new project: - - Start with `pipelex init config` - - Then run `pipelex init libraries` - - Finally, validate your setup with `pipelex validate all` +3. Use `pipelex show pipes` to see all discovered pipelines +4. 
When initializing a new project: + - Run `pipelex init config` to create configuration files + - Create your `.plx` pipeline files anywhere in your project + - Validate your setup with `pipelex validate all` diff --git a/pipelex/cli/commands/init_cmd.py b/pipelex/cli/commands/init_cmd.py index 97813ff47..97b40af04 100644 --- a/pipelex/cli/commands/init_cmd.py +++ b/pipelex/cli/commands/init_cmd.py @@ -13,24 +13,6 @@ PACKAGE_VERSION = metadata(PACKAGE_NAME)["Version"] -def do_init_libraries(directory: str = ".", overwrite: bool = False) -> None: - # try: - # target_dir = os.path.join(directory, "pipelex_libraries") - # os.makedirs(directory, exist_ok=True) - - # library_config = LibraryConfig(config_dir_path=target_dir) - # library_config.export_libraries(overwrite=overwrite) - - # if overwrite: - # typer.echo(f"✅ Successfully initialized pipelex libraries at '{target_dir}' (all files overwritten)") - # else: - # typer.echo(f"✅ Successfully initialized pipelex libraries at '{target_dir}' (only created non-existing files)") - # except Exception as exc: - # msg = f"Failed to initialize libraries at '{directory}': {exc}" - # raise PipelexCLIError(msg) from exc - pass - - def do_init_config(reset: bool = False) -> None: """Initialize pipelex configuration in the current directory.""" config_template_dir = str(get_configs_dir()) @@ -83,14 +65,6 @@ def copy_directory_structure(src_dir: str, dst_dir: str, relative_path: str = "" init_app = typer.Typer(help="Initialization commands", no_args_is_help=True) -@init_app.command("libraries") -def init_libraries_cmd( - directory: Annotated[str, typer.Argument(help="Directory where to create the pipelex_libraries folder")] = ".", - overwrite: Annotated[bool, typer.Option("--overwrite", "-o", help="Warning: If set, existing files will be overwritten.")] = False, -) -> None: - do_init_libraries(directory=directory, overwrite=overwrite) - - @init_app.command("config") def init_config_cmd( reset: Annotated[bool, 
typer.Option("--reset", "-r", help="Warning: If set, existing files will be overwritten.")] = False, diff --git a/pipelex/cogt/models/model_manager.py b/pipelex/cogt/models/model_manager.py index 1d14f3dbe..a8f4cff27 100644 --- a/pipelex/cogt/models/model_manager.py +++ b/pipelex/cogt/models/model_manager.py @@ -47,7 +47,7 @@ def load_deck_blueprint(cls) -> ModelDeckBlueprint: deck_paths = get_config().cogt.inference_config.get_model_deck_paths() full_deck_dict: dict[str, Any] = {} if not deck_paths: - msg = "No Model deck paths found. Please run `pipelex init-libraries` to create the set up the base deck." + msg = "No Model deck paths found. Please run `pipelex init config` to create the set up the base deck." raise ModelDeckNotFoundError(msg) for deck_path in deck_paths: diff --git a/pipelex/kit/configs/inference/backends/pipelex_inference.toml b/pipelex/kit/configs/inference/backends/pipelex_inference.toml index d850714e8..eb82d8c71 100644 --- a/pipelex/kit/configs/inference/backends/pipelex_inference.toml +++ b/pipelex/kit/configs/inference/backends/pipelex_inference.toml @@ -101,6 +101,13 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 0.10, output = 0.40 } +["gemini-2.5-pro"] +model_id = "gemini-2.5-pro" +inputs = ["text", "images"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 1.25, output = 10.0 } + ["gemini-2.5-flash"] model_id = "gemini/gemini-2.5-flash" inputs = ["text", "images"] diff --git a/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md b/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md index 3cd1e392b..1cc0bd489 100644 --- a/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md +++ b/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md @@ -5,12 +5,18 @@ This guide will help you migrate your Pipelex pipelines and configurations to th ## Overview This release introduces several breaking changes to make the Pipelex language more declarative, intuitive, and consistent. 
The changes affect: +- Project structure and organization - Pipeline definitions (.plx files) - Configuration files (.pipelex/ directory) +- Python code initialization - Test markers ## Migration Checklist +- [ ] **Migrate from pipelex_libraries system (CRITICAL)** +- [ ] Move .plx files to appropriate locations in your project +- [ ] Update Pipelex.make() calls (remove config path parameters) +- [ ] Add @pipe_func() decorators to custom functions used in PipeFunc operators - [ ] Update PipeCompose (formerly PipeJinja2) - [ ] Update PipeExtract (formerly PipeOCR) - [ ] Update PipeLLM prompts and fields @@ -20,7 +26,246 @@ This release introduces several breaking changes to make the Pipelex language mo - [ ] Update test markers - [ ] Run validation -## 1. General Changes +## 1. Library System Removal (CRITICAL) + +The centralized `pipelex_libraries` folder system has been removed in favor of automatic pipeline discovery throughout your project. + +### Key Changes + +1. **No more `pipelex init libraries` command** +2. **No centralized `pipelex_libraries` directory required** +3. **Pipelines are auto-discovered** from anywhere in your project +4. **No config path parameters** needed in commands or code +5. **Custom functions require `@pipe_func()` decorator** +6. **Structure classes are auto-discovered** + +### Step 1: Move Pipeline Files (Flexible Organization) + +**The key change:** `.plx` files can now live ANYWHERE in your project. No special directory required! + +**Recommendation:** Put `.plx` files with related code. If you have topic-based organization, keep pipelines with their topics. 
+ +**Example Migration Patterns:** + +**Pattern A: Topic-Based (Recommended if you have domain modules)** +``` +Before: +pipelex_libraries/pipelines/ +├── finance.plx +├── finance.py +├── legal.plx +└── legal.py + +After - Keep with related code: +my_project/ +├── finance/ +│ ├── models.py +│ ├── services.py +│ ├── invoices.plx # Pipeline with finance code +│ └── invoices_struct.py # Structure classes +└── legal/ + ├── models.py + ├── services.py + ├── contracts.plx # Pipeline with legal code + └── contracts_struct.py +``` + +**Pattern B: Centralized Pipelines (If you prefer grouping)** +``` +After - Group pipelines together: +my_project/ +├── pipelines/ +│ ├── finance.plx +│ ├── finance_struct.py +│ ├── legal.plx +│ └── legal_struct.py +└── core/ + └── (your other code) +``` + +**Pattern C: Flat (Small projects)** +``` +After - Just put them in your source directory: +my_project/ +├── finance_pipeline.plx +├── finance_struct.py +└── main.py +``` + +**Action Items:** + +1. **Choose your organization** (any of the above patterns work) + +2. **Move .plx files** to where they make sense for YOUR project: + ```bash + # Example: Moving to topic-based structure + mv pipelex_libraries/pipelines/finance.plx my_project/finance/ + ``` + +3. **Rename structure files** with `_struct.py` suffix: + ```bash + # Example + mv my_project/finance/finance.py my_project/finance/finance_struct.py + ``` + +4. **Clean up:** + ```bash + # After all files are moved + rm -rf pipelex_libraries/ + + # Remove from .gitignore if present + sed -i '/^\/pipelex_libraries$/d' .gitignore + ``` + +**Remember:** Configuration (`.pipelex/`) stays at repository root. 
+ +### Step 2: Update Python Code Initialization + +**Before:** +```python +from pipelex.pipelex import Pipelex + +pipelex_instance = Pipelex.make( + relative_config_folder_path="../../../pipelex/libraries", + from_file=True +) +``` + +**After:** +```python +from pipelex.pipelex import Pipelex + +# No path needed - automatic discovery +pipelex_instance = Pipelex.make() +``` + +**Find and replace in all Python files:** +- Remove `relative_config_folder_path` parameter +- Remove `config_folder_path` parameter +- Remove `from_file` parameter + +### Step 3: Update Custom Functions + +All custom functions used in `PipeFunc` operators must now have the `@pipe_func()` decorator for auto-discovery. + +**Before:** +```python +from pipelex.core.memory.working_memory import WorkingMemory +from pipelex.core.stuffs.text_content import TextContent + +async def my_custom_function(working_memory: WorkingMemory) -> TextContent: + input_data = working_memory.get_stuff("input_name") + return TextContent(text=f"Processed: {input_data.content.text}") +``` + +**After:** +```python +from pipelex.tools.func_registry import pipe_func +from pipelex.core.memory.working_memory import WorkingMemory +from pipelex.core.stuffs.text_content import TextContent + +@pipe_func() # Add this decorator +async def my_custom_function(working_memory: WorkingMemory) -> TextContent: + input_data = working_memory.get_stuff("input_name") + return TextContent(text=f"Processed: {input_data.content.text}") + +# Optional: specify a custom name +@pipe_func(name="custom_processor") +async def another_function(working_memory: WorkingMemory) -> TextContent: + # Implementation + pass +``` + +### Step 4: Update CLI Commands + +**Before:** +```bash +# You had to specify config folder path +pipelex validate all -c path/to/pipelex/libraries +pipelex build blueprint "..." 
-c your/path/to/pipelex/libraries +``` + +**After:** +```bash +# No config path needed - automatic discovery +pipelex validate all +pipelex build blueprint "..." +``` + +### Step 5: Update Imports in Python Code + +Update imports from the old library structure: + +**Before:** +```python +from pipelex.libraries.pipelines.finance import Invoice, InvoiceData +``` + +**After:** +```python +# Import from your own project structure +from my_project.pipelines.finance_struct import Invoice, InvoiceData +``` + +### Step 6: Update Concept References (Optional) + +While domain-prefixed concept references still work, you can now use simpler references: + +**Before:** +```plx +inputs = { prompt = "images.ImgGenPrompt" } +inputs = { wedding_photo = "images.Photo" } +``` + +**After:** +```plx +# Simpler references (domain prefix optional) +inputs = { prompt = "ImgGenPrompt" } +inputs = { wedding_photo = "Photo" } +``` + +### Auto-Discovery Explained + +**The big change:** Pipelex now scans your entire project and finds: + +- **`.plx` files** - Pipeline definitions (wherever they are!) +- **Structure classes** - Classes inheriting from `StructuredContent` +- **Custom functions** - Functions decorated with `@pipe_func()` + +**This means:** +- No special `pipelex_libraries/pipelines/` folder needed +- Put `.plx` files where they logically belong in YOUR codebase +- Keep related things together (pipelines with their code) + +**Excluded directories** (automatically skipped): +- `.venv`, `.git`, `__pycache__` +- `.pytest_cache`, `.mypy_cache`, `.ruff_cache` +- `node_modules`, `.env`, `results` + +### Troubleshooting + +**Issue: Pipelines not found** + +Solution: Ensure `.plx` files are not in excluded directories and run: +```bash +pipelex show pipes # See what was discovered +``` + +**Issue: Structure classes not registered** + +Solution: +1. Ensure classes inherit from `StructuredContent` +2. Check class names match concept names exactly +3. 
Use `_struct.py` suffix for structure files + +**Issue: Custom functions not found** + +Solution: +1. Add `@pipe_func()` decorator +2. Ensure function is `async` and accepts `working_memory` +3. Verify function is in a discoverable location + +## 2. General Changes ### Rename `definition` to `description` @@ -43,7 +288,7 @@ type = "PipeLLM" description = "Process data" ``` -## 2. PipeCompose (formerly PipeJinja2) +## 3. PipeCompose (formerly PipeJinja2) ### Rename pipe type @@ -105,7 +350,7 @@ category = "html" templating_style = { tag_style = "square_brackets", text_format = "html" } ``` -## 3. PipeExtract (formerly PipeOCR) +## 4. PipeExtract (formerly PipeOCR) ### Rename pipe type @@ -151,7 +396,7 @@ If you're using these functions in Python code: **Find:** `ocr_page_contents_and_views_from_pdf` **Replace with:** `extract_page_contents_and_views_from_pdf` -## 4. PipeLLM Changes +## 5. PipeLLM Changes ### Rename prompt field @@ -229,7 +474,7 @@ Extract person information from this text: """ ``` -## 5. PipeImgGen Changes +## 6. PipeImgGen Changes ### Rename model field @@ -269,7 +514,7 @@ Or use a preset: model = "img_gen_preset_name" ``` -## 6. PipeCondition Changes +## 7. PipeCondition Changes ### Rename outcome fields @@ -319,7 +564,7 @@ To fail when no match: default_outcome = "fail" ``` -## 7. Configuration Files (.pipelex/ directory) +## 8. Configuration Files (.pipelex/ directory) ### LLM presets in deck files @@ -406,7 +651,7 @@ is_auto_setup_preset_extract = true nb_extract_pages = 10 ``` -## 8. Test Markers +## 9. Test Markers ### Update pytest markers @@ -441,25 +686,51 @@ class TestExtractPipeline: **Find:** `make test-ocr` or `make to` **Replace with:** `make test-extract` or `make te` -## 9. Validation +## 10. 
Validation -After making all changes, run validation: +After making changes, thoroughly test your migration: + +### Activate Virtual Environment ```bash -# Fix any unused imports -make fix-unused-imports +# Activate your virtual environment first +source .venv/bin/activate # Unix/macOS +# or +.venv\Scripts\activate # Windows +``` -# Validate all pipelines -make validate +### Validation Steps -# Run type checking and linting -make check +1. **Validate pipeline syntax:** + ```bash + pipelex validate all + ``` -# Run tests (non-inference) -make tp -``` +2. **Check specific pipes:** + ```bash + pipelex show pipes # List all discovered pipes + pipelex show pipe YOUR_PIPE_CODE # Inspect specific pipe + ``` + +3. **Run your test suite:** + ```bash + pytest tests/ + # or if using make: + make test + ``` -## 10. Python API Changes for Client Projects +4. **Test pipeline execution:** + - Run your application + - Execute example pipelines + - Verify outputs are as expected + +5. **Check for issues:** + - Review any validation errors + - Check imports are working + - Verify structure classes are discovered + - Confirm custom functions are registered + +## 11. Python API Changes for Client Projects These changes affect Python code that imports from or uses pipelex. @@ -543,7 +814,7 @@ get_inference_manager().set_llm_worker_from_external_plugin( ) ``` -## 11. File Cleanup +## 12. File Cleanup ### Remove Deprecated Files @@ -568,7 +839,7 @@ If your project has `AGENTS.md` or `CLAUDE.md` files with Pipelex examples: - `llm = ` → `model = ` - `prompt_template = ` → `prompt = ` -## 12. Common Issues +## 13. Common Issues ### Issue: Pipeline validation fails with "unknown field" @@ -600,133 +871,71 @@ If your project has `AGENTS.md` or `CLAUDE.md` files with Pipelex examples: **Solution:** Remove references to this file. The templates are now auto-loaded from the config. -## 13. Automated Migration Script +## 14. 
Automation Tools -You can use this bash script to automatically apply most changes: +You can automate many of these text replacements using standard tools available on your platform: -```bash -#!/bin/bash - -# Find all .plx files and apply replacements -find . -name "*.plx" -type f -exec sed -i '' \ - -e 's/definition = "/description = "/g' \ - -e 's/type = "PipeJinja2"/type = "PipeCompose"/g' \ - -e 's/type = "PipeOCR"/type = "PipeExtract"/g' \ - -e 's/prompt_template = /prompt = /g' \ - -e 's/jinja2 = /template = /g' \ - -e 's/jinja2_name = /template_name = /g' \ - -e 's/ocr_model = /model = /g' \ - -e 's/\[pipe\.\([^.]*\)\.pipe_map\]/[pipe.\1.outcomes]/g' \ - -e 's/default_pipe_code = /default_outcome = /g' \ - {} + - -# Update Python files with renamed pipe codes -find . -name "*.py" -type f -exec sed -i '' \ - -e 's/ocr_page_contents_from_pdf/extract_page_contents_from_pdf/g' \ - -e 's/ocr_page_contents_and_views_from_pdf/extract_page_contents_and_views_from_pdf/g' \ - {} + - -# Update documentation files -find . 
\( -name "AGENTS.md" -o -name "CLAUDE.md" \) -type f -exec sed -i '' \ - -e 's/definition = "/description = "/g' \ - -e 's/type = "PipeOcr"/type = "PipeExtract"/g' \ - -e 's/ocr_model = /model = /g' \ - -e 's/ocr_page_contents_from_pdf/extract_page_contents_from_pdf/g' \ - -e 's/ocr_page_contents_and_views_from_pdf/extract_page_contents_and_views_from_pdf/g' \ - {} + - -# Find all .toml files in .pipelex and apply replacements -find .pipelex -name "*.toml" -type f -exec sed -i '' \ - -e 's/llm_handle = /model = /g' \ - -e 's/img_gen_handle = /model = /g' \ - -e 's/ocr_handle = /model = /g' \ - -e 's/\[presets\.ocr\]/[presets.extract]/g' \ - -e 's/base_ocr_pypdfium2/base_extract_pypdfium2/g' \ - -e 's/base_ocr_mistral/base_extract_mistral/g' \ - -e 's/ocr_config/extract_config/g' \ - -e 's/is_auto_setup_preset_ocr/is_auto_setup_preset_extract/g' \ - -e 's/nb_ocr_pages/nb_extract_pages/g' \ - {} + - -# Find all test files and update markers -find tests -name "*.py" -type f -exec sed -i '' \ - -e 's/@pytest\.mark\.ocr/@pytest.mark.extract/g' \ - {} + - -# Remove deprecated files -rm -f pipelex_libraries/templates/base_templates.toml +### Available Tools by Platform + +**Unix/Linux/macOS:** +- `sed` - Stream editor for find/replace in files +- `find` - Locate files and execute commands on them +- `grep` - Search for patterns in files + +**Windows:** +- PowerShell's `Get-Content` and `-replace` operator +- Git Bash (includes Unix tools) +- WSL (Windows Subsystem for Linux) + +### What Can Be Automated + +The following replacements can be done with find/replace tools: + +**In `.plx` files:** +- `definition = "` → `description = "` +- `type = "PipeJinja2"` → `type = "PipeCompose"` +- `type = "PipeOCR"` → `type = "PipeExtract"` +- `prompt_template = ` → `prompt = ` +- `jinja2 = ` → `template = ` +- `ocr_model = ` → `model = ` +- `[pipe.X.pipe_map]` → `[pipe.X.outcomes]` +- `default_pipe_code = ` → `default_outcome = ` + +**In `.py` files:** +- `ocr_page_contents_from_pdf` 
→ `extract_page_contents_from_pdf` +- Remove `relative_config_folder_path` parameters from `Pipelex.make()` +- Remove `config_folder_path` parameters from `Pipelex.make()` + +**In `.toml` files:** +- `llm_handle = ` → `model = ` +- `img_gen_handle = ` → `model = ` +- `ocr_handle = ` → `model = ` +- `[presets.ocr]` → `[presets.extract]` +- `base_ocr_*` → `base_extract_*` + +**In test files:** +- `@pytest.mark.ocr` → `@pytest.mark.extract` + +### What CANNOT Be Automated + +These require manual intervention: + +1. Moving `.plx` files to appropriate locations (project-specific) +2. Renaming structure files to `*_struct.py` suffix +3. Adding `@pipe_func()` decorator to custom functions +4. Updating imports to match your new structure +5. Adding `default_outcome` to `PipeCondition` pipes +6. Tagging image inputs in `PipeLLM` prompts with `$` or `@` +7. Reviewing and testing all changes + +### Recommendation + +1. **Test incrementally:** Apply changes to one file type at a time +2. **Use version control:** Commit before migrating so you can revert if needed +3. **Activate your virtual environment** before running Pipelex commands +4. **Validate after each change** (see Validation section) -echo "Automated migration complete. Please review changes and:" -echo "1. Manually add default_outcome to all PipeCondition pipes" -echo "2. Tag image inputs in PipeLLM prompts" -echo "3. Remove nb_steps from PipeImgGen if present" -echo "4. Run 'make validate' to check for errors" -``` - -**Note:** -- macOS: Use `sed -i ''` (as shown above) -- Linux: Replace `sed -i ''` with `sed -i` -- Windows: Use Git Bash, WSL, or the PowerShell script below - -### Windows PowerShell Migration Script - -```powershell -# Find all .plx files and apply replacements -Get-ChildItem -Path . 
-Filter *.plx -Recurse | ForEach-Object { - $content = Get-Content $_.FullName -Raw - $content = $content -replace 'definition = "', 'description = "' - $content = $content -replace 'type = "PipeJinja2"', 'type = "PipeCompose"' - $content = $content -replace 'type = "PipeOCR"', 'type = "PipeExtract"' - $content = $content -replace 'prompt_template = ', 'prompt = ' - $content = $content -replace 'jinja2 = ', 'template = ' - $content = $content -replace 'jinja2_name = ', 'template_name = ' - $content = $content -replace 'ocr_model = ', 'model = ' - $content = $content -replace '\[pipe\.([^.]+)\.pipe_map\]', '[pipe.$1.outcomes]' - $content = $content -replace 'default_pipe_code = ', 'default_outcome = ' - Set-Content -Path $_.FullName -Value $content -NoNewline -} - -# Update Python files with renamed pipe codes -Get-ChildItem -Path . -Filter *.py -Recurse | ForEach-Object { - $content = Get-Content $_.FullName -Raw - $content = $content -replace 'ocr_page_contents_from_pdf', 'extract_page_contents_from_pdf' - $content = $content -replace 'ocr_page_contents_and_views_from_pdf', 'extract_page_contents_and_views_from_pdf' - Set-Content -Path $_.FullName -Value $content -NoNewline -} - -# Find all .toml files in .pipelex and apply replacements -Get-ChildItem -Path .pipelex -Filter *.toml -Recurse | ForEach-Object { - $content = Get-Content $_.FullName -Raw - $content = $content -replace 'llm_handle = ', 'model = ' - $content = $content -replace 'img_gen_handle = ', 'model = ' - $content = $content -replace 'ocr_handle = ', 'model = ' - $content = $content -replace '\[presets\.ocr\]', '[presets.extract]' - $content = $content -replace 'base_ocr_pypdfium2', 'base_extract_pypdfium2' - $content = $content -replace 'base_ocr_mistral', 'base_extract_mistral' - $content = $content -replace 'ocr_config', 'extract_config' - $content = $content -replace 'is_auto_setup_preset_ocr', 'is_auto_setup_preset_extract' - $content = $content -replace 'nb_ocr_pages', 'nb_extract_pages' - 
Set-Content -Path $_.FullName -Value $content -NoNewline -} - -# Find all test files and update markers -Get-ChildItem -Path tests -Filter *.py -Recurse | ForEach-Object { - $content = Get-Content $_.FullName -Raw - $content = $content -replace '@pytest\.mark\.ocr', '@pytest.mark.extract' - Set-Content -Path $_.FullName -Value $content -NoNewline -} - -# Remove deprecated files -Remove-Item -Path "pipelex_libraries/templates/base_templates.toml" -ErrorAction SilentlyContinue - -Write-Host "Automated migration complete. Please review changes and:" -Write-Host "1. Manually add default_outcome to all PipeCondition pipes" -Write-Host "2. Tag image inputs in PipeLLM prompts" -Write-Host "3. Remove nb_steps from PipeImgGen if present" -Write-Host "4. Run 'make validate' to check for errors" -``` - -## 14. Additional Resources +## 15. Additional Resources - See AGENTS.md for complete documentation of the current syntax - Run `make validate` frequently to catch issues early diff --git a/uv.lock b/uv.lock index bf4c32296..47ac2ecd8 100644 --- a/uv.lock +++ b/uv.lock @@ -335,16 +335,16 @@ wheels = [ [[package]] name = "boto3-stubs" -version = "1.40.50" +version = "1.40.51" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore-stubs" }, { name = "types-s3transfer" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/35/c8/06584145c4ccc80e3297a97874bfaa43e6b2fb9f8a69bcc38e29a1457bf5/boto3_stubs-1.40.50.tar.gz", hash = "sha256:29828adfcb8629b5e285468eb89610f1fc71f964ad0913de3049a0a9d5de0be1", size = 100836, upload-time = "2025-10-10T20:32:34.867Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/4d/b07f9ee0fe432fa8ec6dc368ee7a0409e2b6d9df2c5a2a88265c9b6fd878/boto3_stubs-1.40.51.tar.gz", hash = "sha256:0281e820813a310954e15fb7c1d470c24c34c1cccc7b1ddad977fa293a1080a9", size = 100890, upload-time = "2025-10-13T19:25:36.126Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/7b/69/f18c7135dc8a2b74e21b4a2375fa455e4d9e7e47f7838bc175d52005054a/boto3_stubs-1.40.50-py3-none-any.whl", hash = "sha256:01b9c67df62f26371a4a7473c616eece988a5305e7f7cb3fbc014d178685ac4e", size = 69689, upload-time = "2025-10-10T20:32:25.77Z" }, + { url = "https://files.pythonhosted.org/packages/d3/2e/4476431f11fc3bf7a7e0f4f5c275f17607aa127da7c0d8685a4dc6bf6291/boto3_stubs-1.40.51-py3-none-any.whl", hash = "sha256:896d0ffaa298ce1749eea1a54946320a0f4e07c6912f8e1f8c0744a708ee25a4", size = 69709, upload-time = "2025-10-13T19:25:23.116Z" }, ] [[package]] From 583fbcefc6d3b8c701c498c0da8c4ee8751df658 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 23:41:29 +0200 Subject: [PATCH 055/115] remove old stuff related to the pipelines library --- pipelex/__init__.py | 7 ------- pipelex/_bootstrap_user_libs.py | 30 ------------------------------ 2 files changed, 37 deletions(-) delete mode 100644 pipelex/_bootstrap_user_libs.py diff --git a/pipelex/__init__.py b/pipelex/__init__.py index 98ce4968c..567519bf5 100644 --- a/pipelex/__init__.py +++ b/pipelex/__init__.py @@ -1,4 +1,3 @@ -from pipelex._bootstrap_user_libs import activate as _px_bootstrap_user_libs from pipelex.tools.log.log import log from pipelex.tools.misc.pretty import pretty_print, pretty_print_md @@ -7,9 +6,3 @@ "pretty_print", "pretty_print_md", ] - -# ------------------------------------------------------------ -# Keep /pipelex_libraries on sys.path for every installer (Fix for uv) -# ------------------------------------------------------------ - -_px_bootstrap_user_libs() diff --git a/pipelex/_bootstrap_user_libs.py b/pipelex/_bootstrap_user_libs.py deleted file mode 100644 index 1f6b0a62d..000000000 --- a/pipelex/_bootstrap_user_libs.py +++ /dev/null @@ -1,30 +0,0 @@ -import sys -from pathlib import Path - -# ------------------------------------------------------------------------ -# Public helper: run once, keep user libs import-able -# 
------------------------------------------------------------------------ - - -def activate() -> None: - # 1) Re-create Poetry’s behaviour: put itself on sys.path - root = Path.cwd() - root_str = str(root) - if root_str not in sys.path: - sys.path.insert(0, root_str) - - # 2) If a pipelex_libraries folder already exists, also touch __init__.py - # (helps IDE & static type-checkers) and ensure the *exact* folder is - # on sys.path. When it does **not** exist yet, nothing else to do — - # as soon as the user runs `pipelex init-libraries`, the directory will be - # created *inside* , which is already import-able thanks to (1). - for parent in (root, *root.parents): - lib_dir = parent / "pipelex_libraries" - if lib_dir.is_dir(): - # 1) make it a *real* package so editors & type-checkers see it - (lib_dir / "__init__.py").touch(exist_ok=True) - # 2) put it at the front of sys.path exactly once - lib_path = str(lib_dir) - if lib_path not in sys.path: - sys.path.insert(0, lib_path) - break From 6f401434733eeec45c5532e83a162deb7167dffd Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Mon, 13 Oct 2025 23:47:32 +0200 Subject: [PATCH 056/115] More migration guiding (packages moved and renamed) --- .../kit/migrations/migrate_0.11.0_0.12.0.md | 238 ++++++++++++++++-- 1 file changed, 224 insertions(+), 14 deletions(-) diff --git a/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md b/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md index 1cc0bd489..c5a5ef7ef 100644 --- a/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md +++ b/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md @@ -9,6 +9,7 @@ This release introduces several breaking changes to make the Pipelex language mo - Pipeline definitions (.plx files) - Configuration files (.pipelex/ directory) - Python code initialization +- Python import paths (module refactoring) - Test markers ## Migration Checklist @@ -24,6 +25,10 @@ This release introduces several breaking changes to make the Pipelex language mo - [ ] Update PipeCondition 
fields - [ ] Update configuration files - [ ] Update test markers +- [ ] **Update Python imports for StuffContent subclasses** +- [ ] **Update imports for core module relocations (pipe_works, pipe_input, etc.)** +- [ ] **Update all OCR-related imports to Extract equivalents** +- [ ] **Update Provider → Library class names and hub method calls** - [ ] Run validation ## 1. Library System Removal (CRITICAL) @@ -734,6 +739,125 @@ source .venv/bin/activate # Unix/macOS These changes affect Python code that imports from or uses pipelex. +### Refactoring of `StuffContent` Subclasses + +The monolithic `pipelex.core.stuffs.stuff_content` module has been split into individual files for each core content type. This improves modularity and clarity. + +**You must update your imports for these classes:** + +| Class Name | Old Import Path | New Import Path | +| :--- | :--- | :--- | +| `StructuredContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.structured_content` | +| `TextContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.text_content` | +| `ImageContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.image_content` | +| `ListContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.list_content` | +| `PDFContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.pdf_content` | +| `PageContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.page_content` | +| `NumberContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.number_content` | +| `HtmlContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.html_content` | +| `MermaidContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.mermaid_content` | +| `TextAndImagesContent` | `pipelex.core.stuffs.stuff_content` | `pipelex.core.stuffs.text_and_images_content` | + +**Example Migration:** + +**Before:** +```python +from pipelex.core.stuffs.stuff_content import StructuredContent, TextContent, 
ImageContent + +class MyData(StructuredContent): + text_field: TextContent + image_field: ImageContent +``` + +**After:** +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pipelex.core.stuffs.text_content import TextContent +from pipelex.core.stuffs.image_content import ImageContent + +class MyData(StructuredContent): + text_field: TextContent + image_field: ImageContent +``` + +### Core Module Relocations + +Several core modules related to pipe execution and input handling have been moved to more logical packages. + +#### Pipe Execution (`pipe_works` → `pipe_run`) + +The `pipelex.pipe_works` package has been renamed to `pipelex.pipe_run`. Additionally, `PipeRunParams` and its factory have been moved into this new package. + +| Old Path | New Path | Description | +| :--- | :--- | :--- | +| `pipelex.core.pipes.pipe_run_params` | `pipelex.pipe_run.pipe_run_params` | Contains `PipeRunParams`, `PipeRunMode`, etc. | +| `pipelex.core.pipes.pipe_run_params_factory` | `pipelex.pipe_run.pipe_run_params_factory` | Factory for creating `PipeRunParams`. | +| `pipelex.pipe_works.pipe_router_protocol` | `pipelex.pipe_run.pipe_router_protocol` | The abstract protocol for the pipe router. | + +**Example Migration:** + +**Before:** +```python +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +``` + +**After:** +```python +from pipelex.pipe_run.pipe_run_params import PipeRunParams +from pipelex.pipe_run.pipe_router_protocol import PipeRouterProtocol +``` + +#### Pipe Input Specifications (`pipe_input` → `input_requirements`) + +Modules for defining pipe inputs have been renamed for clarity. The main class `PipeInputSpec` is now `InputRequirements`. + +| Old Path | New Path | Description | +| :--- | :--- | :--- | +| `pipelex.core.pipes.pipe_input` | `pipelex.core.pipes.input_requirements` | Contains `InputRequirements` (formerly `PipeInputSpec`). 
| +| `pipelex.core.pipes.pipe_input_blueprint` | `pipelex.core.pipes.input_requirement_blueprint` | Contains `InputRequirementBlueprint`. | +| `pipelex.core.pipes.pipe_input_factory` | `pipelex.core.pipes.input_requirements_factory` | Contains `InputRequirementsFactory`. | + +**Example Migration:** + +**Before:** +```python +from pipelex.core.pipes.pipe_input import PipeInputSpec +from pipelex.core.pipes.pipe_input_factory import PipeInputSpecFactory +``` + +**After:** +```python +from pipelex.core.pipes.input_requirements import InputRequirements +from pipelex.core.pipes.input_requirements_factory import InputRequirementsFactory +``` + +#### Comprehensive OCR → Extract Renaming + +All modules, classes, and configurations related to `ocr` have been renamed to `extract` to better reflect capabilities beyond OCR. + +| Old Path / Name | New Path / Name | Description | +| :--- | :--- | :--- | +| `pipelex.cogt.ocr` | `pipelex.cogt.extract` | Main package for extraction logic. | +| `pipelex.pipe_operators.ocr` | `pipelex.pipe_operators.extract` | Package for the `PipeExtract` operator. | +| `pipelex.cogt.ocr.ocr_input` | `pipelex.cogt.extract.extract_input` | Contains `ExtractInput` (formerly `OcrInput`). | +| `pipelex.cogt.ocr.ocr_job` | `pipelex.cogt.extract.extract_job` | Contains `ExtractJob` (formerly `OcrJob`). | +| `pipelex.cogt.ocr.ocr_worker_abstract` | `pipelex.cogt.extract.extract_worker_abstract` | Contains `ExtractWorkerAbstract`. 
| + +**Example Migration:** + +**Before:** +```python +from pipelex.cogt.ocr.ocr_input import OcrInput +from pipelex.cogt.ocr.ocr_worker_abstract import OcrWorkerAbstract +``` + +**After:** +```python +from pipelex.cogt.extract.extract_input import ExtractInput +from pipelex.cogt.extract.extract_worker_abstract import ExtractWorkerAbstract +``` + ### Renamed Base Library Pipes **Find:** `ocr_page_contents_from_pdf` @@ -762,6 +886,58 @@ pipe_output = await execute_pipeline( ) ``` +### Builder Module Relocation + +The pipeline builder has been promoted to a top-level package: + +**Old:** `pipelex.libraries.pipelines.builder` +**New:** `pipelex.builder` + +**Note:** This change only affects internal Pipelex code. Most users won't need to update anything related to the builder module. + +### Core Abstractions (`Provider` → `Library`) + +The abstract base classes for core components have been renamed for consistency. Hub accessor methods have been updated accordingly. + +| Old Class Name | New Class Name | Hub Accessor Method | +| :--- | :--- | :--- | +| `ConceptProviderAbstract` | `ConceptLibraryAbstract` | `get_concept_library()` | +| `DomainProviderAbstract` | `DomainLibraryAbstract` | `get_domain_library()` | +| `PipeProviderAbstract` | `PipeLibraryAbstract` | `get_pipe_library()` | + +**Example Migration:** + +**Before:** +```python +from pipelex.core.concepts.concept_provider_abstract import ConceptProviderAbstract +concept_provider = hub.get_concept_provider() +``` + +**After:** +```python +from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract +concept_library = hub.get_concept_library() +``` + +### Templating Refactoring + +The `pipelex.tools.templating` package has been refactored: + +- Core logic now resides in `pipelex.cogt.templating` +- Jinja2-specific utilities are in `pipelex.tools.jinja2` + +**Example Migration:** + +**Before:** +```python +from pipelex.tools.templating.template_processor import TemplateProcessor +``` + 
+**After:** +```python +from pipelex.cogt.templating.template_processor import TemplateProcessor +``` + ### Removed Methods and Classes The following methods and classes have been removed. If your code uses them, you'll need to refactor: @@ -770,17 +946,6 @@ The following methods and classes have been removed. If your code uses them, you - `PipelexHub.get_optional_library_manager()` - Removed - Hub methods: `get_optional_domain_provider()` and `get_optional_concept_provider()` - Removed -### Renamed Internal Classes (if used) - -If your project directly imports these internal classes: - -- `ConceptProviderAbstract` → `ConceptLibraryAbstract` -- `DomainProviderAbstract` → `DomainLibraryAbstract` -- `PipeProviderAbstract` → `PipeLibraryAbstract` -- `PipeInputSpec` → `InputRequirements` -- `PipeInputSpecFactory` → `InputRequirementsFactory` -- `PipelexError` → `PipelexException` (base exception class) - ### Hub Method Renames If you use hub methods directly: @@ -871,6 +1036,40 @@ If your project has `AGENTS.md` or `CLAUDE.md` files with Pipelex examples: **Solution:** Remove references to this file. The templates are now auto-loaded from the config. +### Issue: ImportError for StuffContent subclasses + +**Cause:** Imports still use the old monolithic `pipelex.core.stuffs.stuff_content` module. + +**Solution:** Update imports to use individual modules. See Section 11 for the complete mapping table. + +**Example:** +```python +# Old (will fail) +from pipelex.core.stuffs.stuff_content import StructuredContent, TextContent + +# New (correct) +from pipelex.core.stuffs.structured_content import StructuredContent +from pipelex.core.stuffs.text_content import TextContent +``` + +### Issue: ImportError for pipe execution classes + +**Cause:** Code imports from old `pipelex.pipe_works` package or old pipe input modules. 
+ +**Solution:** Update to use new package names: +- `pipelex.pipe_works` → `pipelex.pipe_run` +- `pipelex.core.pipes.pipe_input` → `pipelex.core.pipes.input_requirements` +- `PipeInputSpec` → `InputRequirements` + +### Issue: ImportError for OCR-related classes + +**Cause:** Code still imports from `pipelex.cogt.ocr` or `pipelex.pipe_operators.ocr`. + +**Solution:** Update all OCR imports to Extract: +- `pipelex.cogt.ocr` → `pipelex.cogt.extract` +- `pipelex.pipe_operators.ocr` → `pipelex.pipe_operators.extract` +- All class names: `Ocr*` → `Extract*` + ## 14. Automation Tools You can automate many of these text replacements using standard tools available on your platform: @@ -905,6 +1104,16 @@ The following replacements can be done with find/replace tools: - `ocr_page_contents_from_pdf` → `extract_page_contents_from_pdf` - Remove `relative_config_folder_path` parameters from `Pipelex.make()` - Remove `config_folder_path` parameters from `Pipelex.make()` +- `from pipelex.core.stuffs.stuff_content import` → specific module imports (find/replace can only locate these lines; splitting them per class is a manual step, see "What CANNOT Be Automated") +- `from pipelex.pipe_works` → `from pipelex.pipe_run` +- `from pipelex.core.pipes.pipe_input` → `from pipelex.core.pipes.input_requirements` (exception: `pipe_input_blueprint` becomes `input_requirement_blueprint`, singular) +- `from pipelex.cogt.ocr` → `from pipelex.cogt.extract` (submodules and classes must also be renamed: `ocr_*` → `extract_*`, `Ocr*` → `Extract*`) +- `PipeInputSpec` → `InputRequirements` +- `ConceptProviderAbstract` → `ConceptLibraryAbstract` +- `DomainProviderAbstract` → `DomainLibraryAbstract` +- `PipeProviderAbstract` → `PipeLibraryAbstract` +- `.get_*_provider()` → `.get_*_library()` +- `.set_*_provider()` → `.set_*_library()` **In `.toml` files:** - `llm_handle = ` → `model = ` @@ -924,9 +1133,10 @@ These require manual intervention: 2. Renaming structure files to `*_struct.py` suffix 3. 
Adding `@pipe_func()` decorator to custom functions 4. Updating imports to match your new structure -5. Adding `default_outcome` to `PipeCondition` pipes -6. Tagging image inputs in `PipeLLM` prompts with `$` or `@` -7. Reviewing and testing all changes +5. 
Splitting `StuffContent` imports into individual module imports (requires analyzing which classes are used) +6. Adding `default_outcome` to `PipeCondition` pipes +7. Tagging image inputs in `PipeLLM` prompts with `$` or `@` +8. Reviewing and testing all changes ### Recommendation From 5da479ce235511fd2ba9563ca34f612c9200838e Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 00:07:36 +0200 Subject: [PATCH 057/115] Remove build_flow_cmd (was just a test) --- pipelex/cli/commands/build_cmd.py | 51 ------------------------------- 1 file changed, 51 deletions(-) diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index 7ed881701..e298ba384 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -180,54 +180,3 @@ async def run_pipeline(): typer.echo(typer.style(f"\n✅ Pipeline built in {end_time - start_time:.2f} seconds", fg=typer.colors.GREEN)) get_report_delegate().generate_report() - - -@build_app.command("flow") -def build_flow_cmd( - brief: Annotated[ - str, - typer.Argument(help="Brief description of what the pipeline should do"), - ], - output_path: Annotated[ - str, - typer.Option("--output", "-o", help="Path to save the generated PLX file"), - ] = "./results/generated_pipeline.plx", - no_output: Annotated[ - bool, - typer.Option("--no-output", help="Skip saving the pipeline to file"), - ] = False, -) -> None: - Pipelex.make() - typer.echo("=" * 70) - typer.echo(typer.style("🔥 Starting pipe builder... 
🚀", fg=typer.colors.GREEN)) - typer.echo("") - - async def run_pipeline(): - if no_output: - typer.echo(typer.style("\n⚠️ Pipeline will not be saved to file (--no-output specified)", fg=typer.colors.YELLOW)) - elif not output_path: - typer.echo(typer.style("\n🛑 Cannot save a pipeline to an empty file name", fg=typer.colors.RED)) - raise typer.Exit(1) - else: - ensure_directory_for_file_path(file_path=output_path) - - pipe_output = await execute_pipeline( - pipe_code="pipe_builder", - input_memory={"brief": brief}, - ) - # Save to file unless explicitly disabled with --no-output - if no_output: - typer.echo(typer.style("\n⚠️ Pipeline not saved to file (--no-output specified)", fg=typer.colors.YELLOW)) - return - pipelex_bundle_spec = pipe_output.main_stuff_as(content_type=PipelexBundleSpec) - flow = FlowFactory.make_from_bundle_spec(bundle_spec=pipelex_bundle_spec) - json_output = flow.smart_dump() - save_as_json_to_path(object_to_save=json_output, path=output_path) - typer.echo(typer.style(f"\n✅ Pipeline saved to: {output_path}", fg=typer.colors.GREEN)) - - start_time = time.time() - asyncio.run(run_pipeline()) - end_time = time.time() - typer.echo(typer.style(f"\n✅ Pipeline built in {end_time - start_time:.2f} seconds", fg=typer.colors.GREEN)) - - get_report_delegate().generate_report() From 01855a1c5bce3e39dee3586c484889fec7d8d8c7 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 00:12:00 +0200 Subject: [PATCH 058/115] Reorg pip build commands --- .../pipeline-creation.md | 3 -- pipelex/cli/commands/build_cmd.py | 28 +++++++++---------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md index 0a8087e7b..3456034f9 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md @@ -6,9 +6,6 
@@ Pipelex provides powerful tools to automatically generate complete, working pipe The pipeline creation system creates a fully working pipeline that has been both statically and dynamically validated. The system automatically handles all aspects of pipeline generation, from understanding requirements to producing executable code. -!!! warning "Execution Time" - Pipeline generation can take 4 to 7 minutes to complete, as the system performs comprehensive validation and testing to ensure the pipeline works correctly. - ## Core Commands ### Build Blueprint diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index e298ba384..9a57645ab 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -32,8 +32,8 @@ """ -@build_app.command("one-shot") -def build_one_shot_cmd( +@build_app.command("pipe", help="Generate a pipeline with one validation/fix loop correcting the deterministic issues") +def build_pipe_cmd( brief: Annotated[ str, typer.Argument(help="Brief description of what the pipeline should do"), @@ -61,18 +61,13 @@ async def run_pipeline(): else: ensure_directory_for_file_path(file_path=output_path) - pipe_output = await execute_pipeline( - pipe_code="pipe_builder", - input_memory={"brief": brief}, - ) - pretty_print(pipe_output, title="Pipe Output") - + builder_loop = BuilderLoop() # Save to file unless explicitly disabled with --no-output if no_output: typer.echo(typer.style("\n⚠️ Pipeline not saved to file (--no-output specified)", fg=typer.colors.YELLOW)) return - pipelex_bundle_spec = pipe_output.working_memory.get_stuff_as(name="pipelex_bundle_spec", content_type=PipelexBundleSpec) + pipelex_bundle_spec = await builder_loop.build_and_fix(pipe_code="pipe_builder", input_memory={"brief": brief}) plx_content = PlxFactory.make_plx_content(blueprint=pipelex_bundle_spec.to_blueprint()) save_text_to_path(text=plx_content, path=output_path) typer.echo(typer.style(f"\n✅ Pipeline saved to: {output_path}",
fg=typer.colors.GREEN)) @@ -85,8 +80,8 @@ async def run_pipeline(): get_report_delegate().generate_report() -@build_app.command("pipe") -def build_pipe_cmd( +@build_app.command("one-shot", help="Generate a pipeline in one shot without validation loop (fast but may need manual fixes)") +def build_one_shot_cmd( brief: Annotated[ str, typer.Argument(help="Brief description of what the pipeline should do"), @@ -114,13 +109,18 @@ async def run_pipeline(): else: ensure_directory_for_file_path(file_path=output_path) - builder_loop = BuilderLoop() + pipe_output = await execute_pipeline( + pipe_code="pipe_builder", + input_memory={"brief": brief}, + ) + pretty_print(pipe_output, title="Pipe Output") + # Save to file unless explicitly disabled with --no-output if no_output: typer.echo(typer.style("\n⚠️ Pipeline not saved to file (--no-output specified)", fg=typer.colors.YELLOW)) return - pipelex_bundle_spec = await builder_loop.build_and_fix(pipe_code="pipe_builder", input_memory={"brief": brief}) + pipelex_bundle_spec = pipe_output.working_memory.get_stuff_as(name="pipelex_bundle_spec", content_type=PipelexBundleSpec) plx_content = PlxFactory.make_plx_content(blueprint=pipelex_bundle_spec.to_blueprint()) save_text_to_path(text=plx_content, path=output_path) typer.echo(typer.style(f"\n✅ Pipeline saved to: {output_path}", fg=typer.colors.GREEN)) @@ -133,7 +133,7 @@ async def run_pipeline(): get_report_delegate().generate_report() -@build_app.command("partial") +@build_app.command("partial", help="Generate a partial pipeline specification and save it as JSON (for debugging)") def build_partial_cmd( brief: Annotated[ str, From 53e62fe86f1a2797f35b420a6589f9414347341b Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 01:18:15 +0200 Subject: [PATCH 059/115] Update rules --- .cursor/rules/python_standards.mdc | 1215 +---------------- .cursor/rules/run_pipelex.mdc | 3 +- .cursor/rules/run_pipelines.mdc | 230 ---- .cursor/rules/tdd.mdc | 1 - 
.cursor/rules/write_pipelex.mdc | 13 +- .github/copilot-instructions.md | 16 +- .windsurfrules.md | 16 +- AGENTS.md | 16 +- BLACKBOX_RULES.md | 16 +- CLAUDE.md | 16 +- Makefile | 12 +- concept_structures.md | 570 ++++++++ .../pipeline-creation.md | 110 +- .../config-technical/library-config.md | 12 - pipelex/cli/commands/build_cmd.py | 3 +- pipelex/kit/agent_rules/python_standards.md | 1215 +---------------- pipelex/kit/agent_rules/run_pipelex.md | 3 +- pipelex/kit/agent_rules/write_pipelex.md | 13 +- pipelex/kit/cursor_export.py | 5 +- pipelex/kit/index_models.py | 14 +- pipelex/pipelex.py | 3 - 21 files changed, 793 insertions(+), 2709 deletions(-) delete mode 100644 .cursor/rules/run_pipelines.mdc create mode 100644 concept_structures.md diff --git a/.cursor/rules/python_standards.mdc b/.cursor/rules/python_standards.mdc index 8fb9315ca..150864d1e 100644 --- a/.cursor/rules/python_standards.mdc +++ b/.cursor/rules/python_standards.mdc @@ -4,7 +4,7 @@ description: Python coding standards and best practices globs: - '**/*.py' --- -# Coding Standards & Best Practices +# Coding Standards & Best Practices for Python Code This document outlines the core coding standards, best practices, and quality control procedures for the codebase. @@ -15,20 +15,34 @@ This document outlines the core coding standards, best practices, and quality co - Every function parameter must be typed - Every function return must be typed - Use type hints for all variables where type is not obvious - - Use dict, list, tupele types with lowercase first letter: dict[], list[], tuple[] + - Use dict, list, tuple types with lowercase first letter: dict[], list[], tuple[] - Use type hints for all fields - - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals - - Use Field(default_factory=...) 
for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` - - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose + - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals (not `Optional[]`) + - Use Field(default_factory=...) for mutable defaults + +2. **BaseModel / Pydantic Standards** -2. **StrEnum** + - Use `BaseModel` and respect Pydantic v2 standards + - Use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` + - Keep models focused and single-purpose + - For list fields with non-string items in BaseModels, use `empty_list_factory_of()` to avoid linter complaints: + ```python + from pydantic import BaseModel, Field + from pipelex.tools.typing.pydantic_utils import empty_list_factory_of + + class MyModel(BaseModel): + names: list[str] = Field(default_factory=list) # OK for strings + numbers: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers") + items: list[MyItem] = Field(default_factory=empty_list_factory_of(MyItem), description="A list of items") + ``` + +3. **StrEnum** - Import from `pipelex.types`: ```python from pipelex.types import StrEnum ``` -3. **Self type** +4. **Self type** - Import from `pipelex.types`: ```python from pipelex.types import Self @@ -61,7 +75,7 @@ This document outlines the core coding standards, best practices, and quality co 1. **Docstring Format** ```python - def process_image(image_path: str, size: Tuple[int, int]) -> bytes: + def process_image(image_path: str, size: tuple[int, int]) -> bytes: """Process and resize an image. 
Args: @@ -120,1193 +134,10 @@ Always fix any issues reported by these tools before proceeding. ## Pipelines -- All pipeline definitions go in `pipelex/libraries/pipelines/` - Always validate pipelines after creation/edit with `make validate`. Iterate if there are errors. ## Project Structure -- **Pipelines**: `pipelex/libraries/pipelines/` - **Tests**: `tests/` directory - **Documentation**: `docs/` directory - ---- - -# Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural langage, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) - -## Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures -- Use descriptive names in `snake_case` - -## Pipeline File Structure -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
- -### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -### Pipe Definitions - -## Pipe Base Structure - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). -So If you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -## Structuring Models - -### Model Location and Registration - -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` -- Models must inherit from `StructuredContent` or appropriate content type - -## Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." -``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, "Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -### Usage - -Structures are meant to indicate what class to use for a particular Concept. 
In general they use the same name as the concept. - -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -## Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -## PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -### Basic Structure -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -### Key Components - -1. 
**Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -## PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -## PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. Process images with Vision LLMs - -### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. 
-""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." -``` - -### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. 
- -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. - - -## PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. 
-output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -## PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -# Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -## PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -## PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - ---- - -# Guide to write an example to execute a pipeline - -## Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -# start Pipelex -Pipelex.make() -# run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -## Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -# start Pipelex -Pipelex.make() - -# run sample using asyncio -gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -## Setting up the input memory - -### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can see, we made it so different ways can be used to define that stuff using structured content or data. - -### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -# Here we have a single input and it's an Image. 
-# Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -# Here we have a single input, it's an image but -# it's actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -# Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -## Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... 
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
- -### Test file structure - -- Name test files with `test_` prefix -- Use descriptive names that match the functionality being tested -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put tests inside Test classes. -- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "extract: uses text/image extraction from documents" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference` and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. 
- -### Test Class Structure - -Always group the tests of a module into a test class: - -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` - -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. - -## Writing integration test to test pipes - -### Required imports for pipe tests - -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol -``` - -### Pipe test implementation steps - -1. Create Stuff from blueprint: - -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` - -2. 
Create Working Memory: - -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) -``` - -3. Run the pipe: - -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` - -4. Basic assertions: - -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None -``` - -### Test Data Organization - -- If it's not already there, create a `test_data.py` file in the test directory -- Define test cases using `StuffBlueprint`: - -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) - - CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] -``` - -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. 
- -## Best Practices for Testing - -- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose -- Log outputs for debugging -- Generate reports for cost tracking diff --git a/.cursor/rules/run_pipelex.mdc b/.cursor/rules/run_pipelex.mdc index 31d474aea..cb619d261 100644 --- a/.cursor/rules/run_pipelex.mdc +++ b/.cursor/rules/run_pipelex.mdc @@ -43,8 +43,9 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart +from my_project.gantt.gantt_struct import GanttChart SAMPLE_NAME = "extract_gantt" IMAGE_URL = "assets/gantt/gantt_tree_house.png" diff --git a/.cursor/rules/run_pipelines.mdc b/.cursor/rules/run_pipelines.mdc deleted file mode 100644 index 31d474aea..000000000 --- a/.cursor/rules/run_pipelines.mdc +++ /dev/null @@ -1,230 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for running Pipelex pipelines -globs: -- examples/**/*.py ---- -# Guide to execute a pipeline and write example code - -## Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -# start Pipelex -Pipelex.make() -# run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -## Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -# start Pipelex -Pipelex.make() - -# run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -## Setting up the input memory - -### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can see, we made it so different ways can be used to define that stuff using structured content or data. - -### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -# Here we have a single input and it's a Text. 
-# If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -# Here we have a single input and it's an Image. -# Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -# Here we have a single input, it's an image but -# it's actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -# Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -## Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.cursor/rules/tdd.mdc b/.cursor/rules/tdd.mdc index aa317290e..4b4f058b5 100644 --- a/.cursor/rules/tdd.mdc +++ b/.cursor/rules/tdd.mdc @@ -1,7 +1,6 @@ --- alwaysApply: false description: Guidelines for writing test-driven development code -globs: [] --- # Test-Driven Development Guide diff --git a/.cursor/rules/write_pipelex.mdc b/.cursor/rules/write_pipelex.mdc index a802af442..23c0c5f8e 100644 --- a/.cursor/rules/write_pipelex.mdc +++ b/.cursor/rules/write_pipelex.mdc @@ -13,10 +13,10 @@ globs: ## Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures +- Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` -## Pipeline File Structure +## Pipeline File Outline A pipeline file has three main sections: 1. Domain statement 2. 
Concept definitions @@ -43,7 +43,7 @@ Important Rules: yes ### Pipe Definitions -## Pipe Base Structure +## Pipe Base Definition ```plx [pipe.your_pipe_name] @@ -90,8 +90,9 @@ inputs = { ### Model Location and Registration -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) - Models must inherit from `StructuredContent` or appropriate content type +- Structure classes are automatically discovered by Pipelex - no manual registration needed ## Model Structure @@ -143,7 +144,7 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. +Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. ### Best Practices for structures @@ -173,7 +174,7 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
-### Basic Structure +### Basic Definition ```plx [pipe.your_sequence_name] type = "PipeSequence" diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 0573dba49..32ec89f0e 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -7,10 +7,10 @@ ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures +- Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` -### Pipeline File Structure +### Pipeline File Outline A pipeline file has three main sections: 1. Domain statement 2. Concept definitions @@ -37,7 +37,7 @@ Important Rules: yes #### Pipe Definitions -### Pipe Base Structure +### Pipe Base Definition ```plx [pipe.your_pipe_name] @@ -84,8 +84,9 @@ inputs = { #### Model Location and Registration -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) - Models must inherit from `StructuredContent` or appropriate content type +- Structure classes are automatically discovered by Pipelex - no manual registration needed ### Model Structure @@ -137,7 +138,7 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. +Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. 
#### Best Practices for structures @@ -167,7 +168,7 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. -#### Basic Structure +#### Basic Definition ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -832,8 +833,9 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart +from my_project.gantt.gantt_struct import GanttChart SAMPLE_NAME = "extract_gantt" IMAGE_URL = "assets/gantt/gantt_tree_house.png" diff --git a/.windsurfrules.md b/.windsurfrules.md index 0573dba49..32ec89f0e 100644 --- a/.windsurfrules.md +++ b/.windsurfrules.md @@ -7,10 +7,10 @@ ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures +- Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` -### Pipeline File Structure +### Pipeline File Outline A pipeline file has three main sections: 1. Domain statement 2. 
Concept definitions @@ -37,7 +37,7 @@ Important Rules: yes #### Pipe Definitions -### Pipe Base Structure +### Pipe Base Definition ```plx [pipe.your_pipe_name] @@ -84,8 +84,9 @@ inputs = { #### Model Location and Registration -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) - Models must inherit from `StructuredContent` or appropriate content type +- Structure classes are automatically discovered by Pipelex - no manual registration needed ### Model Structure @@ -137,7 +138,7 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. +Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. #### Best Practices for structures @@ -167,7 +168,7 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
-#### Basic Structure +#### Basic Definition ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -832,8 +833,9 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart +from my_project.gantt.gantt_struct import GanttChart SAMPLE_NAME = "extract_gantt" IMAGE_URL = "assets/gantt/gantt_tree_house.png" diff --git a/AGENTS.md b/AGENTS.md index 0573dba49..32ec89f0e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,10 +7,10 @@ ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures +- Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` -### Pipeline File Structure +### Pipeline File Outline A pipeline file has three main sections: 1. Domain statement 2. Concept definitions @@ -37,7 +37,7 @@ Important Rules: yes #### Pipe Definitions -### Pipe Base Structure +### Pipe Base Definition ```plx [pipe.your_pipe_name] @@ -84,8 +84,9 @@ inputs = { #### Model Location and Registration -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) - Models must inherit from `StructuredContent` or appropriate content type +- Structure classes are automatically discovered by Pipelex - no manual registration needed ### Model Structure @@ -137,7 +138,7 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. 
-Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. +Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. #### Best Practices for structures @@ -167,7 +168,7 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. -#### Basic Structure +#### Basic Definition ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -832,8 +833,9 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart +from my_project.gantt.gantt_struct import GanttChart SAMPLE_NAME = "extract_gantt" IMAGE_URL = "assets/gantt/gantt_tree_house.png" diff --git a/BLACKBOX_RULES.md b/BLACKBOX_RULES.md index 0573dba49..32ec89f0e 100644 --- a/BLACKBOX_RULES.md +++ b/BLACKBOX_RULES.md @@ -7,10 +7,10 @@ ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures +- Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` -### Pipeline File Structure +### Pipeline File Outline A pipeline file has three main sections: 1. Domain statement 2. 
Concept definitions @@ -37,7 +37,7 @@ Important Rules: yes #### Pipe Definitions -### Pipe Base Structure +### Pipe Base Definition ```plx [pipe.your_pipe_name] @@ -84,8 +84,9 @@ inputs = { #### Model Location and Registration -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) - Models must inherit from `StructuredContent` or appropriate content type +- Structure classes are automatically discovered by Pipelex - no manual registration needed ### Model Structure @@ -137,7 +138,7 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. +Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. #### Best Practices for structures @@ -167,7 +168,7 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
-#### Basic Structure +#### Basic Definition ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -832,8 +833,9 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart +from my_project.gantt.gantt_struct import GanttChart SAMPLE_NAME = "extract_gantt" IMAGE_URL = "assets/gantt/gantt_tree_house.png" diff --git a/CLAUDE.md b/CLAUDE.md index 0573dba49..32ec89f0e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,10 +7,10 @@ ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures +- Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` -### Pipeline File Structure +### Pipeline File Outline A pipeline file has three main sections: 1. Domain statement 2. Concept definitions @@ -37,7 +37,7 @@ Important Rules: yes #### Pipe Definitions -### Pipe Base Structure +### Pipe Base Definition ```plx [pipe.your_pipe_name] @@ -84,8 +84,9 @@ inputs = { #### Model Location and Registration -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) - Models must inherit from `StructuredContent` or appropriate content type +- Structure classes are automatically discovered by Pipelex - no manual registration needed ### Model Structure @@ -137,7 +138,7 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. 
-Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. +Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. #### Best Practices for structures @@ -167,7 +168,7 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. -#### Basic Structure +#### Basic Definition ```plx [pipe.your_sequence_name] type = "PipeSequence" @@ -832,8 +833,9 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart +from my_project.gantt.gantt_struct import GanttChart SAMPLE_NAME = "extract_gantt" IMAGE_URL = "assets/gantt/gantt_tree_house.png" diff --git a/Makefile b/Makefile index 7906e5b29..16085ff07 100644 --- a/Makefile +++ b/Makefile @@ -58,8 +58,7 @@ make cft - Shorthand -> config-template make cleanenv - Remove virtual env and lock files make cleanderived - Remove extraneous compiled files, caches, logs, etc. 
-make cleanlibraries - Remove pipelex_libraries -make cleanall - Remove all -> cleanenv + cleanderived + cleanlibraries +make cleanall - Remove all -> cleanenv + cleanderived make merge-check-ruff-lint - Run ruff merge check without updating files make merge-check-ruff-format - Run ruff merge check without updating files @@ -107,7 +106,7 @@ export HELP .PHONY: \ all help env lock install update build \ format lint pyright mypy pylint \ - cleanderived cleanenv cleanlibraries cleanall \ + cleanderived cleanenv cleanall \ test test-xdist t test-quiet tq test-with-prints tp test-inference ti \ test-llm tl test-img-gen tg test-extract te codex-tests gha-tests \ run-all-tests run-manual-trigger-gha-tests run-gha_disabled-tests \ @@ -200,17 +199,12 @@ cleanenv: find . -type d -wholename './.venv' -exec rm -rf {} + && \ echo "Cleaned up virtual env and dependency lock files"; -cleanlibraries: - $(call PRINT_TITLE,"Erasing derived files and directories") - @find . -type d -wholename './pipelex_libraries' -exec rm -rf {} + && \ - echo "Cleaned up pipelex_libraries"; - cleanconfig: $(call PRINT_TITLE,"Erasing config files and directories") @find . -type d -wholename './.pipelex' -exec rm -rf {} + && \ echo "Cleaned up .pipelex"; -cleanall: cleanderived cleanenv cleanlibraries cleanconfig +cleanall: cleanderived cleanenv cleanconfig @echo "Cleaned up all derived files and directories"; ########################################################################################## diff --git a/concept_structures.md b/concept_structures.md new file mode 100644 index 000000000..7a07ea160 --- /dev/null +++ b/concept_structures.md @@ -0,0 +1,570 @@ +# Defining Concept Structures in Pipelex + +## Introduction + +Pipelex provides a powerful feature that allows you to define structured concepts directly within your `.plx` pipeline files using inline TOML syntax. 
This eliminates the need to create separate Python files for simple to moderately complex data structures, streamlining your workflow and keeping all pipeline definitions in one place. + +**Quick Example:** + +```plx +domain = "finance" +description = "Financial document processing" + +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items in the invoice", required = false } +``` + +Behind the scenes, Pipelex automatically generates a fully-typed Pydantic model that inherits from `StructuredContent`, giving you structured LLM outputs with validation—all from TOML. + +## How Inline Structures Work + +When you define a concept structure inline, Pipelex performs the following steps automatically: + +1. **Parsing**: The TOML parser reads `[concept.ConceptName.structure]` sections from your `.plx` file +2. **Blueprint Creation**: Each field definition is converted into a `ConceptStructureBlueprint` object that specifies the field's type, description, requirements, and defaults +3. **Code Generation**: The `StructureGenerator` class dynamically generates Python source code for a complete Pydantic class that inherits from `StructuredContent` +4. **Class Creation**: The generated Python code is executed to create an actual class at runtime +5. **Registration**: The new class is automatically registered in Pipelex's `class_registry`, making it available throughout your pipelines + +This entire process is transparent to you—you write TOML, and Pipelex handles the rest. 
+ +## Syntax and Examples + +### Simple Field Definitions + +The simplest way to define a field is with a string description. This creates a required text field: + +```plx +[concept.Person] +description = "Information about a person" + +[concept.Person.structure] +name = "The person's full name" +email = "The person's email address" +``` + +### Detailed Field Definitions + +For more control, use inline tables with explicit field properties: + +```plx +[concept.Employee] +description = "Information about an employee" + +[concept.Employee.structure] +employee_id = { type = "integer", description = "Unique employee identifier", required = true } +name = { type = "text", description = "Employee's full name", required = true } +hire_date = { type = "date", description = "Date of hire", required = true } +department = { type = "text", description = "Department name", required = false, default_value = "General" } +is_active = { type = "boolean", description = "Employment status", required = false, default_value = true } +salary = { type = "number", description = "Annual salary", required = false } +``` + +### Supported Field Types + +Inline structures support the following field types: + +- **text**: String values +- **integer**: Whole numbers +- **boolean**: True/false values +- **number**: Numeric values (integers or floats) +- **date**: Date and datetime values +- **list**: Arrays/lists of items (specify `item_type`) +- **dict**: Dictionary/map structures (specify `key_type` and `value_type`) + +### Field Properties + +Each field can specify: + +- **type**: The data type (required for detailed definitions) +- **description**: Human-readable description of the field +- **required**: Whether the field is mandatory (default: `true`) +- **default_value**: Default value if not provided +- **choices**: For enum-like fields, a list of valid values +- **item_type**: For list fields, the type of list items +- **key_type** and **value_type**: For dict fields, the types of keys 
and values + +### Complex Type Examples + +**List Fields:** + +```plx +[concept.Project] +description = "A software project" + +[concept.Project.structure] +name = "Project name" +tags = { type = "list", item_type = "text", description = "Project tags", required = false } +team_members = { type = "list", item_type = "text", description = "Team member names", required = true } +``` + +**Dictionary Fields:** + +```plx +[concept.Configuration] +description = "Application configuration" + +[concept.Configuration.structure] +app_name = "Application name" +settings = { type = "dict", key_type = "text", value_type = "text", description = "Configuration settings", required = false } +``` + +**Choice Fields:** + +```plx +[concept.Task] +description = "A task to be completed" + +[concept.Task.structure] +title = "Task title" +priority = { choices = ["low", "medium", "high"], description = "Task priority level", required = true } +status = { choices = ["todo", "in_progress", "done"], description = "Current status", default_value = "todo" } +``` + +### Mixed Syntax Example + +You can mix simple string definitions with detailed inline tables in the same structure: + +```plx +[concept.Article] +description = "A blog article" + +[concept.Article.structure] +title = "The article title" +author = "The author's name" +word_count = { type = "integer", description = "Number of words", required = false } +published_date = { type = "date", description = "Publication date", required = true } +tags = { type = "list", item_type = "text", description = "Article tags", required = false } +is_featured = { type = "boolean", description = "Whether article is featured", default_value = false } +``` + +## Advantages of Inline Structures + +### Rapid Development + +- **Single File**: Keep concepts, structures, and pipes all in one `.plx` file +- **No Context Switching**: No need to jump between `.plx` and `.py` files +- **Quick Iteration**: Modify structures instantly without managing separate 
Python files + +### Simplicity + +- **Declarative Syntax**: Straightforward TOML that's easy to read and write +- **No Boilerplate**: No need for Python imports, class definitions, or field declarations +- **Automatic Registration**: Generated classes are automatically discovered and registered + +### Type Safety + +- **Pydantic Models**: Behind the scenes, you get full Pydantic v2 models +- **Runtime Validation**: Automatic validation of structured outputs from LLMs +- **Type Hints**: Generated classes include proper type annotations + +### Developer Experience + +- **Less Code to Maintain**: Fewer files, less boilerplate +- **Clear and Readable**: TOML structure definitions are self-documenting +- **Perfect for Prototyping**: Ideal for getting started quickly +- **Good for Simple to Medium Complexity**: Handles most common use cases + +## Current Limitations + +### Concept Refinement Restrictions + +Currently, inline structures can only be used for concepts that: + +- Don't refine other concepts, OR +- Refine native concepts only: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page` + +You cannot use inline structures to refine custom (non-native) concepts. This limitation may be removed in future versions. 
+ +### Feature Constraints + +Inline structures cannot provide: + +- **Custom Methods**: No ability to define methods or computed properties +- **Complex Validation**: No custom validators or cross-field validation logic +- **Advanced Pydantic Features**: + - `@field_validator` decorators + - `@model_validator` decorators + - Custom serializers/deserializers + - `@property` methods + - Class methods or static methods +- **Nested Custom Concepts**: Cannot reference other custom concepts as field types (coming soon - see roadmap below) +- **Inheritance Hierarchies**: Cannot create class inheritance beyond the base `StructuredContent` + +### Tooling Limitations + +- **IDE Support**: Limited autocomplete compared to explicit Python classes +- **Static Type Checking**: Type checkers like `mypy` or `pyright` won't validate inline structures as thoroughly (static code generation is planned for a future release) +- **Refactoring**: Less IDE refactoring support for inline structures +- **Documentation**: No docstrings or inline documentation beyond descriptions + +## Future Roadmap + +The Pipelex team is actively working on enhancing inline structures with powerful new capabilities: + +### Nested Custom Concepts (Coming Soon) + +Currently, inline structures only support native types and references to native concepts. Soon, you'll be able to reference other custom concepts as field types: + +```plx +[concept.Address] +description = "A postal address" + +[concept.Address.structure] +street = "Street address" +city = "City name" +postal_code = "Postal or ZIP code" + +[concept.Company] +description = "A company with an address" + +[concept.Company.structure] +name = "Company name" +headquarters = { type = "Address", description = "Company headquarters address", required = true } +``` + +This will enable building complex, nested data models entirely within `.plx` files.
+ +## When to Use Explicit Python Classes + +While inline structures are convenient, there are scenarios where creating an explicit Python `StructuredContent` class is the better choice. + +### Use Python Classes When You Need: + +#### 1. Complex Validation Logic + +When your data requires custom validation that goes beyond field types: + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + total_amount: float = Field(ge=0, description="Total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + net_amount: float = Field(ge=0, description="Net amount before tax") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v + + @field_validator('net_amount') + @classmethod + def validate_net_amount(cls, v, info): + """Verify net_amount + tax_amount = total_amount.""" + total = info.data.get('total_amount', 0) + tax = info.data.get('tax_amount', 0) + expected = total - tax + if abs(v - expected) > 0.01: # Allow small floating point differences + raise ValueError(f'Net amount should be {expected}, got {v}') + return v +``` + +#### 2. 
Computed Properties + +When you need derived values or methods: + +```python +from datetime import datetime +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class Subscription(StructuredContent): + """A subscription with computed properties.""" + + start_date: datetime = Field(description="Subscription start date") + end_date: datetime = Field(description="Subscription end date") + monthly_price: float = Field(ge=0, description="Monthly subscription price") + + @property + def duration_days(self) -> int: + """Calculate subscription duration in days.""" + return (self.end_date - self.start_date).days + + @property + def total_cost(self) -> float: + """Calculate total subscription cost.""" + months = self.duration_days / 30.0 + return months * self.monthly_price + + def is_active_on(self, date: datetime) -> bool: + """Check if subscription is active on a given date.""" + return self.start_date <= date <= self.end_date +``` + +#### 3. Reusability Across Domains + +When the structure needs to be shared: + +```python +# shared_models.py +from typing import Optional +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class Address(StructuredContent): + """A reusable address structure.""" + + street: str = Field(description="Street address") + city: str = Field(description="City name") + state: str = Field(description="State or province") + postal_code: str = Field(description="Postal/ZIP code") + country: str = Field(default="USA", description="Country") + +# Can now be imported and used in multiple domains/projects +``` + +#### 4. 
Advanced Type Features + +When you need sophisticated typing: + +```python +from typing import Union, Literal, Optional +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class ApiResponse(StructuredContent): + """A flexible API response structure.""" + + status: Literal["success", "error", "pending"] + data: Optional[dict] = Field(default=None, description="Response data") + error_message: Optional[str] = Field(default=None, description="Error details if status is error") + metadata: dict[str, Union[str, int, float]] = Field(default_factory=dict, description="Additional metadata") +``` + +#### 5. Better Developer Experience + +When your team prefers: + +- Full IDE autocomplete and type hints +- Static type checking with `mypy` or `pyright` +- Comprehensive docstrings +- Explicit, self-documenting code +- Better refactoring support + +## Using AI Agents to Create Python Classes + +One of the major advantages of the modern development workflow is that **AI coding assistants make creating Python classes nearly as fast as writing TOML**. Tools like Cursor AI, GitHub Copilot, and other AI-powered IDEs understand Pipelex patterns and can generate proper `StructuredContent` classes instantly. + +### The Best of Both Worlds + +You don't have to choose between inline structures and Python classes from the start. Instead, follow this pragmatic approach: + +1. **Prototype Fast**: Start with inline structures for rapid development +2. **Validate Quickly**: Test your pipelines and iterate on the structure +3. **Upgrade When Needed**: When complexity grows, convert to Python classes +4. 
**Let AI Help**: Use AI assistants to generate the Python code automatically + +### Example Workflow + +**Step 1: Start with inline structure** + +```plx +[concept.UserProfile] +description = "A user profile" + +[concept.UserProfile.structure] +username = "The user's username" +email = "The user's email address" +age = { type = "integer", description = "User's age", required = false } +``` + +**Step 2: Run and test your pipeline** + +Iterate quickly, adjusting the structure as needed. + +**Step 3: When you need validation, ask your AI assistant** + +> "Convert this inline UserProfile structure to a Python StructuredContent class with email validation" + +**Step 4: AI generates the class** + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator +from typing import Optional +import re + +class UserProfile(StructuredContent): + """A user profile with validation.""" + + username: str = Field(description="The user's username") + email: str = Field(description="The user's email address") + age: Optional[int] = Field(default=None, description="User's age") + + @field_validator('email') + @classmethod + def validate_email(cls, v): + """Validate email format.""" + pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + if not re.match(pattern, v): + raise ValueError('Invalid email format') + return v + + @field_validator('age') + @classmethod + def validate_age(cls, v): + """Validate age range.""" + if v is not None and (v < 0 or v > 150): + raise ValueError('Age must be between 0 and 150') + return v +``` + +**Step 5: Update your .plx file** + +```plx +[concept] +UserProfile = "A user profile" # Structure now defined in Python +``` + +The Python class is automatically discovered and registered by Pipelex. 
+ +### AI Assistant Capabilities + +Modern AI coding assistants can: + +- Generate complete `StructuredContent` classes from descriptions +- Add appropriate validators and validation logic +- Convert inline TOML structures to Python classes +- Suggest improvements and best practices +- Handle complex type annotations +- Add docstrings and documentation +- Follow Pydantic v2 patterns + +This means you get the **development speed of inline structures** with the **power and flexibility of Python classes** when you need them. + +## Migration Path + +### From Inline Structure to Python Class + +Here's a step-by-step guide to migrate from inline structures to explicit Python classes: + +**1. Identify the concept to migrate** + +Let's say you have this inline structure: + +```plx +domain = "ecommerce" + +[concept.Product] +description = "A product in the catalog" + +[concept.Product.structure] +product_id = { type = "integer", description = "Unique product ID", required = true } +name = "Product name" +price = { type = "number", description = "Product price", required = true } +in_stock = { type = "boolean", description = "Stock availability", default_value = true } +categories = { type = "list", item_type = "text", description = "Product categories", required = false } +``` + +**2. Create a Python file for structures** + +Create `ecommerce_struct.py` in your project: + +```python +from typing import Optional, List +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class Product(StructuredContent): + """A product in the catalog.""" + + product_id: int = Field(description="Unique product ID") + name: str = Field(description="Product name") + price: float = Field(ge=0, description="Product price") + in_stock: bool = Field(default=True, description="Stock availability") + categories: Optional[List[str]] = Field(default=None, description="Product categories") +``` + +**3. 
Remove the inline structure from .plx** + +Update your `.plx` file: + +```plx +domain = "ecommerce" + +[concept] +Product = "A product in the catalog" + +# Structure section removed - now defined in ecommerce_struct.py +``` + +**4. Verify automatic discovery** + +Pipelex automatically discovers and registers `StructuredContent` classes. No manual registration needed. + +**5. Test your pipeline** + +Run your pipeline to ensure everything works. The behavior should be the same for valid data (the Python class above additionally rejects negative prices via `ge=0`), and now you have the flexibility to add custom logic. + +**6. Add enhancements (optional)** + +Now you can add validators, computed properties, or other Python features: + +```python +from typing import Optional, List +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Product(StructuredContent): + """A product in the catalog.""" + + product_id: int = Field(description="Unique product ID") + name: str = Field(description="Product name") + price: float = Field(ge=0, description="Product price") + in_stock: bool = Field(default=True, description="Stock availability") + categories: Optional[List[str]] = Field(default=None, description="Product categories") + + @field_validator('price') + @classmethod + def validate_price(cls, v): + """Ensure price is non-negative and reasonable.""" + if v < 0: + raise ValueError('Price cannot be negative') + if v > 1_000_000: + raise ValueError('Price seems unreasonably high') + return v + + @property + def display_price(self) -> str: + """Format price for display.""" + return f"${self.price:.2f}" +``` + +## Recommendation: Start Simple, Grow as Needed + +The inline structure feature is a **practical solution for the majority of use cases**.
It allows you to: + +- Get started quickly without Python overhead +- Keep all pipeline logic in one place +- Iterate rapidly during development +- Still get full type safety and validation + +When your needs grow beyond what inline structures can provide, **explicit Python `StructuredContent` classes offer more power and flexibility**. With AI coding assistants, creating these classes is fast and easy, giving you the best of both worlds. + +**Guidelines:** + +- ✅ **Use inline structures** for straightforward data models +- ✅ **Use inline structures** during prototyping and early development +- ✅ **Use inline structures** for domain-specific models with simple validation +- ✅ **Use Python classes** when you need custom validation logic +- ✅ **Use Python classes** for reusable, shared data models +- ✅ **Use Python classes** when you need computed properties or methods +- ✅ **Use Python classes** for complex type relationships + +Remember: You can always start with inline structures and migrate to Python classes later. The migration is straightforward, and AI assistants can help you make the transition quickly. + diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md index 3456034f9..56b64c95d 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md @@ -4,34 +4,120 @@ Pipelex provides powerful tools to automatically generate complete, working pipe ## Overview -The pipeline creation system creates a fully working pipeline that has been both statically and dynamically validated. The system automatically handles all aspects of pipeline generation, from understanding requirements to producing executable code. 
+The pipeline creation system can generate pipelines in different modes depending on your needs - from quick one-shot generation to validated, production-ready pipelines that have been automatically tested and fixed. ## Core Commands -### Build Blueprint +### Build Pipe (Recommended) -Generate a complete pipeline blueprint from requirements: +Generate a validated pipeline with automatic fixing of deterministic issues: ```bash -pipelex build blueprint "BRIEF IN NATURAL LANGUAGE" [OPTIONS] +pipelex build pipe "BRIEF IN NATURAL LANGUAGE" [OPTIONS] ``` +This command runs a validation/fix loop to ensure the generated pipeline is correct and runnable. It automatically detects and corrects common issues. + **Example:** + ```bash -pipelex build blueprint "Take a photo as input, and render the opposite of the photo" \ - -o output/pipeline/file/path +pipelex build pipe "Take a photo as input, and render the opposite of the photo" \ + -o results/photo_inverter.plx ``` **Options:** -- `--output, -o`: Output path for generated files -## Complete Workflow +- `--output, -o`: Output path for generated PLX file (default: `./results/generated_pipeline.plx`) +- `--no-output`: Skip saving the pipeline to file + +### Build One-Shot (Fast) + +Generate a pipeline quickly without validation: + +```bash +pipelex build one-shot "BRIEF IN NATURAL LANGUAGE" [OPTIONS] +``` + +This command generates the pipeline in a single pass without validation. It's faster but may produce pipelines that need manual fixes. + +**Example:** + +```bash +pipelex build one-shot "Extract invoice data from PDF documents" \ + -o results/invoice_extractor.plx +``` + +**Use when:** You want to quickly iterate on ideas or plan to manually review/modify the pipeline. -### 1. 
Requirements Analysis +### Build Partial (Debug) -Start with clear, specific requirements: +Generate a partial pipeline specification as JSON for debugging: -```text -Take a photo as input, and render the opposite of the photo +```bash +pipelex build partial "BRIEF IN NATURAL LANGUAGE" [OPTIONS] ``` +This outputs the internal pipeline specification in JSON format, useful for debugging and understanding how Pipelex interprets your requirements. + +**Example:** + +```bash +pipelex build partial "Analyze sentiment from customer reviews" \ + -o results/debug_spec.json +``` + +## Quick Start Example + +The simplest way to create a pipeline is to use the `build pipe` command with a clear description: + +```bash +pipelex build pipe "Given an expense report, apply company rules and validate compliance" +``` + +This will: + +1. Analyze your requirements +2. Generate a complete pipeline with appropriate concepts and pipes +3. Validate the pipeline for correctness +4. Fix any deterministic issues automatically +5. 
Save the working pipeline to `./results/generated_pipeline.plx` + +## Best Practices + +When creating pipelines with natural language: + +**Be Specific About Inputs and Outputs:** + +- ✅ Good: "Take a PDF invoice as input and extract the total amount, vendor name, and date" +- ❌ Vague: "Process invoices" + +**Describe the Transformation:** + +- ✅ Good: "Analyze sentiment of customer reviews and categorize as positive, negative, or neutral" +- ❌ Vague: "Do something with reviews" + +**Mention Data Types When Relevant:** + +- ✅ Good: "Extract text from a PDF, then summarize it into 3 bullet points" +- ❌ Unclear: "Summarize documents" + +## What Gets Generated + +When you run a build command, Pipelex automatically creates: + +- **Domain definition**: The namespace for your pipeline +- **Concepts**: Structured data types for inputs and outputs +- **Pipes**: The processing steps and LLM operations +- **Python structures**: When structured output is needed (saved alongside the `.plx` file with `_struct.py` suffix) + +All generated pipelines follow Pipelex best practices and conventions automatically. + +## Next Steps + +After generating your pipeline: + +1. **Review the generated `.plx` file** to understand the structure +2. **Test the pipeline** using the generated example code +3. **Iterate if needed** by modifying the natural language description and regenerating +4. **Customize** the pipeline by editing the `.plx` file directly for fine-tuning + diff --git a/docs/pages/configuration/config-technical/library-config.md b/docs/pages/configuration/config-technical/library-config.md index 3273cf5bf..e919431d6 100644 --- a/docs/pages/configuration/config-technical/library-config.md +++ b/docs/pages/configuration/config-technical/library-config.md @@ -283,15 +283,3 @@ pipelex show pipe YOUR_PIPE_CODE 2. Check concept references are spelled correctly 3. Verify pipe configurations match expected format 4. 
Ensure all required fields are present - -## Migration from Old System - -If you're migrating from the old `pipelex_libraries` system, see the [Migration Guide](https://github.com/Pipelex/pipelex/blob/main/no_more_pipelex_libraries.md) for detailed instructions. - -Key changes: - -- No `pipelex init libraries` command needed -- No `pipelex_libraries` directory required -- No `-c/--config-folder-path` flags needed -- Structure classes are auto-discovered -- Custom functions need `@pipe_func()` decorator diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index 9a57645ab..04f01a9ac 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -7,7 +7,6 @@ from pipelex import pretty_print from pipelex.builder.builder import PipelexBundleSpec from pipelex.builder.builder_loop import BuilderLoop -from pipelex.builder.flow_factory import FlowFactory from pipelex.hub import get_report_delegate from pipelex.language.plx_factory import PlxFactory from pipelex.pipelex import Pipelex @@ -15,7 +14,7 @@ from pipelex.tools.misc.file_utils import ensure_directory_for_file_path, save_text_to_path from pipelex.tools.misc.json_utils import save_as_json_to_path -build_app = typer.Typer(help="Build artifacts like pipelines", no_args_is_help=True) +build_app = typer.Typer(help="Build working pipelines from natural language requirements", no_args_is_help=True) """ Today's example: diff --git a/pipelex/kit/agent_rules/python_standards.md b/pipelex/kit/agent_rules/python_standards.md index 6c6930a3e..8471ef3be 100644 --- a/pipelex/kit/agent_rules/python_standards.md +++ b/pipelex/kit/agent_rules/python_standards.md @@ -1,4 +1,4 @@ -# Coding Standards & Best Practices +# Coding Standards & Best Practices for Python Code This document outlines the core coding standards, best practices, and quality control procedures for the codebase. 
@@ -9,20 +9,34 @@ This document outlines the core coding standards, best practices, and quality co - Every function parameter must be typed - Every function return must be typed - Use type hints for all variables where type is not obvious - - Use dict, list, tupele types with lowercase first letter: dict[], list[], tuple[] + - Use dict, list, tuple types with lowercase first letter: dict[], list[], tuple[] - Use type hints for all fields - - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals - - Use Field(default_factory=...) for mutable defaults and if it's a list of something else than str, use `empty_list_factory_of()` to make a factory: `number_list: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers")` - - Use `BaseModel` and respect Pydantic v2 standards, in particular use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose + - Use the `|` syntax for union types (e.g. `str | int`) and `| None` for optionals (not `Optional[]`) + - Use Field(default_factory=...) for mutable defaults + +2. **BaseModel / Pydantic Standards** -2. **StrEnum** + - Use `BaseModel` and respect Pydantic v2 standards + - Use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` + - Keep models focused and single-purpose + - For list fields with non-string items in BaseModels, use `empty_list_factory_of()` to avoid linter complaints: + ```python + from pydantic import BaseModel, Field + from pipelex.tools.typing.pydantic_utils import empty_list_factory_of + + class MyModel(BaseModel): + names: list[str] = Field(default_factory=list) # OK for strings + numbers: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers") + items: list[MyItem] = Field(default_factory=empty_list_factory_of(MyItem), description="A list of items") + ``` + +3. 
**StrEnum** - Import from `pipelex.types`: ```python from pipelex.types import StrEnum ``` -3. **Self type** +4. **Self type** - Import from `pipelex.types`: ```python from pipelex.types import Self @@ -55,7 +69,7 @@ This document outlines the core coding standards, best practices, and quality co 1. **Docstring Format** ```python - def process_image(image_path: str, size: Tuple[int, int]) -> bytes: + def process_image(image_path: str, size: tuple[int, int]) -> bytes: """Process and resize an image. Args: @@ -114,1193 +128,10 @@ Always fix any issues reported by these tools before proceeding. ## Pipelines -- All pipeline definitions go in `pipelex/libraries/pipelines/` - Always validate pipelines after creation/edit with `make validate`. Iterate if there are errors. ## Project Structure -- **Pipelines**: `pipelex/libraries/pipelines/` - **Tests**: `tests/` directory - **Documentation**: `docs/` directory - ---- - -# Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural langage, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) - -## Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures -- Use descriptive names in `snake_case` - -## Pipeline File Structure -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
- -### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -### Pipe Definitions - -## Pipe Base Structure - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). -So If you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -## Structuring Models - -### Model Location and Registration - -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` -- Models must inherit from `StructuredContent` or appropriate content type - -## Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." -``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, "Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -### Usage - -Structures are meant to indicate what class to use for a particular Concept. 
In general they use the same name as the concept. - -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -## Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -## PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -### Basic Structure -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -### Key Components - -1. 
**Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -## PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -## PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. Process images with Vision LLMs - -### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. 
-""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." -``` - -### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. 
- -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. - - -## PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. 
-output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -## PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -# Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -## PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -## PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - ---- - -# Guide to write an example to execute a pipeline - -## Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -# start Pipelex -Pipelex.make() -# run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -## Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -# start Pipelex -Pipelex.make() - -# run sample using asyncio -gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -## Setting up the input memory - -### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -# Here we have a single input and it's an Image. 
-# Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -# Here we have a single input, it's an image but -# its actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -# Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -## Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... 
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extarct any variable from the output working memory. - -### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
- -### Test file structure - -- Name test files with `test_` prefix -- Use descriptive names that match the functionality being tested -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put test inside Test classes. -- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "extract: uses text/image extraction from documents" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference`and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. 
- -### Test Class Structure - -Always group the tests of a module into a test class: - -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` - -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. - -## Writing integration test to test pipes - -### Required imports for pipe tests - -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol -``` - -### Pipe test implementation steps - -1. Create Stuff from blueprint: - -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` - -2. 
Create Working Memory: - -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) -``` - -3. Run the pipe: - -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` - -4. Basic assertions: - -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None -``` - -### Test Data Organization - -- If it's not already there, create a `test_data.py` file in the test directory -- Define test cases using `StuffBlueprint`: - -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) - - CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] -``` - -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. 
- -## Best Practices for Testing - -- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose -- Log outputs for debugging -- Generate reports for cost tracking diff --git a/pipelex/kit/agent_rules/run_pipelex.md b/pipelex/kit/agent_rules/run_pipelex.md index 7fe22547f..207f6404d 100644 --- a/pipelex/kit/agent_rules/run_pipelex.md +++ b/pipelex/kit/agent_rules/run_pipelex.md @@ -37,8 +37,9 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent -from pipelex_libraries.pipelines.examples.extract_gantt.gantt import GanttChart +from my_project.gantt.gantt_struct import GanttChart SAMPLE_NAME = "extract_gantt" IMAGE_URL = "assets/gantt/gantt_tree_house.png" diff --git a/pipelex/kit/agent_rules/write_pipelex.md b/pipelex/kit/agent_rules/write_pipelex.md index f79c85f79..e4bde77de 100644 --- a/pipelex/kit/agent_rules/write_pipelex.md +++ b/pipelex/kit/agent_rules/write_pipelex.md @@ -6,10 +6,10 @@ ## Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for structures +- Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` -## Pipeline File Structure +## Pipeline File Outline A pipeline file has three main sections: 1. Domain statement 2. 
Concept definitions @@ -36,7 +36,7 @@ Important Rules: yes ### Pipe Definitions -## Pipe Base Structure +## Pipe Base Definition ```plx [pipe.your_pipe_name] @@ -83,8 +83,9 @@ inputs = { ### Model Location and Registration -- Create models for structured generations related to "some_domain" in `pipelex_libraries/pipelines/.py` +- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) - Models must inherit from `StructuredContent` or appropriate content type +- Structure classes are automatically discovered by Pipelex - no manual registration needed ## Model Structure @@ -136,7 +137,7 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. -Structure classes defined within `pipelex_libraries/pipelines/` are automatically loaded into the class_registry when setting up Pipelex, no need to do it manually. +Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. ### Best Practices for structures @@ -166,7 +167,7 @@ Look at the Pipes we have in order to adapt it. Pipes are organized in two categ Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
-### Basic Structure +### Basic Definition ```plx [pipe.your_sequence_name] type = "PipeSequence" diff --git a/pipelex/kit/cursor_export.py b/pipelex/kit/cursor_export.py index c0490d48f..ea72f6aaa 100644 --- a/pipelex/kit/cursor_export.py +++ b/pipelex/kit/cursor_export.py @@ -36,10 +36,13 @@ def _front_matter_for(name: str, idx: KitIndex) -> dict[str, Any]: Returns: Merged front-matter dictionary """ - base = dict(idx.agent_rules.cursor.front_matter) + base = idx.agent_rules.cursor.front_matter.copy() key = name.removesuffix(".md") if key in idx.agent_rules.cursor.files: base |= idx.agent_rules.cursor.files[key].front_matter + # Remove globs if it's an empty list + if "globs" in base and base["globs"] == []: + del base["globs"] return base diff --git a/pipelex/kit/index_models.py b/pipelex/kit/index_models.py index bd4eeddf9..75c3c8ced 100644 --- a/pipelex/kit/index_models.py +++ b/pipelex/kit/index_models.py @@ -2,23 +2,25 @@ from typing import Any -from pydantic import BaseModel, Field +from pydantic import Field +from pipelex.config import ConfigModel -class CursorFileOverride(BaseModel): + +class CursorFileOverride(ConfigModel): """Per-file front-matter overrides for Cursor export.""" front_matter: dict[str, Any] = Field(default_factory=dict, description="Front-matter to override for this file") -class CursorSpec(BaseModel): +class CursorSpec(ConfigModel): """Configuration for Cursor rules export.""" front_matter: dict[str, Any] = Field(default_factory=dict, description="Default YAML front-matter for all Cursor files") files: dict[str, CursorFileOverride] = Field(default_factory=dict, description="Per-file front-matter overrides") -class Target(BaseModel): +class Target(ConfigModel): """Configuration for a single-file merge target.""" path: str = Field(description="Path to the target file relative to repo root") @@ -28,7 +30,7 @@ class Target(BaseModel): heading_1: str | None = Field(default=None, description="Main title (H1) to add when inserting into empty 
file or file with no H1 headings") -class AgentRules(BaseModel): +class AgentRules(ConfigModel): """Configuration for merging agent documentation files.""" sets: dict[str, list[str]] = Field(description="Named sets of agent_rules files (e.g., coding_standards, pipelex_language, all)") @@ -38,7 +40,7 @@ class AgentRules(BaseModel): targets: dict[str, Target] = Field(description="Dictionary of single-file merge targets keyed by ID") -class KitIndex(BaseModel): +class KitIndex(ConfigModel): """Root configuration model for kit index.toml.""" meta: dict[str, Any] = Field(default_factory=dict, description="Metadata about the kit configuration") diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index 8ec30b83b..6d9fef8e1 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -307,9 +307,6 @@ def make(cls) -> Self: Raises: if setup fails - Note: - If neither path is provided, defaults to "./pipelex_libraries". - """ if cls.get_optional_instance() is not None: msg = "Pipelex is already initialized" From b59ec194a3e661f05cacbc70057c718b41159f2a Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 01:39:56 +0200 Subject: [PATCH 060/115] Remove rules --- .cursor/rules/docs.mdc | 14 - .cursor/rules/llms.mdc | 85 --- .cursor/rules/pytest_standards.mdc | 164 ----- .cursor/rules/python_standards.mdc | 143 ---- .cursor/rules/run_pipelex.mdc | 231 ------ .cursor/rules/tdd.mdc | 28 - .cursor/rules/write_pipelex.mdc | 801 -------------------- .github/copilot-instructions.md | 1099 ---------------------------- .windsurfrules.md | 1099 ---------------------------- AGENTS.md | 1099 ---------------------------- BLACKBOX_RULES.md | 1099 ---------------------------- CLAUDE.md | 1099 ---------------------------- pipelex/cli/commands/kit_cmd.py | 54 +- pipelex/kit/cursor_export.py | 30 + pipelex/kit/targets_update.py | 109 ++- 15 files changed, 182 insertions(+), 6972 deletions(-) delete mode 100644 .cursor/rules/docs.mdc delete mode 100644 .cursor/rules/llms.mdc delete 
mode 100644 .cursor/rules/pytest_standards.mdc delete mode 100644 .cursor/rules/python_standards.mdc delete mode 100644 .cursor/rules/run_pipelex.mdc delete mode 100644 .cursor/rules/tdd.mdc delete mode 100644 .cursor/rules/write_pipelex.mdc delete mode 100644 .github/copilot-instructions.md delete mode 100644 .windsurfrules.md delete mode 100644 AGENTS.md delete mode 100644 BLACKBOX_RULES.md delete mode 100644 CLAUDE.md diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc deleted file mode 100644 index 1400c5cd6..000000000 --- a/.cursor/rules/docs.mdc +++ /dev/null @@ -1,14 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing documentation -globs: -- docs/**/*.md ---- -Write docs and answer questions about writing docs. - -We use Material for MkDocs. All markdown in our docs must be compatible with Material for MkDocs and done using best practices to get the best results with Material for MkDocs. - -## MkDocs Markdown Requirements - -- Always add a blank line before any bullet lists or numbered lists in MkDocs markdown. - diff --git a/.cursor/rules/llms.mdc b/.cursor/rules/llms.mdc deleted file mode 100644 index a21831a30..000000000 --- a/.cursor/rules/llms.mdc +++ /dev/null @@ -1,85 +0,0 @@ ---- -alwaysApply: false -description: LLM configuration and usage guidelines -globs: -- '*.plx' -- '*.toml' ---- -# Rules to choose LLM models used in PipeLLMs. - -## LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -## LLM Handles - -An llm_handle can be either: -1. 
**A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. **An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -## Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -## LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. 
- -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc deleted file mode 100644 index 9b5df04a6..000000000 --- a/.cursor/rules/pytest_standards.mdc +++ /dev/null @@ -1,164 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing unit tests -globs: -- tests/**/*.py ---- -# Writing unit tests - -## Unit test generalities - -NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. - -### Test file structure - -- Name test files with `test_` prefix -- Use descriptive names that match the functionality being tested -- Place test files in the appropriate test category directory: - - `tests/unit/` - for unit tests that test individual functions/classes in isolation - - `tests/integration/` - for integration tests that test component interactions - - `tests/e2e/` - for end-to-end tests that test complete workflows - - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) -- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest -- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. -- Always put test inside Test classes. 
-- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` - -### Markers - -Apply the appropriate markers: -- "llm: uses an LLM to generate text or objects" -- "img_gen: uses an image generation AI" -- "extract: uses text/image extraction from documents" -- "inference: uses either an LLM or an image generation AI" -- "gha_disabled: will not be able to run properly on GitHub Actions" - -Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference`and `llm`. - -### Important rules - -- Never use the unittest.mock. Use pytest-mock. - -### Test Class Structure - -Always group the tests of a module into a test class: - -```python -@pytest.mark.llm -@pytest.mark.inference -@pytest.mark.asyncio(loop_scope="class") -class TestFooBar: - @pytest.mark.parametrize( - "topic test_case_blueprint", - [ - TestCases.CASE_1, - TestCases.CASE_2, - ], - ) - async def test_pipe_processing( - self, - request: FixtureRequest, - topic: str, - test_case_blueprint: StuffBlueprint, - ): - # Test implementation -``` - -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. 
- -## Writing integration test to test pipes - -### Required imports for pipe tests - -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol -``` - -### Pipe test implementation steps - -1. Create Stuff from blueprint: - -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` - -2. Create Working Memory: - -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) -``` - -3. Run the pipe: - -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` - -4. 
Basic assertions: - -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None -``` - -### Test Data Organization - -- If it's not already there, create a `test_data.py` file in the test directory -- Define test cases using `StuffBlueprint`: - -```python -class TestCases: - CASE_BLUEPRINT_1 = StuffBlueprint( - name="test_case_1", - concept_code="domain.ConceptName1", - value="test_value" - ) - CASE_BLUEPRINT_2 = StuffBlueprint( - name="test_case_2", - concept_code="domain.ConceptName2", - value="test_value" - ) - - CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" - ("topic1", CASE_BLUEPRINT_1), - ("topic2", CASE_BLUEPRINT_2), - ] -``` - -Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. -Also note that we provide a topic for the test case, which is purely for convenience. - -## Best Practices for Testing - -- Use parametrize for multiple test cases -- Test both success and failure cases -- Verify working memory state -- Check output structure and content -- Use meaningful test case names -- Include docstrings explaining test purpose -- Log outputs for debugging -- Generate reports for cost tracking diff --git a/.cursor/rules/python_standards.mdc b/.cursor/rules/python_standards.mdc deleted file mode 100644 index 150864d1e..000000000 --- a/.cursor/rules/python_standards.mdc +++ /dev/null @@ -1,143 +0,0 @@ ---- -alwaysApply: false -description: Python coding standards and best practices -globs: -- '**/*.py' ---- -# Coding Standards & Best Practices for Python Code - -This document outlines the core coding standards, best practices, and quality control procedures for the codebase. - -## Type Hints - -1. 
**Always Use Type Hints** - - - Every function parameter must be typed - - Every function return must be typed - - Use type hints for all variables where type is not obvious - - Use dict, list, tuple types with lowercase first letter: dict[], list[], tuple[] - - Use type hints for all fields - - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals (not `Optional[]`) - - Use Field(default_factory=...) for mutable defaults - -2. **BaseModel / Pydantic Standards** - - - Use `BaseModel` and respect Pydantic v2 standards - - Use the modern `ConfigDict` when needed, e.g. `model_config = ConfigDict(extra="forbid", strict=True)` - - Keep models focused and single-purpose - - For list fields with non-string items in BaseModels, use `empty_list_factory_of()` to avoid linter complaints: - ```python - from pydantic import BaseModel, Field - from pipelex.tools.typing.pydantic_utils import empty_list_factory_of - - class MyModel(BaseModel): - names: list[str] = Field(default_factory=list) # OK for strings - numbers: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers") - items: list[MyItem] = Field(default_factory=empty_list_factory_of(MyItem), description="A list of items") - ``` - -3. **StrEnum** - - Import from `pipelex.types`: - ```python - from pipelex.types import StrEnum - ``` - -4. **Self type** - - Import from `pipelex.types`: - ```python - from pipelex.types import Self - ``` - -## Factory Pattern - - - Use Factory Pattern for object creation when dealing with multiple implementations - - Our factory methods are named `make_from_...` and such - -## Error Handling - - - Always catch exceptions at the place where you can add useful context to it. 
- - Use try/except blocks with specific exceptions - - Convert third-party exceptions to our custom ones - - Never catch Exception, only catch specific exceptions - - Always add `from exc` to the exception - - ```python - try: - self.models_manager.setup() - except RoutingProfileLibraryNotFoundError as exc: - msg = "The routing library could not be found, please call `pipelex init config` to create it" - raise PipelexSetupError(msg) from exc - ``` - - **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. - -## Documentation - -1. **Docstring Format** - ```python - def process_image(image_path: str, size: tuple[int, int]) -> bytes: - """Process and resize an image. - - Args: - image_path: Path to the source image - size: Tuple of (width, height) for resizing - - Returns: - Processed image as bytes - """ - pass - ``` - -2. **Class Documentation** - ```python - class ImageProcessor: - """Handles image processing operations. - - Provides methods for resizing, converting, and optimizing images. - """ - ``` - -## Code Quality Checks - -### Linting and Type Checking - -Before finalizing a task, run: -```bash -make fix-unused-imports -make check -``` - -This runs multiple code quality tools: -- Pyright: Static type checking -- Ruff: Fast Python linter -- Mypy: Static type checker - -Always fix any issues reported by these tools before proceeding. - -### Running Tests - -1. **Quick Test Run** (no LLM/image generation): - ```bash - make tp - ``` - Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` - -2. **Specific Tests**: - ```bash - make tp TEST=TestClassName - # or - make tp TEST=test_function_name - ``` - Note: Matches names starting with the provided string. - -**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. 
- -## Pipelines - -- Always validate pipelines after creation/edit with `make validate`. - Iterate if there are errors. - -## Project Structure - -- **Tests**: `tests/` directory -- **Documentation**: `docs/` directory diff --git a/.cursor/rules/run_pipelex.mdc b/.cursor/rules/run_pipelex.mdc deleted file mode 100644 index cb619d261..000000000 --- a/.cursor/rules/run_pipelex.mdc +++ /dev/null @@ -1,231 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for running Pipelex pipelines -globs: -- examples/**/*.py ---- -# Guide to execute a pipeline and write example code - -## Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -# start Pipelex -Pipelex.make() -# run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -## Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return 
pipe_output.main_stuff_as(content_type=GanttChart) - - -# start Pipelex -Pipelex.make() - -# run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -## Setting up the input memory - -### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -# Here we have a single input and it's a PDF. -# Because PDFContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -# Here we have a single input and it's an Image. 
-# Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -# Here we have a single input, it's an image but -# its actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -# Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -## Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... 
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.cursor/rules/tdd.mdc b/.cursor/rules/tdd.mdc deleted file mode 100644 index 4b4f058b5..000000000 --- a/.cursor/rules/tdd.mdc +++ /dev/null @@ -1,28 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing test-driven development code ---- -# Test-Driven Development Guide - -This document outlines our test-driven development (TDD) process and the tools available for testing. - -## TDD Cycle - -1. **Write a Test First** -[pytest.mdc](pytest.mdc) - -2. **Write the Code** - - Implement the minimum amount of code needed to pass the test - - Follow the project's coding standards - - Keep it simple - don't write more than needed - -3. **Run Linting and Type Checking** -[coding_standards.mdc](coding_standards.mdc) - -4. 
**Refactor if needed** -If the code needs refactoring, refactor it following the best practices in [coding_standards.mdc](coding_standards.mdc) - -5. **Validate tests** - -Remember: The key to TDD is writing the test first and letting it drive your implementation. Always run the full test suite and quality checks before considering a feature complete. - diff --git a/.cursor/rules/write_pipelex.mdc b/.cursor/rules/write_pipelex.mdc deleted file mode 100644 index 23c0c5f8e..000000000 --- a/.cursor/rules/write_pipelex.mdc +++ /dev/null @@ -1,801 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing Pipelex pipelines -globs: -- '**/*.plx' -- '**/pipelines/**/*.py' ---- -# Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -## Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -## Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
- -### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -### Pipe Definitions - -## Pipe Base Definition - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionary in which each key is the name of a variable used in the prompts and each value is its ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So if you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'` -That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -## Structuring Models - -### Model Location and Registration - -- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) -- Models must inherit from `StructuredContent` or appropriate content type -- Structure classes are automatically discovered by Pipelex - no manual registration needed - -## Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, description="Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -### Usage - -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. - -Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -## Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
- - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -## PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -## PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. - -### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -## PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -## PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -## PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -# Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -## PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -## PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index 32ec89f0e..000000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,1099 +0,0 @@ - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural langage, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
- -#### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). -So If you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -### Structuring Models - -#### Model Location and Registration - -- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) -- Models must inherit from `StructuredContent` or appropriate content type -- Structure classes are automatically discovered by Pipelex - no manual registration needed - -### Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, description="Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -#### Usage - -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. - -Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -#### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -### Pipe Controllers and Pipe Operators - -Look at the available pipes and adapt them to your use case. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc.
- - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1.
Generate text or objects with LLMs -2. Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence.
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, PipeExtract outputs a `ListContent` of `Page`. - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more.
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio 
-gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can see, there are several ways to define a stuff, using either structured content or plain data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. -## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a PDF. -## Because PDFContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image.
-## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ...
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2.
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - diff --git a/.windsurfrules.md b/.windsurfrules.md deleted file mode 100644 index 32ec89f0e..000000000 --- a/.windsurfrules.md +++ /dev/null @@ -1,1099 +0,0 @@ - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name.
- -#### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionary whose keys are the variable names used in the prompts and whose values are the corresponding ConceptNames. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So if you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'` -That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output.
The `ConceptName` should have the same name as the python class if you want structured output: - -### Structuring Models - -#### Model Location and Registration - -- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) -- Models must inherit from `StructuredContent` or appropriate content type -- Structure classes are automatically discovered by Pipelex - no manual registration needed - -### Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, description="Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -#### Usage - -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. - -Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -#### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc.
- - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1.
Generate text or objects with LLMs -2. Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence.
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputting a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more.
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio 
-gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can see, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. -## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a PDF. -## Because PDFContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image.
-## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ...
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2.
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 32ec89f0e..000000000 --- a/AGENTS.md +++ /dev/null @@ -1,1099 +0,0 @@ - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name.
- -#### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). -So If you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -### Structuring Models - -#### Model Location and Registration - -- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) -- Models must inherit from `StructuredContent` or appropriate content type -- Structure classes are automatically discovered by Pipelex - no manual registration needed - -### Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, "Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -#### Usage - -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. - -Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -#### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
- - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. 
Generate text or objects with LLMs -2. Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio 
-gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. -## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a PDF. -## Because PDFContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. 
-## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## its actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... 
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extarct any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - diff --git a/BLACKBOX_RULES.md b/BLACKBOX_RULES.md deleted file mode 100644 index 32ec89f0e..000000000 --- a/BLACKBOX_RULES.md +++ /dev/null @@ -1,1099 +0,0 @@ - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural langage, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
- -#### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). -So If you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -### Structuring Models - -#### Model Location and Registration - -- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) -- Models must inherit from `StructuredContent` or appropriate content type -- Structure classes are automatically discovered by Pipelex - no manual registration needed - -### Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, "Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -#### Usage - -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. - -Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -#### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
- - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. 
Generate text or objects with LLMs -2. Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio 
-gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. -## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a PDF. -## Because PDFContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. 
-## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## its actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... 
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extarct any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 32ec89f0e..000000000 --- a/CLAUDE.md +++ /dev/null @@ -1,1099 +0,0 @@ - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural langage, then transcribe it in pipelex. -- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_name" -description = "Description of the domain" # Optional -``` -Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. 
- -#### Concept Definitions -```plx -[concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output -``` - -Important Rules: -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_name] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). -So If you have this error: -`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -### Structuring Models - -#### Model Location and Registration - -- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) -- Models must inherit from `StructuredContent` or appropriate content type -- Structure classes are automatically discovered by Pipelex - no manual registration needed - -### Model Structure - -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. - -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** - -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` - -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: -```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from typing import List, Optional -from pydantic import Field - -from pipelex.core.stuffs.structured_content import StructuredContent - -## IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int - - # Optional fields with defaults - field3: Optional[str] = Field(None, "Description of field3") - field4: List[str] = Field(default_factory=list) - - # Date fields should remove timezone - date_field: Optional[datetime] = None -``` -#### Usage - -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. - -Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. - - -#### Best Practices for structures - -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. 
- - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. - -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. 
It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. 
Generate text or objects with LLMs -2. Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -nb_output = 3 # Generate exactly 3 ideas -``` - -Generate multiple outputs (variable number): -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea" -multiple_output = true # Let the LLM decide how many to generate -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimlited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "PDF" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters - -- `template`: Inline template string (mutually exclusive with template_name) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -nb_output = 3 -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `nb_output`: Number of images to generate -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.tools.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio 
-gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. -## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - input_memory={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a PDF. -## Because PDFContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - input_memory={ - "document": PDFContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. 
-## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - input_memory={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## its actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - input_memory={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - input_memory={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... 
- - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extarct any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_reason = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - diff --git a/pipelex/cli/commands/kit_cmd.py b/pipelex/cli/commands/kit_cmd.py index 9c110ca45..1f68b7c06 100644 --- a/pipelex/cli/commands/kit_cmd.py +++ b/pipelex/cli/commands/kit_cmd.py @@ -6,10 +6,10 @@ from typing_extensions import Annotated from pipelex.exceptions import PipelexCLIError -from pipelex.kit.cursor_export import export_cursor_rules +from pipelex.kit.cursor_export import export_cursor_rules, remove_cursor_rules from pipelex.kit.index_loader import load_index from pipelex.kit.migrations_export import export_migration_instructions -from pipelex.kit.targets_update import build_merged_rules, update_targets +from pipelex.kit.targets_update import build_merged_rules, remove_from_targets, update_targets kit_app = typer.Typer(help="Manage kit assets: export Cursor rules and merge agent docs", no_args_is_help=True) @@ -55,6 +55,56 @@ def agent_rules( raise PipelexCLIError(msg) from exc +@kit_app.command("remove-rules") +def remove_rules( + repo_root: Annotated[Path | None, typer.Option("--repo-root", dir_okay=True, writable=True, help="Repository root directory")] = None, + cursor: Annotated[bool, typer.Option("--cursor/--no-cursor", help="Remove Cursor rules from .cursor/rules")] = True, + single_files: Annotated[bool, typer.Option("--single-files/--no-single-files", help="Remove agent documentation from target files")] = True, + delete_files: Annotated[bool, typer.Option("--delete-files", help="Delete entire target files instead of just removing marked sections")] = False, + dry_run: Annotated[bool, typer.Option("--dry-run", help="Show what would be done without making changes")] = False, + diff: Annotated[bool, typer.Option("--diff", help="Show unified diff of changes")] = False, + backup: Annotated[str | None, typer.Option("--backup", help="Backup suffix (e.g., '.bak')")] = None, +) -> None: + """Remove 
agent rules: delete Cursor rules and remove marked sections from target files. + + This command: + 1. Deletes agent markdown files from Cursor .mdc files in .cursor/rules + 2. Removes marked sections from target files (or deletes entire files with --delete-files) + """ + try: + if repo_root is None: + repo_root = Path() + + idx = load_index() + + if cursor: + typer.echo("🗑️ Removing Cursor rules...") + remove_cursor_rules(repo_root, dry_run=dry_run) + + if single_files: + if delete_files: + typer.echo("🗑️ Deleting target files...") + else: + typer.echo("🗑️ Removing marked sections from target files...") + remove_from_targets( + repo_root, + idx.agent_rules.targets, + delete_files=delete_files, + dry_run=dry_run, + diff=diff, + backup=backup, + ) + + if dry_run: + typer.echo("✅ Dry run completed - no changes made") + else: + typer.echo("✅ Agent rules removal completed successfully") + + except Exception as exc: + msg = f"Failed to remove agent rules: {exc}" + raise PipelexCLIError(msg) from exc + + @kit_app.command("migrations") def migration_instructions( repo_root: Annotated[Path | None, typer.Option("--repo-root", dir_okay=True, writable=True, help="Repository root directory")] = None, diff --git a/pipelex/kit/cursor_export.py b/pipelex/kit/cursor_export.py index ea72f6aaa..060e86935 100644 --- a/pipelex/kit/cursor_export.py +++ b/pipelex/kit/cursor_export.py @@ -69,3 +69,33 @@ def export_cursor_rules(repo_root: Path, idx: KitIndex, dry_run: bool = False) - else: out_path.write_text(mdc, encoding="utf-8") typer.echo(f"✅ Exported {out_path}") + + +def remove_cursor_rules(repo_root: Path, dry_run: bool = False) -> None: + """Remove Cursor .mdc files that correspond to agent markdown files. 
+ + Args: + repo_root: Repository root directory + dry_run: If True, only print what would be done + """ + agents_dir = get_agents_dir() + out_dir = repo_root / ".cursor" / "rules" + + if not out_dir.exists(): + typer.echo(f"⚠️ Directory {out_dir} does not exist - nothing to remove") + return + + removed_count = 0 + for fname, _ in _iter_agent_files(agents_dir): + out_path = out_dir / (fname.removesuffix(".md") + ".mdc") + + if out_path.exists(): + if dry_run: + typer.echo(f"[DRY] delete {out_path}") + else: + out_path.unlink() + typer.echo(f"🗑️ Deleted {out_path}") + removed_count += 1 + + if removed_count == 0: + typer.echo("⚠️ No Cursor rules found to remove") diff --git a/pipelex/kit/targets_update.py b/pipelex/kit/targets_update.py index 7b1ed0b27..80c2b2f86 100644 --- a/pipelex/kit/targets_update.py +++ b/pipelex/kit/targets_update.py @@ -84,29 +84,28 @@ def _insert_block_with_markers(target_md: str, block_md: str, main_title: str | Args: target_md: Existing target markdown content block_md: Block to insert - main_title: Main title (H1) to add when inserting into empty file or file with no H1 headings + main_title: Main title (H1) to add inside markers when inserting into empty file or file with no H1 headings markers: Tuple of (begin_marker, end_marker) Returns: Updated markdown with block inserted and markers added """ marker_begin, marker_end = markers - wrapped_block = wrap(marker_begin, marker_end, block_md) # Check if file is empty or has no H1 heading is_empty = not target_md or not target_md.strip() h1_pattern = r"^#\s+.+$" has_h1 = bool(target_md) and bool(re.search(h1_pattern, target_md, flags=re.MULTILINE)) - # If empty or no H1 heading, add main_title at top if provided + # If empty or no H1 heading, add main_title INSIDE the markers if (is_empty or not has_h1) and main_title: - if is_empty: - return f"{main_title}\n\n{wrapped_block}\n" - else: - # File has content but no H1 - add title at top, preserve content, append wrapped block - return 
f"{main_title}\n\n{target_md.rstrip()}\n\n{wrapped_block}\n" + content_with_heading = f"{main_title}\n\n{block_md}" + wrapped_block = wrap(marker_begin, marker_end, content_with_heading) + else: + # File already has H1 heading, don't add another one + wrapped_block = wrap(marker_begin, marker_end, block_md) - # Otherwise append at the end + # Append at the end if is_empty: return wrapped_block + "\n" return target_md.rstrip() + "\n\n" + wrapped_block + "\n" @@ -190,3 +189,95 @@ def update_targets( diff_output = _diff(before, after, str(target_path)) if diff_output: typer.echo(diff_output) + + +def remove_from_targets( + repo_root: Path, + targets: dict[str, Target], + delete_files: bool, + dry_run: bool, + diff: bool, + backup: str | None, +) -> None: + """Remove agent documentation from target files. + + Args: + repo_root: Repository root directory + targets: Dictionary of target file configurations keyed by ID + delete_files: If True, delete entire files; if False, only remove marked sections + dry_run: If True, only print what would be done + diff: If True, show unified diff + backup: Backup suffix (e.g., ".bak"), or None for no backup + """ + for target in targets.values(): + target_path = repo_root / target.path + + if not target_path.exists(): + typer.echo(f"⚠️ File {target_path} does not exist - skipping") + continue + + if delete_files: + # Delete the entire file + if dry_run: + typer.echo(f"[DRY] delete {target_path}") + else: + if backup: + backup_path = target_path.with_suffix(target_path.suffix + backup) + target_path.rename(backup_path) + typer.echo(f"📦 Backup saved to {backup_path}") + else: + target_path.unlink() + typer.echo(f"🗑️ Deleted {target_path}") + else: + # Remove only the marked section + before = target_path.read_text(encoding="utf-8") + span = find_span(before, target.marker_begin, target.marker_end) + + if not span: + typer.echo(f"⚠️ No marked section found in {target_path} - skipping") + continue + + # Remove the marked section entirely 
+ before_section = before[: span[0]].rstrip() + after_section = before[span[1] :].lstrip() + + # If there's content before or after, join them + if before_section and after_section: + after = before_section + "\n\n" + after_section + elif before_section: + after = before_section + "\n" + elif after_section: + after = after_section + else: + # File only contained the marked section - delete the file + if dry_run: + typer.echo(f"[DRY] delete {target_path} (file only contained marked section)") + else: + if backup: + backup_path = target_path.with_suffix(target_path.suffix + backup) + target_path.rename(backup_path) + typer.echo(f"📦 Backup saved to {backup_path}") + else: + target_path.unlink() + typer.echo(f"🗑️ Deleted {target_path} (file only contained marked section)") + continue + + if dry_run: + typer.echo(f"[DRY] remove marked section from {target_path}") + if diff: + diff_output = _diff(before, after, str(target_path)) + if diff_output: + typer.echo(diff_output) + else: + if backup: + backup_path = target_path.with_suffix(target_path.suffix + backup) + backup_path.write_text(before, encoding="utf-8") + typer.echo(f"📦 Backup saved to {backup_path}") + + target_path.write_text(after, encoding="utf-8") + typer.echo(f"✅ Removed marked section from {target_path}") + + if diff: + diff_output = _diff(before, after, str(target_path)) + if diff_output: + typer.echo(diff_output) From 6f135d10d50325c648d500552c092694a824c7ef Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 01:46:24 +0200 Subject: [PATCH 061/115] coding standards in other agent rules --- pipelex/kit/agent_rules/pytest_standards.md | 2 +- pipelex/kit/agent_rules/run_pipelex.md | 4 ++-- pipelex/kit/agent_rules/write_pipelex.md | 13 ++++++------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pipelex/kit/agent_rules/pytest_standards.md b/pipelex/kit/agent_rules/pytest_standards.md index e6ab9d5b6..ac560a8bd 100644 --- a/pipelex/kit/agent_rules/pytest_standards.md +++ 
b/pipelex/kit/agent_rules/pytest_standards.md @@ -137,7 +137,7 @@ class TestCases: value="test_value" ) - CASE_BLUEPRINTS: ClassVar[List[Tuple[str, str]]] = [ # topic, blueprint" + CASE_BLUEPRINTS: ClassVar[list[tuple[str, str]]] = [ # topic, blueprint ("topic1", CASE_BLUEPRINT_1), ("topic2", CASE_BLUEPRINT_2), ] diff --git a/pipelex/kit/agent_rules/run_pipelex.md b/pipelex/kit/agent_rules/run_pipelex.md index 207f6404d..93bac64e6 100644 --- a/pipelex/kit/agent_rules/run_pipelex.md +++ b/pipelex/kit/agent_rules/run_pipelex.md @@ -74,8 +74,8 @@ pretty_print(gantt_chart, title="Gantt Chart") The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: ```python -StuffContentOrData = Dict[str, Any] | StuffContent | List[Any] | str -ImplicitMemory = Dict[str, StuffContentOrData] +StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str +ImplicitMemory = dict[str, StuffContentOrData] ``` As you can seen, we made it so different ways can be used to define that stuff using structured content or data. 
diff --git a/pipelex/kit/agent_rules/write_pipelex.md b/pipelex/kit/agent_rules/write_pipelex.md index e4bde77de..97e58c151 100644 --- a/pipelex/kit/agent_rules/write_pipelex.md +++ b/pipelex/kit/agent_rules/write_pipelex.md @@ -115,7 +115,6 @@ Only create a Python structure class when you need to add specific fields: ```python from datetime import datetime -from typing import List, Optional from pydantic import Field from pipelex.core.stuffs.structured_content import StructuredContent @@ -127,11 +126,11 @@ class YourModel(StructuredContent): # Always be a subclass of StructuredContent field2: int # Optional fields with defaults - field3: Optional[str] = Field(None, "Description of field3") - field4: List[str] = Field(default_factory=list) + field3: str | None = Field(None, description="Description of field3") + field4: list[str] = Field(default_factory=list) # Date fields should remove timezone - date_field: Optional[datetime] = None + date_field: datetime | None = None ``` ### Usage @@ -430,12 +429,12 @@ It corresponds to 1 page. Therefore, the PipeExtract is outputing a `ListContent ```python class TextAndImagesContent(StuffContent): - text: Optional[TextContent] - images: Optional[List[ImageContent]] + text: TextContent | None + images: list[ImageContent] | None class PageContent(StructuredContent): # CONCEPT IS "Page" text_and_images: TextAndImagesContent - page_view: Optional[ImageContent] = None + page_view: ImageContent | None = None ``` - `text_and_images` are the text, and the related images found in the input image or PDF. - `page_view` is the screenshot of the whole pdf page/image. 
From 588e7ee926f7b735efb1a4a5c00b13c9339146f8 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 02:12:36 +0200 Subject: [PATCH 062/115] Updated docs --- concept_structures.md | 18 +- .../define_your_concepts.md | 103 +++-- .../structuring-concepts.md | 426 ++++++++++++++++++ docs/pages/quick-start/index.md | 17 + mkdocs.yml | 1 + pipelex/kit/agent_rules/write_pipelex.md | 171 ++++--- 6 files changed, 612 insertions(+), 124 deletions(-) create mode 100644 docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md diff --git a/concept_structures.md b/concept_structures.md index 7a07ea160..02d468247 100644 --- a/concept_structures.md +++ b/concept_structures.md @@ -312,7 +312,6 @@ When the structure needs to be shared: ```python # shared_models.py -from typing import Optional from pipelex.core.stuffs.structured_content import StructuredContent from pydantic import Field @@ -333,7 +332,7 @@ class Address(StructuredContent): When you need sophisticated typing: ```python -from typing import Union, Literal, Optional +from typing import Literal from pipelex.core.stuffs.structured_content import StructuredContent from pydantic import Field @@ -341,9 +340,9 @@ class ApiResponse(StructuredContent): """A flexible API response structure.""" status: Literal["success", "error", "pending"] - data: Optional[dict] = Field(default=None, description="Response data") - error_message: Optional[str] = Field(default=None, description="Error details if status is error") - metadata: dict[str, Union[str, int, float]] = Field(default_factory=dict, description="Additional metadata") + data: dict | None = Field(default=None, description="Response data") + error_message: str | None = Field(default=None, description="Error details if status is error") + metadata: dict[str, str | int | float] = Field(default_factory=dict, description="Additional metadata") ``` #### 5. 
Better Developer Experience @@ -396,7 +395,6 @@ Iterate quickly, adjusting the structure as needed. ```python from pipelex.core.stuffs.structured_content import StructuredContent from pydantic import Field, field_validator -from typing import Optional import re class UserProfile(StructuredContent): @@ -404,7 +402,7 @@ class UserProfile(StructuredContent): username: str = Field(description="The user's username") email: str = Field(description="The user's email address") - age: Optional[int] = Field(default=None, description="User's age") + age: int | None = Field(default=None, description="User's age") @field_validator('email') @classmethod @@ -476,7 +474,6 @@ categories = { type = "list", item_type = "text", description = "Product categor Create `ecommerce_struct.py` in your project: ```python -from typing import Optional, List from pipelex.core.stuffs.structured_content import StructuredContent from pydantic import Field @@ -487,7 +484,7 @@ class Product(StructuredContent): name: str = Field(description="Product name") price: float = Field(ge=0, description="Product price") in_stock: bool = Field(default=True, description="Stock availability") - categories: Optional[List[str]] = Field(default=None, description="Product categories") + categories: list[str] | None = Field(default=None, description="Product categories") ``` **3. Remove the inline structure from .plx** @@ -516,7 +513,6 @@ Run your pipeline to ensure everything works. 
The behavior should be identical, Now you can add validators, computed properties, or other Python features: ```python -from typing import Optional, List from pipelex.core.stuffs.structured_content import StructuredContent from pydantic import Field, field_validator @@ -527,7 +523,7 @@ class Product(StructuredContent): name: str = Field(description="Product name") price: float = Field(ge=0, description="Product price") in_stock: bool = Field(default=True, description="Stock availability") - categories: Optional[List[str]] = Field(default=None, description="Product categories") + categories: list[str] | None = Field(default=None, description="Product categories") @field_validator('price') @classmethod diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md index bd231a903..d1a74b4de 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md @@ -65,20 +65,64 @@ PaymentTerms = "Conditions under which payment is to be made" LineItem = "An individual item or service listed in a financial document" ``` +## How to Structure Your Concepts + +Once you've defined your concepts semantically, you may need to add structure if they have specific fields. Pipelex offers three approaches: + +### 1. No Structure (Concept Only) + +For concepts that only refine native concepts without adding fields, just declare them with a description. They default to text-based content. + +```plx +[concept] +ProductReview = "A customer's evaluation of a product or service" +``` + +### 2. Inline TOML Structures (Recommended) + +Define structured concepts directly in your `.plx` files using TOML syntax. This is the fastest and simplest approach for most use cases. 
+ +```plx +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +total_amount = { type = "number", description = "Total invoice amount", required = true } +vendor_name = "The name of the vendor" +``` + +Behind the scenes, Pipelex generates a complete Pydantic model with validation. + +### 3. Python StructuredContent Classes + +Create explicit Python classes when you need custom validation, computed properties, or advanced features. + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class Invoice(StructuredContent): + invoice_number: str + total_amount: float = Field(ge=0, description="Total invoice amount") + vendor_name: str +``` + +**For detailed guidance on choosing and implementing these approaches, see [Structuring Concepts](structuring-concepts.md).** + ## Adding Structure with Python Models -While text definitions help LLMs understand your concepts, Python models ensure structured, validated outputs. This combination gives you the best of both worlds: AI flexibility with software reliability. +This section covers the Python class approach for structured concepts. For a simpler alternative using inline TOML syntax, see [Structuring Concepts](structuring-concepts.md). -**Important**: If you don't create a Python class for a concept, it defaults to text-based content. Only create Python models when you need structured output with specific fields. +While text definitions help LLMs understand your concepts, Python models ensure structured, validated outputs when you need custom validation, computed properties, or advanced features. 
-### Creating Your First Structured Model +### Creating a Python Structured Model -For each concept that needs structured output, create a corresponding Python class: +For concepts that need custom logic, create a Python class that inherits from `StructuredContent`: ```python # finance.py from datetime import datetime -from typing import List, Optional from pydantic import Field from pipelex.core.stuffs.structured_content import StructuredContent @@ -90,14 +134,15 @@ class Invoice(StructuredContent): customer_name: str total_amount: float = Field(ge=0, description="Total invoice amount") currency: str = Field(default="USD", description="Three-letter currency code") - line_items: List[str] = Field(default_factory=list) ``` The model name must match the concept name exactly: `Invoice` concept → `Invoice` class. -### Basic Validation Examples +Python classes are automatically discovered and registered by Pipelex. + +### With Custom Validation -Use Pydantic's validation features to ensure data quality: +Use Pydantic's validation features for complex rules: ```python from pydantic import field_validator @@ -115,37 +160,11 @@ class Employee(StructuredContent): if "@" not in v: raise ValueError("Invalid email format") return v.lower() - -class ProductReview(StructuredContent): - product_name: str - reviewer_name: str - rating: int = Field(ge=1, le=5, description="Rating from 1 to 5 stars") - review_text: str - verified_purchase: bool = False -``` - -### Working with Optional Fields - -Not all data is always available. 
Use Optional fields with sensible defaults: - -```python -from typing import Optional -from datetime import datetime -from pipelex.core.stuffs.structured_content import StructuredContent - -class Meeting(StructuredContent): - title: str - scheduled_date: datetime - duration_minutes: int = Field(ge=15, le=480, description="Meeting duration") - location: Optional[str] = None - attendees: List[str] = Field(default_factory=list) - notes: Optional[str] = None - is_recurring: bool = False ``` ### Linking Concepts to Models -The connection between PLX definitions and Python models happens automatically through naming: +The connection between `.plx` definitions and Python models happens automatically through naming: ```plx # hr.plx @@ -154,7 +173,6 @@ domain = "hr" [concept] Employee = "A person employed by an organization" Meeting = "A scheduled gathering of people for discussion" -PerformanceReview = "An evaluation of an employee's work performance" Department = "An organizational unit within a company" # No Python model => text-based ``` @@ -162,9 +180,7 @@ Department = "An organizational unit within a company" # No Python model => tex # hr.py from pipelex.core.stuffs.structured_content import StructuredContent from datetime import datetime -from typing import List, Optional -# Only define models for concepts that need structure class Employee(StructuredContent): name: str email: str @@ -175,18 +191,13 @@ class Meeting(StructuredContent): title: str scheduled_date: datetime duration_minutes: int - attendees: List[str] - -class PerformanceReview(StructuredContent): - employee_name: str - review_period: str - rating: int = Field(ge=1, le=5) - strengths: List[str] - areas_for_improvement: List[str] + attendees: list[str] # Note: Department concept has no Python model, so it's text-based ``` +**For more examples, advanced features, and guidance on when to use Python classes vs inline structures, see [Structuring Concepts](structuring-concepts.md).** + ## Concept Refinement 
and Inheritance Sometimes concepts build on each other. A `Contract` is a kind of `Document`. A `NonCompeteClause` is a specific part of a `Contract`. Pipelex lets you express these relationships. diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md new file mode 100644 index 000000000..0f0f81aac --- /dev/null +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md @@ -0,0 +1,426 @@ +# Structuring Concepts + +Pipelex offers two ways to add structure to your concepts: **inline TOML structures** and **Python classes**. This guide helps you choose the right approach and shows you how to use both effectively. + +For an introduction to concepts themselves, see [Define Your Concepts](define_your_concepts.md). + +## Quick Comparison + +| Approach | Best For | Advantages | Limitations | +|----------|----------|------------|-------------| +| **Inline TOML** | Most use cases, prototyping | Fast, single-file, no boilerplate | No custom validation or methods | +| **Python Class** | Complex validation, computed properties | Full Pydantic power, IDE support | More files, more code | + +## Inline Structure Definition + +Define structured concepts directly in your `.plx` files using TOML syntax. This is the **recommended approach** for most use cases. 
+ +### Quick Example + +```plx +domain = "finance" + +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items in the invoice", required = false } +``` + +Behind the scenes, Pipelex automatically generates a fully-typed Pydantic model with validation—all from TOML. + +### Supported Field Types + +Inline structures support these field types: + +- **text**: String values +- **integer**: Whole numbers +- **boolean**: True/false values +- **number**: Numeric values (integers or floats) +- **date**: Date and datetime values +- **list**: Arrays/lists (specify `item_type`) +- **dict**: Dictionaries (specify `key_type` and `value_type`) + +### Field Properties + +Each field can specify: + +- **type**: The data type (required for detailed definitions) +- **description**: Human-readable description +- **required**: Whether the field is mandatory (default: `true`) +- **default_value**: Default value if not provided +- **choices**: For enum-like fields, a list of valid values +- **item_type**: For list fields, the type of list items +- **key_type** and **value_type**: For dict fields, the types of keys and values + +### Simple vs Detailed Syntax + +**Simple syntax** (creates required text field): + +```plx +[concept.Person.structure] +name = "The person's full name" +email = "The person's email address" +``` + +**Detailed syntax** (explicit properties): + +```plx +[concept.Employee.structure] +employee_id = { type = "integer", description = "Unique employee identifier", required = true } +department = { type = "text", description = "Department name", required = 
false, default_value = "General" } +is_active = { type = "boolean", description = "Employment status", required = false, default_value = true } +``` + +You can mix both styles in the same structure. + +### Complex Field Examples + +**List Fields:** + +```plx +[concept.Project.structure] +name = "Project name" +tags = { type = "list", item_type = "text", description = "Project tags", required = false } +team_members = { type = "list", item_type = "text", description = "Team member names", required = true } +``` + +**Dictionary Fields:** + +```plx +[concept.Configuration.structure] +app_name = "Application name" +settings = { type = "dict", key_type = "text", value_type = "text", description = "Configuration settings", required = false } +``` + +**Choice Fields (Enums):** + +```plx +[concept.Task.structure] +title = "Task title" +priority = { choices = ["low", "medium", "high"], description = "Task priority level", required = true } +status = { choices = ["todo", "in_progress", "done"], description = "Current status", default_value = "todo" } +``` + +## Python StructuredContent Classes + +For advanced features, create explicit Python classes that inherit from `StructuredContent`. + +### When to Use Python Classes + +Use Python classes when you need: + +1. **Custom validation logic** - Cross-field validation, complex rules +2. **Computed properties** - Derived values, formatted outputs +3. **Custom methods** - Business logic, helper functions +4. **Reusability** - Shared structures across multiple domains +5. **Advanced typing** - `Literal`, complex unions, etc. +6. 
**Better IDE support** - Full autocomplete and type checking + +### Basic Python Class Example + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class Invoice(StructuredContent): + """A commercial invoice.""" + + invoice_number: str = Field(description="Unique invoice identifier") + issue_date: datetime = Field(description="Date the invoice was issued") + total_amount: float = Field(ge=0, description="Total invoice amount") + vendor_name: str = Field(description="Name of the vendor") + line_items: list[str] = Field(default_factory=list, description="List of items") +``` + +Classes inheriting from `StructuredContent` are automatically discovered and registered by Pipelex. + +### With Custom Validation + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + total_amount: float = Field(ge=0, description="Total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + net_amount: float = Field(ge=0, description="Net amount before tax") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +### With Computed Properties + +```python +from datetime import datetime +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class Subscription(StructuredContent): + """A subscription with computed properties.""" + + start_date: datetime = Field(description="Subscription start date") + end_date: datetime = Field(description="Subscription end date") + monthly_price: float = Field(ge=0, description="Monthly subscription price") + + @property + def duration_days(self) -> int: + """Calculate 
subscription duration in days.""" + return (self.end_date - self.start_date).days + + @property + def total_cost(self) -> float: + """Calculate total subscription cost.""" + months = self.duration_days / 30.0 + return months * self.monthly_price +``` + +## Advantages of Inline Structures + +Inline structures offer several benefits: + +### Rapid Development + +- **Single File**: Keep concepts, structures, and pipes all in one `.plx` file +- **No Context Switching**: No need to jump between `.plx` and `.py` files +- **Quick Iteration**: Modify structures instantly without managing separate files + +### Simplicity + +- **Declarative Syntax**: Straightforward TOML that's easy to read and write +- **No Boilerplate**: No need for Python imports, class definitions, or field declarations +- **Automatic Registration**: Generated classes are automatically discovered + +### Type Safety + +- **Pydantic Models**: Full Pydantic v2 models behind the scenes +- **Runtime Validation**: Automatic validation of structured outputs from LLMs +- **Type Hints**: Generated classes include proper type annotations + +## Limitations of Inline Structures + +### What Inline Structures Cannot Do + +Inline structures cannot provide: + +- **Custom validators** - No `@field_validator` decorators +- **Computed properties** - No `@property` methods +- **Custom methods** - No class methods or instance methods +- **Complex validation** - No cross-field validation logic +- **Nested custom concepts** - Cannot reference other custom concepts as field types (coming soon) +- **Advanced Pydantic features** - No custom serializers, model validators, etc. 
+ +### Concept Refinement Restrictions + +Currently, inline structures can only be used for concepts that: + +- Don't refine other concepts, OR +- Refine native concepts only: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page` + +### Tooling Limitations + +- **Limited IDE autocomplete** compared to explicit Python classes +- **No static type checking** with `mypy` or `pyright` +- **Less refactoring support** in IDEs + +## Using AI to Create Python Classes + +Modern AI coding assistants like Cursor AI and GitHub Copilot can generate `StructuredContent` classes instantly, making the transition from inline structures to Python classes fast and easy. + +### Recommended Workflow + +Follow this pragmatic approach: + +1. **Prototype Fast**: Start with inline structures for rapid development +2. **Validate Quickly**: Test your pipelines and iterate on the structure +3. **Upgrade When Needed**: Convert to Python classes when you need advanced features +4. **Let AI Help**: Use AI assistants to generate the Python code automatically + +### Example: AI-Assisted Migration + +**Step 1: Start with inline structure** + +```plx +[concept.UserProfile] +description = "A user profile" + +[concept.UserProfile.structure] +username = "The user's username" +email = "The user's email address" +age = { type = "integer", description = "User's age", required = false } +``` + +**Step 2: Ask your AI assistant** + +> "Convert this inline UserProfile structure to a Python StructuredContent class with email validation" + +**Step 3: AI generates the class** + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator +import re + +class UserProfile(StructuredContent): + """A user profile with validation.""" + + username: str = Field(description="The user's username") + email: str = Field(description="The user's email address") + age: int | None = Field(default=None, description="User's age") + + @field_validator('email') + 
@classmethod + def validate_email(cls, v): + """Validate email format.""" + pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + if not re.match(pattern, v): + raise ValueError('Invalid email format') + return v +``` + +**Step 4: Update your .plx file** + +```plx +[concept] +UserProfile = "A user profile" # Structure now defined in Python +``` + +The Python class is automatically discovered and registered. + +## Migration Guide + +### From Inline Structure to Python Class + +Here's how to migrate when you need more advanced features: + +**1. You have this inline structure:** + +```plx +domain = "ecommerce" + +[concept.Product] +description = "A product in the catalog" + +[concept.Product.structure] +product_id = { type = "integer", description = "Unique product ID", required = true } +name = "Product name" +price = { type = "number", description = "Product price", required = true } +in_stock = { type = "boolean", description = "Stock availability", default_value = true } +``` + +**2. Create a Python file** (e.g., `ecommerce_struct.py`): + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field + +class Product(StructuredContent): + """A product in the catalog.""" + + product_id: int = Field(description="Unique product ID") + name: str = Field(description="Product name") + price: float = Field(ge=0, description="Product price") + in_stock: bool = Field(default=True, description="Stock availability") +``` + +**3. Update your `.plx` file:** + +```plx +domain = "ecommerce" + +[concept] +Product = "A product in the catalog" + +# Structure section removed - now defined in ecommerce_struct.py +``` + +**4. Test your pipeline** - The behavior should be identical. + +**5. 
Add enhancements** (optional): + +```python +class Product(StructuredContent): + """A product in the catalog.""" + + product_id: int = Field(description="Unique product ID") + name: str = Field(description="Product name") + price: float = Field(ge=0, description="Product price") + in_stock: bool = Field(default=True, description="Stock availability") + + @field_validator('price') + @classmethod + def validate_price(cls, v): + """Ensure price is positive and reasonable.""" + if v < 0: + raise ValueError('Price cannot be negative') + if v > 1_000_000: + raise ValueError('Price seems unreasonably high') + return v + + @property + def display_price(self) -> str: + """Format price for display.""" + return f"${self.price:.2f}" +``` + +## Recommendations + +The inline structure feature is a **practical solution for the majority of use cases**. Use it to: + +- Get started quickly without Python overhead +- Keep all pipeline logic in one place +- Iterate rapidly during development +- Still get full type safety and validation + +When your needs grow, **Python `StructuredContent` classes offer more power and flexibility**. + +### Guidelines + +- ✅ **Use inline structures** for straightforward data models +- ✅ **Use inline structures** during prototyping and early development +- ✅ **Use inline structures** for domain-specific models with simple validation +- ✅ **Use Python classes** when you need custom validation logic +- ✅ **Use Python classes** for reusable, shared data models +- ✅ **Use Python classes** when you need computed properties or methods +- ✅ **Use Python classes** for complex type relationships + +Remember: You can always start with inline structures and migrate to Python classes later. The migration is straightforward, and AI assistants can help you make the transition quickly. 
+ +## Future Roadmap + +### Nested Custom Concepts (Coming Soon) + +Soon, you'll be able to reference other custom concepts as field types in inline structures: + +```plx +[concept.Address.structure] +street = "Street address" +city = "City name" +postal_code = "Postal or ZIP code" + +[concept.Company.structure] +name = "Company name" +headquarters = { type = "Address", description = "Company headquarters", required = true } +``` + +This will enable building complex, nested data models entirely within `.plx` files. + +## Related Documentation + +- [Define Your Concepts](define_your_concepts.md) - Learn about concept semantics and naming +- [Quick Start](../quick-start/index.md) - Get started with structured outputs +- [Design and Run Pipelines](design_and_run_pipelines.md) - Use structured concepts in pipes + diff --git a/docs/pages/quick-start/index.md b/docs/pages/quick-start/index.md index 01a943cce..072f9690c 100644 --- a/docs/pages/quick-start/index.md +++ b/docs/pages/quick-start/index.md @@ -176,6 +176,23 @@ class Character(StructuredContent): description: str ``` +💡 **Alternative: Inline Structure Definition** + +Instead of creating a separate Python file, you can define structures directly in your `.plx` file using TOML syntax: + +```plx +[concept.Character] +description = "A character in a fiction story" + +[concept.Character.structure] +name = "The character's name" +age = { type = "integer", description = "The character's age", required = true } +gender = "The character's gender" +description = "A description of the character" +``` + +Learn more about inline structures and when to use them in [Structuring Concepts](../build-reliable-ai-workflows-with-pipelex/structuring-concepts.md). + ### Improve the pipeline It's time to specify that your output be a `Character` instance. Use the `output` field for that purpose. 
diff --git a/mkdocs.yml b/mkdocs.yml index 0f76fd2da..5ac702f8c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -108,6 +108,7 @@ nav: - Build Reliable AI Workflows: - Kick off a Pipeline Project: pages/build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md - Define Your Concepts: pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md + - Structuring Concepts: pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md - Design and Run Pipelines: pages/build-reliable-ai-workflows-with-pipelex/design_and_run_pipelines.md - Pipeline Creation: pages/build-reliable-ai-workflows-with-pipelex/pipeline-creation.md - Pipe Operators: diff --git a/pipelex/kit/agent_rules/write_pipelex.md b/pipelex/kit/agent_rules/write_pipelex.md index 97e58c151..f5d16f512 100644 --- a/pipelex/kit/agent_rules/write_pipelex.md +++ b/pipelex/kit/agent_rules/write_pipelex.md @@ -23,17 +23,34 @@ description = "Description of the domain" # Optional Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. ### Concept Definitions + +Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
+ +```plx [concept] -ConceptName = "Description of the concept" # Should be the same name as the Structure ClassName you want to output +ConceptName = "Description of the concept" ``` -Important Rules: +**Naming Rules:** - Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") -- Avoid adjectives (no "LargeText", use "Text") -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number) -yes +- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex +- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page) + +**Native Concepts:** +Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`. Use these directly or refine them when appropriate. + +**Refining Native Concepts:** +To create a concept that specializes a native concept without adding fields: + +```plx +[concept.Landscape] +description = "A scenic outdoor photograph" +refines = "Image" +``` + +For details on how to structure concepts with fields, see the "Structuring Models" section below. + ### Pipe Definitions ## Pipe Base Definition @@ -46,21 +63,7 @@ inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } output = "ConceptName" ``` -DO NOT WRITE: -```plx -[pipe.your_pipe_name] -type = "pipe_sequence" -``` - -But it should be: - -```plx -[pipe.your_pipe_name] -type = "PipeSequence" -description = "....." -``` - -The pipes will all have at least this base structure. +The pipes will all have at least this base definition. - `inputs`: Dictionary whose keys are the variable names used in the prompts and whose values are the corresponding ConceptNames. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). 
So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • @@ -81,69 +84,103 @@ inputs = { ## Structuring Models -### Model Location and Registration +Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. -- Create models for structured generations related to "some_domain" in your project (e.g., `my_project/some_domain/some_domain_struct.py`) -- Models must inherit from `StructuredContent` or appropriate content type -- Structure classes are automatically discovered by Pipelex - no manual registration needed +### Three Ways to Structure Concepts -## Model Structure +**1. No Structure Needed** -Concepts and their structure classes are meant to indicate an idea. -A Concept MUST NEVER be a plural noun and you should never create a SomeConceptList: lists and arrays are implicitly handled by Pipelex according to the context. Just define SomeConcept. +If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. -**IMPORTANT: Never create unnecessary structure classes that only refine native concepts without adding fields.** +**2. Inline Structure Definition (RECOMMENDED for most cases)** -DO NOT create structures like: -```python -class Joke(TextContent): - """A humorous text that makes people laugh.""" - pass -``` +For concepts with structured fields, define them inline using TOML syntax: -If a concept only refines a native concept (like Text, Image, etc.) without adding new fields, simply declare it in the .plx file: -```plx -[concept] -Joke = "A humorous text that makes people laugh." 
-``` -If you simply need to refine another native concept, construct it like this: ```plx -[concept.Landscape] -refines = "Image" -``` -Only create a Python structure class when you need to add specific fields: - -```python -from datetime import datetime -from pydantic import Field +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" -from pipelex.core.stuffs.structured_content import StructuredContent +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items", required = false } +``` -# IMPORTANT: THE CLASS MUST BE A SUBCLASS OF StructuredContent -class YourModel(StructuredContent): # Always be a subclass of StructuredContent - # Required fields - field1: str - field2: int +**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - # Optional fields with defaults - field3: str | None = Field(None, "Description of field3") - field4: list[str] = Field(default_factory=list) +**Field properties:** `type`, `description`, `required` (default: true), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - # Date fields should remove timezone - date_field: datetime | None = None +**Simple syntax** (creates required text field): +```plx +field_name = "Field description" ``` -### Usage -Structures are meant to indicate what class to use for a particular Concept. In general they use the same name as the concept. 
+**Detailed syntax** (with explicit properties): +```plx +field_name = { type = "text", description = "Field description", required = false, default_value = "default" } +``` -Structure classes that inherit from `StructuredContent` are automatically discovered and loaded into the class_registry when setting up Pipelex, no need to do it manually. +**3. Python StructuredContent Class (For Advanced Features)** +Create a Python class when you need: +- Custom validation logic (@field_validator, @model_validator) +- Computed properties (@property methods) +- Custom methods or class methods +- Complex cross-field validation +- Reusable structures across multiple domains -### Best Practices for structures +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator -- Respect Pydantic v2 standards -- Use type hints for all fields -- Use `Field` declaration and write the description +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + invoice_number: str = Field(description="The unique invoice identifier") + total_amount: float = Field(ge=0, description="The total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. + +### Decision Rules for Agents + +**If concept already exists:** +- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class +- If it's already a Python class → KEEP IT as Python class + +**If creating new concept:** +1. Does it only refine a native concept without adding fields? 
→ Use concept-only declaration +2. Does it need custom validation, computed properties, or methods? → Use Python class +3. Otherwise → Use inline structure (fastest and simplest) + +**When to suggest conversion to Python class:** +- User needs validation logic beyond type checking +- User needs computed properties or custom methods +- Structure needs to be reused across multiple domains +- Complex type relationships or inheritance required + +### Inline Structure Limitations + +Inline structures: +- ✅ Support all common field types (text, number, date, list, dict, etc.) +- ✅ Support required/optional fields, defaults, choices +- ✅ Generate full Pydantic models with validation +- ❌ Cannot have custom validators or complex validation logic +- ❌ Cannot have computed properties or custom methods +- ❌ Cannot refine custom (non-native) concepts +- ❌ Limited IDE autocomplete compared to explicit Python classes ## Pipe Controllers and Pipe Operators From ff96bce91ec8073ac833c8e1950a3ec5ddb7ca87 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 02:20:02 +0200 Subject: [PATCH 063/115] Changelog --- CHANGELOG.md | 13 + concept_structures.md | 566 ------------------------------------ pipelex/kit/__init__.py.bak | 1 - 3 files changed, 13 insertions(+), 567 deletions(-) delete mode 100644 concept_structures.md delete mode 100644 pipelex/kit/__init__.py.bak diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e9d3300d..bb1f569ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,15 @@ We tried to group all the renamings we wanted to do which impact our language, s This is all in the spirit of making Pipelex a declarative language, where you express what you want to do, and the system will figure out how to do it. So our focus was to make the Pipelex language easier to understand and use for non-technical users, and at the same time use more consistent and obvious words that developers are used to. 
+**💡 Pro tip:** To make migration easier, pass the [migration guide](pipelex/kit/migrations/migrate_0.11.0_0.12.0.md) to your favorite SWE agent (Cursor, Claude Code, GitHub Copilot, etc.) and let it handle the bulk of the changes! + +- **Removed centralized `pipelex_libraries` folder system** + - Pipelines are now auto-discovered from anywhere in your project—no special directory required + - No config path parameters needed in `Pipelex.make()` or CLI commands (just call `Pipelex.make()`) + - Custom functions require `@pipe_func()` decorator for auto-discovery + - Structure classes auto-discovered (must inherit from `StructuredContent`) + - Configuration stays at repository root in `.pipelex/` directory + - See [migration guide](pipelex/kit/migrations/migrate_0.11.0_0.12.0.md) for details on reorganizing your project structure - General changes - renamed `definition` fields to `description` across all cases @@ -91,6 +100,8 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Updated kajson dependency from version `0.3.0` to `0.3.1` - Cleanup env example and better explain how to set up keys in README and docs - Changed Gemini routing from `google` backend to `pipelex_inference` backend + - **BREAKING:** Split `pipelex.core.stuffs.stuff_content` module into individual files per content type (affects imports: `StructuredContent`, `TextContent`, `ImageContent`, `ListContent`, `PDFContent`, `PageContent`, `NumberContent`, `HtmlContent`, `MermaidContent`, `TextAndImagesContent`) + - **BREAKING:** Renamed package `pipelex.pipe_works` to `pipelex.pipe_run` and moved `PipeRunParams` classes into it - Renamed `ConceptProviderAbstract` to `ConceptLibraryAbstract` - Renamed `DomainProviderAbstract` to `DomainLibraryAbstract` - Renamed `PipeProviderAbstract` to `PipeLibraryAbstract` @@ -119,6 +130,8 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Updated README badge URL to point to main branch instead of 
feature/pipe-builder branch ### Removed + - Removed centralized `pipelex_libraries` folder system and `pipelex init libraries` command + - Removed config path parameters from `Pipelex.make()` (`relative_config_folder_path`, `config_folder_path`, `from_file`) - Removed Gemini 1.5 series models: `gemini-1.5-pro`, `gemini-1.5-flash`, and `gemini-1.5-flash-8b` - Removed `base_templates.toml` file (generic prompts moved to `pipelex.toml`) - Removed `gpt-5-mini` from possible models in pipe-builder diff --git a/concept_structures.md b/concept_structures.md deleted file mode 100644 index 02d468247..000000000 --- a/concept_structures.md +++ /dev/null @@ -1,566 +0,0 @@ -# Defining Concept Structures in Pipelex - -## Introduction - -Pipelex provides a powerful feature that allows you to define structured concepts directly within your `.plx` pipeline files using inline TOML syntax. This eliminates the need to create separate Python files for simple to moderately complex data structures, streamlining your workflow and keeping all pipeline definitions in one place. - -**Quick Example:** - -```plx -domain = "finance" -description = "Financial document processing" - -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" -line_items = { type = "list", item_type = "text", description = "List of items in the invoice", required = false } -``` - -Behind the scenes, Pipelex automatically generates a fully-typed Pydantic model that inherits from `StructuredContent`, giving you structured LLM outputs with validation—all from TOML. 
- -## How Inline Structures Work - -When you define a concept structure inline, Pipelex performs the following steps automatically: - -1. **Parsing**: The TOML parser reads `[concept.ConceptName.structure]` sections from your `.plx` file -2. **Blueprint Creation**: Each field definition is converted into a `ConceptStructureBlueprint` object that specifies the field's type, description, requirements, and defaults -3. **Code Generation**: The `StructureGenerator` class dynamically generates Python source code for a complete Pydantic class that inherits from `StructuredContent` -4. **Class Creation**: The generated Python code is executed to create an actual class at runtime -5. **Registration**: The new class is automatically registered in Pipelex's `class_registry`, making it available throughout your pipelines - -This entire process is transparent to you—you write TOML, and Pipelex handles the rest. - -## Syntax and Examples - -### Simple Field Definitions - -The simplest way to define a field is with a string description. 
This creates a required text field: - -```plx -[concept.Person] -description = "Information about a person" - -[concept.Person.structure] -name = "The person's full name" -email = "The person's email address" -``` - -### Detailed Field Definitions - -For more control, use inline tables with explicit field properties: - -```plx -[concept.Employee] -description = "Information about an employee" - -[concept.Employee.structure] -employee_id = { type = "integer", description = "Unique employee identifier", required = true } -name = { type = "text", description = "Employee's full name", required = true } -hire_date = { type = "date", description = "Date of hire", required = true } -department = { type = "text", description = "Department name", required = false, default_value = "General" } -is_active = { type = "boolean", description = "Employment status", required = false, default_value = true } -salary = { type = "number", description = "Annual salary", required = false } -``` - -### Supported Field Types - -Inline structures support the following field types: - -- **text**: String values -- **integer**: Whole numbers -- **boolean**: True/false values -- **number**: Numeric values (integers or floats) -- **date**: Date and datetime values -- **list**: Arrays/lists of items (specify `item_type`) -- **dict**: Dictionary/map structures (specify `key_type` and `value_type`) - -### Field Properties - -Each field can specify: - -- **type**: The data type (required for detailed definitions) -- **description**: Human-readable description of the field -- **required**: Whether the field is mandatory (default: `true`) -- **default_value**: Default value if not provided -- **choices**: For enum-like fields, a list of valid values -- **item_type**: For list fields, the type of list items -- **key_type** and **value_type**: For dict fields, the types of keys and values - -### Complex Type Examples - -**List Fields:** - -```plx -[concept.Project] -description = "A software project" - 
-[concept.Project.structure] -name = "Project name" -tags = { type = "list", item_type = "text", description = "Project tags", required = false } -team_members = { type = "list", item_type = "text", description = "Team member names", required = true } -``` - -**Dictionary Fields:** - -```plx -[concept.Configuration] -description = "Application configuration" - -[concept.Configuration.structure] -app_name = "Application name" -settings = { type = "dict", key_type = "text", value_type = "text", description = "Configuration settings", required = false } -``` - -**Choice Fields:** - -```plx -[concept.Task] -description = "A task to be completed" - -[concept.Task.structure] -title = "Task title" -priority = { choices = ["low", "medium", "high"], description = "Task priority level", required = true } -status = { choices = ["todo", "in_progress", "done"], description = "Current status", default_value = "todo" } -``` - -### Mixed Syntax Example - -You can mix simple string definitions with detailed inline tables in the same structure: - -```plx -[concept.Article] -description = "A blog article" - -[concept.Article.structure] -title = "The article title" -author = "The author's name" -word_count = { type = "integer", description = "Number of words", required = false } -published_date = { type = "date", description = "Publication date", required = true } -tags = { type = "list", item_type = "text", description = "Article tags", required = false } -is_featured = { type = "boolean", description = "Whether article is featured", default_value = false } -``` - -## Advantages of Inline Structures - -### Rapid Development - -- **Single File**: Keep concepts, structures, and pipes all in one `.plx` file -- **No Context Switching**: No need to jump between `.plx` and `.py` files -- **Quick Iteration**: Modify structures instantly without managing separate Python files - -### Simplicity - -- **Declarative Syntax**: Straightforward TOML that's easy to read and write -- **No 
Boilerplate**: No need for Python imports, class definitions, or field declarations -- **Automatic Registration**: Generated classes are automatically discovered and registered - -### Type Safety - -- **Pydantic Models**: Behind the scenes, you get full Pydantic v2 models -- **Runtime Validation**: Automatic validation of structured outputs from LLMs -- **Type Hints**: Generated classes include proper type annotations - -### Developer Experience - -- **Less Code to Maintain**: Fewer files, less boilerplate -- **Clear and Readable**: TOML structure definitions are self-documenting -- **Perfect for Prototyping**: Ideal for getting started quickly -- **Good for Simple to Medium Complexity**: Handles most common use cases - -## Current Limitations - -### Concept Refinement Restrictions - -Currently, inline structures can only be used for concepts that: - -- Don't refine other concepts, OR -- Refine native concepts only: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page` - -You cannot use inline structures to refine custom (non-native) concepts. This limitation may be removed in future versions. 
- -### Feature Constraints - -Inline structures cannot provide: - -- **Custom Methods**: No ability to define methods or computed properties -- **Complex Validation**: No custom validators or cross-field validation logic -- **Advanced Pydantic Features**: - - `@field_validator` decorators - - `@model_validator` decorators - - Custom serializers/deserializers - - `@property` methods - - Class methods or static methods -- **Nested Custom Concepts**: Cannot reference other custom concepts as field types (coming soon - see roadmap below) -- **Inheritance Hierarchies**: Cannot create class inheritance beyond the base `StructuredContent` - -### Tooling Limitations - -- **IDE Support**: Limited autocomplete compared to explicit Python classes -- **Static Type Checking**: Type checkers like `mypy` or `pyright` won't validate inline structures as thoroughly (static code generation coming soon - see roadmap below) -- **Refactoring**: Less IDE refactoring support for inline structures -- **Documentation**: No docstrings or inline documentation beyond descriptions - -## Future Roadmap - -The Pipelex team is actively working on enhancing inline structures with powerful new capabilities: - -### Nested Custom Concepts (Coming Soon) - -Currently, inline structures only support native types and references to native concepts. Soon, you'll be able to reference other custom concepts as field types: - -```plx -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = "Street address" -city = "City name" -postal_code = "Postal or ZIP code" - -[concept.Company] -description = "A company with an address" - -[concept.Company.structure] -name = "Company name" -headquarters = { type = "Address", description = "Company headquarters address", required = true } -``` - -This will enable building complex, nested data models entirely within `.plx` files. 
- -## When to Use Explicit Python Classes - -While inline structures are convenient, there are scenarios where creating an explicit Python `StructuredContent` class is the better choice. - -### Use Python Classes When You Need: - -#### 1. Complex Validation Logic - -When your data requires custom validation that goes beyond field types: - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - total_amount: float = Field(ge=0, description="Total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - net_amount: float = Field(ge=0, description="Net amount before tax") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v - - @field_validator('net_amount') - @classmethod - def validate_net_amount(cls, v, info): - """Verify net_amount + tax_amount = total_amount.""" - total = info.data.get('total_amount', 0) - tax = info.data.get('tax_amount', 0) - expected = total - tax - if abs(v - expected) > 0.01: # Allow small floating point differences - raise ValueError(f'Net amount should be {expected}, got {v}') - return v -``` - -#### 2. 
Computed Properties - -When you need derived values or methods: - -```python -from datetime import datetime -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field - -class Subscription(StructuredContent): - """A subscription with computed properties.""" - - start_date: datetime = Field(description="Subscription start date") - end_date: datetime = Field(description="Subscription end date") - monthly_price: float = Field(ge=0, description="Monthly subscription price") - - @property - def duration_days(self) -> int: - """Calculate subscription duration in days.""" - return (self.end_date - self.start_date).days - - @property - def total_cost(self) -> float: - """Calculate total subscription cost.""" - months = self.duration_days / 30.0 - return months * self.monthly_price - - def is_active_on(self, date: datetime) -> bool: - """Check if subscription is active on a given date.""" - return self.start_date <= date <= self.end_date -``` - -#### 3. Reusability Across Domains - -When the structure needs to be shared: - -```python -# shared_models.py -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field - -class Address(StructuredContent): - """A reusable address structure.""" - - street: str = Field(description="Street address") - city: str = Field(description="City name") - state: str = Field(description="State or province") - postal_code: str = Field(description="Postal/ZIP code") - country: str = Field(default="USA", description="Country") - -# Can now be imported and used in multiple domains/projects -``` - -#### 4. 
Advanced Type Features - -When you need sophisticated typing: - -```python -from typing import Literal -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field - -class ApiResponse(StructuredContent): - """A flexible API response structure.""" - - status: Literal["success", "error", "pending"] - data: dict | None = Field(default=None, description="Response data") - error_message: str | None = Field(default=None, description="Error details if status is error") - metadata: dict[str, str | int | float] = Field(default_factory=dict, description="Additional metadata") -``` - -#### 5. Better Developer Experience - -When your team prefers: - -- Full IDE autocomplete and type hints -- Static type checking with `mypy` or `pyright` -- Comprehensive docstrings -- Explicit, self-documenting code -- Better refactoring support - -## Using AI Agents to Create Python Classes - -One of the major advantages of the modern development workflow is that **AI coding assistants make creating Python classes nearly as fast as writing TOML**. Tools like Cursor AI, GitHub Copilot, and other AI-powered IDEs understand Pipelex patterns and can generate proper `StructuredContent` classes instantly. - -### The Best of Both Worlds - -You don't have to choose between inline structures and Python classes from the start. Instead, follow this pragmatic approach: - -1. **Prototype Fast**: Start with inline structures for rapid development -2. **Validate Quickly**: Test your pipelines and iterate on the structure -3. **Upgrade When Needed**: When complexity grows, convert to Python classes -4. 
**Let AI Help**: Use AI assistants to generate the Python code automatically - -### Example Workflow - -**Step 1: Start with inline structure** - -```plx -[concept.UserProfile] -description = "A user profile" - -[concept.UserProfile.structure] -username = "The user's username" -email = "The user's email address" -age = { type = "integer", description = "User's age", required = false } -``` - -**Step 2: Run and test your pipeline** - -Iterate quickly, adjusting the structure as needed. - -**Step 3: When you need validation, ask your AI assistant** - -> "Convert this inline UserProfile structure to a Python StructuredContent class with email validation" - -**Step 4: AI generates the class** - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator -import re - -class UserProfile(StructuredContent): - """A user profile with validation.""" - - username: str = Field(description="The user's username") - email: str = Field(description="The user's email address") - age: int | None = Field(default=None, description="User's age") - - @field_validator('email') - @classmethod - def validate_email(cls, v): - """Validate email format.""" - pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' - if not re.match(pattern, v): - raise ValueError('Invalid email format') - return v - - @field_validator('age') - @classmethod - def validate_age(cls, v): - """Validate age range.""" - if v is not None and (v < 0 or v > 150): - raise ValueError('Age must be between 0 and 150') - return v -``` - -**Step 5: Update your .plx file** - -```plx -[concept] -UserProfile = "A user profile" # Structure now defined in Python -``` - -The Python class is automatically discovered and registered by Pipelex. 
- -### AI Assistant Capabilities - -Modern AI coding assistants can: - -- Generate complete `StructuredContent` classes from descriptions -- Add appropriate validators and validation logic -- Convert inline TOML structures to Python classes -- Suggest improvements and best practices -- Handle complex type annotations -- Add docstrings and documentation -- Follow Pydantic v2 patterns - -This means you get the **development speed of inline structures** with the **power and flexibility of Python classes** when you need them. - -## Migration Path - -### From Inline Structure to Python Class - -Here's a step-by-step guide to migrate from inline structures to explicit Python classes: - -**1. Identify the concept to migrate** - -Let's say you have this inline structure: - -```plx -domain = "ecommerce" - -[concept.Product] -description = "A product in the catalog" - -[concept.Product.structure] -product_id = { type = "integer", description = "Unique product ID", required = true } -name = "Product name" -price = { type = "number", description = "Product price", required = true } -in_stock = { type = "boolean", description = "Stock availability", default_value = true } -categories = { type = "list", item_type = "text", description = "Product categories", required = false } -``` - -**2. Create a Python file for structures** - -Create `ecommerce_struct.py` in your project: - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field - -class Product(StructuredContent): - """A product in the catalog.""" - - product_id: int = Field(description="Unique product ID") - name: str = Field(description="Product name") - price: float = Field(ge=0, description="Product price") - in_stock: bool = Field(default=True, description="Stock availability") - categories: list[str] | None = Field(default=None, description="Product categories") -``` - -**3. 
Remove the inline structure from .plx** - -Update your `.plx` file: - -```plx -domain = "ecommerce" - -[concept] -Product = "A product in the catalog" - -# Structure section removed - now defined in ecommerce_struct.py -``` - -**4. Verify automatic discovery** - -Pipelex automatically discovers and registers `StructuredContent` classes. No manual registration needed. - -**5. Test your pipeline** - -Run your pipeline to ensure everything works. The behavior should be identical, but now you have the flexibility to add custom logic. - -**6. Add enhancements (optional)** - -Now you can add validators, computed properties, or other Python features: - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Product(StructuredContent): - """A product in the catalog.""" - - product_id: int = Field(description="Unique product ID") - name: str = Field(description="Product name") - price: float = Field(ge=0, description="Product price") - in_stock: bool = Field(default=True, description="Stock availability") - categories: list[str] | None = Field(default=None, description="Product categories") - - @field_validator('price') - @classmethod - def validate_price(cls, v): - """Ensure price is positive and reasonable.""" - if v < 0: - raise ValueError('Price cannot be negative') - if v > 1_000_000: - raise ValueError('Price seems unreasonably high') - return v - - @property - def display_price(self) -> str: - """Format price for display.""" - return f"${self.price:.2f}" -``` - -## Recommendation: Start Simple, Grow as Needed - -The inline structure feature is a **practical solution for the majority of use cases**. 
It allows you to: - -- Get started quickly without Python overhead -- Keep all pipeline logic in one place -- Iterate rapidly during development -- Still get full type safety and validation - -When your needs grow beyond what inline structures can provide, **explicit Python `StructuredContent` classes offer more power and flexibility**. With AI coding assistants, creating these classes is fast and easy, giving you the best of both worlds. - -**Guidelines:** - -- ✅ **Use inline structures** for straightforward data models -- ✅ **Use inline structures** during prototyping and early development -- ✅ **Use inline structures** for domain-specific models with simple validation -- ✅ **Use Python classes** when you need custom validation logic -- ✅ **Use Python classes** for reusable, shared data models -- ✅ **Use Python classes** when you need computed properties or methods -- ✅ **Use Python classes** for complex type relationships - -Remember: You can always start with inline structures and migrate to Python classes later. The migration is straightforward, and AI assistants can help you make the transition quickly. 
- diff --git a/pipelex/kit/__init__.py.bak b/pipelex/kit/__init__.py.bak deleted file mode 100644 index 8b1378917..000000000 --- a/pipelex/kit/__init__.py.bak +++ /dev/null @@ -1 +0,0 @@ - From 3ccaea2294fd2700368e7841be5f0e0008302677 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 02:34:41 +0200 Subject: [PATCH 064/115] Docs about pipe_funcs and warnings --- CHANGELOG.md | 3 ++ .../define_your_concepts.md | 5 +++ .../pipe-operators/PipeFunc.md | 44 +++++++++++++++---- .../structuring-concepts.md | 5 +++ .../config-technical/library-config.md | 2 + docs/pages/quick-start/index.md | 3 ++ pipelex/tools/func_registry.py | 7 ++- pipelex/urls.py | 1 + 8 files changed, 59 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb1f569ef..5edaa5257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,7 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Added `is_dump_text_prompts_enabled` and `is_dump_response_text_enabled` configuration flags to have the console display everything that goes in and out of the LLMs - Added `generic_templates` section in `llm_config` with structure extraction prompts - Added useful error messages with migration configuration maps pin-pointing the fields to rename for config and plx files + - Added improved error message for `PipeFunc` when function not found in registry, mentioning `@pipe_func()` decorator requirement since v0.12.0 - Added pytest filterwarnings to ignore deprecated class-based config warnings - Added `Flow` class that represents the flow of pipe signatures - Added `pipe-builder` command `flow` to generate flow view from pipeline brief @@ -120,6 +121,8 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Updated `PipelexBundleSpec.to_blueprint()` to sort pipes by dependencies before creating bundle - Changed exception base class from `PipelexError` to `PipelexException` throughout codebase - Updated Makefile 
pyright target to use `--pythonpath` flag correctly + - Updated PipeFunc documentation to reflect `@pipe_func()` decorator requirement and auto-discovery from anywhere in project + - Added warnings about module-level code execution during auto-discovery to PipeFunc and StructuredContent documentation ### Fixed - Fixed Makefile target `pyright` to use correct pythonpath flag diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md index d1a74b4de..1ca81085b 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/define_your_concepts.md @@ -140,6 +140,11 @@ The model name must match the concept name exactly: `Invoice` concept → `Invoi Python classes are automatically discovered and registered by Pipelex. +!!! warning "Module Execution During Auto-Discovery" + When Pipelex discovers `StructuredContent` classes, it imports the module containing them. **Any code at the module level (outside functions/classes) will be executed during import.** This can have unintended side effects. + + **Best practice:** Keep your `StructuredContent` classes in dedicated modules (e.g., `*_struct.py` files) with minimal module-level code, or ensure module-level code is safe to execute during discovery. 
+ ### With Custom Validation Use Pydantic's validation features for complex rules: diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeFunc.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeFunc.md index 99b40db85..78f779597 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeFunc.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeFunc.md @@ -4,23 +4,25 @@ The `PipeFunc` operator provides an essential escape hatch, allowing you to exec ## How it works -`PipeFunc` operates by calling a Python function that has been automatically registered with Pipelex's central function registry. +`PipeFunc` operates by calling a Python function that has been registered with Pipelex's central function registry. -1. **Automatic Registration**: Functions are automatically discovered and registered from Python files in the `pipelex/libraries/` directory when Pipelex starts up. -2. **Function Signature**: Eligible functions are automatically registered using their function name as the registry key. -3. **Execution**: When the `PipeFunc` pipe is executed, it looks up your function by name and calls it, passing in the current `working_memory`. -4. **Returning Data**: The function returns data, which `PipeFunc` places back into the working memory, associated with the pipe's `output` concept. +1. **Decorator Required**: Functions must be decorated with `@pipe_func()` to be discovered and registered (since v0.12.0). +2. **Automatic Discovery**: Functions with the `@pipe_func()` decorator are automatically discovered from anywhere in your project when Pipelex starts up. +3. **Function Signature**: Eligible functions are registered using their function name (or a custom name) as the registry key. +4. **Execution**: When the `PipeFunc` pipe is executed, it looks up your function by name and calls it, passing in the current `working_memory`. +5. 
**Returning Data**: The function returns data, which `PipeFunc` places back into the working memory, associated with the pipe's `output` concept. ## Function Eligibility Requirements For a function to be automatically registered and available to `PipeFunc`, it **must** meet all of the following criteria: !!! warning "Function Eligibility Requirements" + - **Must be decorated with** `@pipe_func()` (required since v0.12.0) - **Must be an async function** (defined with `async def`) - **Must have exactly 1 parameter** named `working_memory` - **Parameter type must be** `WorkingMemory` - **Return type must be** a subclass of `StuffContent` (or a generic type like `ListContent[SomeType]`) - - **Must be defined in a Pipelex library file** within the `pipelines/` directory + - **Must be discoverable** (not in excluded directories like `.venv`, `__pycache__`, etc.) ### Return values @@ -30,16 +32,27 @@ Your async Python function can return: ## How to Create a Function -To make a Python function available to `PipeFunc`, simply create it in any Python file within the `pipelex/libraries/` directory structure. +To make a Python function available to `PipeFunc`: + +1. Add the `@pipe_func()` decorator to your function +2. Place the function anywhere in your project (it will be auto-discovered) +3. Ensure it meets all eligibility requirements + +!!! warning "Module Execution During Auto-Discovery" + When Pipelex discovers functions with `@pipe_func()`, it imports the module containing them. **Any code at the module level (outside functions/classes) will be executed during import.** This can have unintended side effects. + + **Best practice:** Keep your `@pipe_func()` functions in dedicated modules with minimal module-level code, or ensure module-level code is safe to execute during discovery. 
Here is an example of an eligible function: ```python -# in a file like pipelex/libraries/my_custom_functions.py +# in any Python file in your project (e.g., my_project/custom_functions.py) +from pipelex.tools.func_registry import pipe_func from pipelex.core.memory.working_memory import WorkingMemory -from pipelex.core.stuffs.stuff_content import TextContent +from pipelex.core.stuffs.text_content import TextContent +@pipe_func() # Required decorator for auto-discovery async def concatenate_texts(working_memory: WorkingMemory) -> TextContent: """ Retrieves two text stuffs, concatenates them, and returns a new text stuff. @@ -55,6 +68,19 @@ async def concatenate_texts(working_memory: WorkingMemory) -> TextContent: The function will be automatically registered with the name `concatenate_texts` (the function name) when Pipelex starts up. +### Custom Registration Name + +You can optionally specify a custom name for registration: + +```python +@pipe_func(name="custom_concat") +async def concatenate_texts(working_memory: WorkingMemory) -> TextContent: + # Implementation... + pass +``` + +Then use `function_name = "custom_concat"` in your `.plx` file. + ## Configuration Once the function is registered, you can use it in your `.plx` file. diff --git a/docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md b/docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md index 0f0f81aac..19d1c9b60 100644 --- a/docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md +++ b/docs/pages/build-reliable-ai-workflows-with-pipelex/structuring-concepts.md @@ -139,6 +139,11 @@ class Invoice(StructuredContent): Classes inheriting from `StructuredContent` are automatically discovered and registered by Pipelex. +!!! warning "Module Execution During Auto-Discovery" + When Pipelex discovers `StructuredContent` classes, it imports the module containing them. 
**Any code at the module level (outside functions/classes) will be executed during import.** This can have unintended side effects. + + **Best practice:** Keep your `StructuredContent` classes in dedicated modules (e.g., `*_struct.py` files) with minimal module-level code, or ensure module-level code is safe to execute during discovery. + ### With Custom Validation ```python diff --git a/docs/pages/configuration/config-technical/library-config.md b/docs/pages/configuration/config-technical/library-config.md index e919431d6..220abc872 100644 --- a/docs/pages/configuration/config-technical/library-config.md +++ b/docs/pages/configuration/config-technical/library-config.md @@ -223,6 +223,7 @@ pipelex show pipe YOUR_PIPE_CODE - Use `_struct.py` suffix for files containing structure classes (e.g., `finance_struct.py`) - Inherit from `StructuredContent` or its subclasses - Place structure class files near their corresponding `.plx` files +- **Keep modules clean**: Avoid module-level code that executes on import (Pipelex imports modules during auto-discovery) ### 3. Custom Functions @@ -230,6 +231,7 @@ pipelex show pipe YOUR_PIPE_CODE - Use descriptive function names - Document function parameters and return types - Keep functions focused and testable +- **Keep modules clean**: Avoid module-level code that executes on import (Pipelex imports modules during auto-discovery) ### 4. Validation diff --git a/docs/pages/quick-start/index.md b/docs/pages/quick-start/index.md index 072f9690c..834c31680 100644 --- a/docs/pages/quick-start/index.md +++ b/docs/pages/quick-start/index.md @@ -176,6 +176,9 @@ class Character(StructuredContent): description: str ``` +!!! tip "Keep Structure Files Clean" + Keep your `StructuredContent` classes in dedicated files with minimal module-level code. Pipelex imports these modules during auto-discovery, so any module-level code will be executed. 
+ 💡 **Alternative: Inline Structure Definition** Instead of creating a separate Python file, you can define structures directly in your `.plx` file using TOML syntax: diff --git a/pipelex/tools/func_registry.py b/pipelex/tools/func_registry.py index 5f97b9904..ab8009be7 100644 --- a/pipelex/tools/func_registry.py +++ b/pipelex/tools/func_registry.py @@ -6,6 +6,7 @@ from pydantic import Field, PrivateAttr, RootModel from pipelex.tools.exceptions import ToolException +from pipelex.urls import URLs FUNC_REGISTRY_LOGGER_CHANNEL_NAME = "func_registry" @@ -127,8 +128,10 @@ def get_required_function(self, name: str) -> Callable[..., Any]: """Retrieves a function from the registry by its name. Raises an error if not found.""" if name not in self.root: msg = ( - f"Function '{name}' not found in registry:" - "See how to register a function here: https://docs.pipelex.com/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeFunc" + f"Function '{name}' not found in registry. " + f"Since v0.12.0, custom functions require the @pipe_func() decorator for auto-discovery. " + f"Add @pipe_func() above your function definition. 
" + f"See: {URLs.pipe_func_docs}" ) raise FuncRegistryError(msg) return self.root[name] diff --git a/pipelex/urls.py b/pipelex/urls.py index d57662830..adec05ebf 100644 --- a/pipelex/urls.py +++ b/pipelex/urls.py @@ -1,2 +1,3 @@ class URLs: discord = "https://go.pipelex.com/discord" + pipe_func_docs = "https://docs.pipelex.com/pages/build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeFunc" From 8fa3f04489c1e0f83e68b4e680cbf91bcae41106 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 02:50:19 +0200 Subject: [PATCH 065/115] Pipe builder docs --- CHANGELOG.md | 4 +- docs/pages/tools/pipe-builder.md | 124 +++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 3 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 docs/pages/tools/pipe-builder.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 5edaa5257..78556d0fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ We tried to group all the renamings we wanted to do which impact our language, s This is all in the spirit of making Pipelex a declarative language, where you express what you want to do, and the system will figure out how to do it. So our focus inwas to make the Pipelex language easier to understand and use for non-technical users, and at the same time use more consistent and obvious words that developers are used to. -**💡 Pro tip:** To make migration easier, pass the [migration guide](pipelex/kit/migrations/migrate_0.11.0_0.12.0.md) to your favorite SWE agent (Cursor, Claude Code, github copilot, etc.) and let it handle the bulk of the changes! +**💡 Pro tip:** To make migration easier, pass the [migration guide](https://github.com/PipelexLab/pipelex/blob/main/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md) to your favorite SWE agent (Cursor, Claude Code, github copilot, etc.) and let it handle the bulk of the changes! 
- **Removed centralized `pipelex_libraries` folder system** - Pipelines are now auto-discovered from anywhere in your project—no special directory required @@ -23,7 +23,7 @@ This is all in the spirit of making Pipelex a declarative language, where you ex - Custom functions require `@pipe_func()` decorator for auto-discovery - Structure classes auto-discovered (must inherit from `StructuredContent`) - Configuration stays at repository root in `.pipelex/` directory - - See [migration guide](pipelex/kit/migrations/migrate_0.11.0_0.12.0.md) for details on reorganizing your project structure + - See [migration guide](https://github.com/PipelexLab/pipelex/blob/main/pipelex/kit/migrations/migrate_0.11.0_0.12.0.md) for details on reorganizing your project structure - General changes - renamed `definition` fields to `description` across all cases diff --git a/docs/pages/tools/pipe-builder.md b/docs/pages/tools/pipe-builder.md new file mode 100644 index 000000000..1896659e8 --- /dev/null +++ b/docs/pages/tools/pipe-builder.md @@ -0,0 +1,124 @@ +# Pipe Builder + +!!! warning "Beta Feature" + The Pipe Builder is currently in beta and progressing fast. Expect frequent improvements and changes. + +The Pipe Builder is an AI-powered tool that generates Pipelex pipelines from natural language descriptions. It helps you quickly prototype pipelines by describing what you want to achieve, and the builder translates your requirements into working `.plx` files. 
+ +## Overview + +The Pipe Builder uses AI to: + +- Understand your pipeline requirements from a brief description +- Generate domain concepts, pipe specifications, and complete pipeline structure +- Validate the generated pipeline for common errors +- Automatically fix certain deterministic issues + +## Usage + +Generate a pipeline with one validation/fix loop that automatically corrects deterministic issues: + +```bash +pipelex build pipe "Brief description of what the pipeline should do" -o path/to/output.plx +``` + +**Example:** + +```bash +pipelex build pipe "Given an expense report, apply company rules" -o results/expense_pipeline.plx +``` + +This command: + +1. Generates a complete pipeline from your brief +2. Validates the pipeline structure +3. Attempts to automatically fix common errors +4. Saves the final pipeline to the specified path + +## Options + +The build command supports the following options: + +- `--output`, `-o`: Path to save the generated file (default: `./results/generated_pipeline.plx`) +- `--no-output`: Skip saving the file (useful for testing) + +## How It Works + +The Pipe Builder follows this process: + +1. **Analysis**: Analyzes your brief to understand the domain and requirements +2. **Concept Generation**: Creates appropriate domain concepts for your workflow +3. **Pipe Generation**: Generates pipe operators and controllers to implement the logic +4. **Validation**: Validates the generated pipeline structure +5. 
**Automatic Fixes**: Fixes common errors like missing inputs or incorrect pipe connections + +## Example Use Cases + +**Document Processing:** + +```bash +pipelex build pipe "Take a CV in a PDF file and a Job offer text, and analyze if they match" +``` + +**Data Transformation:** + +```bash +pipelex build pipe "Extract structured data from invoice images" +``` + +**Multi-step Workflows:** + +```bash +pipelex build pipe "Given an RFP PDF, build a compliance matrix" +``` + +## Current Limitations + +The Pipe Builder is in active development and currently: + +- Can automatically fix input/output connection errors +- May require manual adjustments for complex conditional logic or custom functions +- Validates structural correctness only, not business logic + +## Tips for Best Results + +- You can be specific in your brief about inputs, outputs, data formats, or structures if you know what you need +- If you're uncertain about the details, let the AI figure it out and see what it generates +- Include any domain-specific requirements you're aware of upfront + +## Iterating on Generated Pipelines + +After generating a pipeline, you can continue refining it using any Software Engineering (SWE) agent. The generated `.plx` file can be iteratively improved through natural language instructions. + +Pipelex provides specialized agent rules (`write_pipelex.md` and `run_pipelex.md`) that guide AI assistants in working with pipelines. You can install these rules for your preferred AI coding assistant using: + +```bash +pipelex kit rules +``` + +This command installs the rules for: + +- **Cursor** +- **Claude Code** +- **OpenAI Codex** +- **GitHub Copilot** +- **Windsurf** +- **Blackbox AI** + +These rules help AI assistants understand Pipelex syntax, best practices, and common patterns, making it easier to iterate and refine your generated pipelines. + +## Next Steps + +After generating a pipeline: + +1. Review the generated `.plx` file +2. 
Test it with sample inputs: `pipelex run --input-memory-from-json input.json` +3. Continue iterating using your preferred SWE agent with the Pipelex agent rules +4. Adjust concepts or pipe configurations as needed + +For more information on pipeline structure and customization, see: + +- [Design and Run Pipelines](../build-reliable-ai-workflows-with-pipelex/design_and_run_pipelines.md) +- [Pipe Operators](../build-reliable-ai-workflows-with-pipelex/pipe-operators/index.md) +- [Pipe Controllers](../build-reliable-ai-workflows-with-pipelex/pipe-controllers/index.md) + diff --git a/mkdocs.yml b/mkdocs.yml index 5ac702f8c..281bc256e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -148,6 +148,7 @@ nav: - Changelog: changelog.md - Tools: - CLI: pages/tools/cli.md + - Pipe Builder: pages/tools/pipe-builder.md - Logging: pages/tools/logging.md - Advanced Customizations: - Overview: pages/advanced-customization/index.md From cd2f8f2b6160780595e5e87a89be84731fa6e15a Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 03:14:59 +0200 Subject: [PATCH 066/115] Update Quick Start --- docs/pages/installation/index.md | 197 ++++++++++++++++++---- docs/pages/quick-start/index.md | 269 ++++++++++++++++++++----------- docs/pages/tools/kit.md | 136 ++++++++++++++++ mkdocs.yml | 3 +- 4 files changed, 485 insertions(+), 120 deletions(-) create mode 100644 docs/pages/tools/kit.md diff --git a/docs/pages/installation/index.md b/docs/pages/installation/index.md index f47f0e9d5..480262e9b 100644 --- a/docs/pages/installation/index.md +++ b/docs/pages/installation/index.md @@ -1,52 +1,164 @@ -# Installation +# Installation & Configuration -## Prerequisites - -Pipelex requires `python` version `3.10` or above, and access to an LLM, via an API key or a custom endpoint. 
+## Installation -## Getting Started +Install Pipelex with pip: -Along with our [Quick Start Guide](../quick-start/index.md), we recommend you check out our [Cookbook](https://github.com/Pipelex/pipelex-cookbook) for practical examples. +```bash +pip install pipelex +``` -- **Create a virtual environment** (recommended) +Or use [uv](https://github.com/astral-sh/uv) for faster installs: ```bash -python3 -m venv .venv && source .venv/bin/activate +uv pip install pipelex ``` - - **Install Pipelex** +That's it! Pipelex is now installed. Continue to the [Quick Start Guide](../quick-start/index.md) to generate your first pipeline. + +--- + +## API Configuration + +To run pipelines with LLMs, you need to configure API access. **You have three options** - choose what works best for you: + +### Option 1: Pipelex Inference (Easiest for Getting Started) + +Get **free credits** for testing and development with a single API key that works with all major LLM providers: + +**Benefits:** + +- No credit card required +- Access to OpenAI, Anthropic, Google, Mistral, and more +- Perfect for development and testing +- Single API key for all models + +**Setup:** + +1. Join our Discord community to get your free API key: + - Visit [https://go.pipelex.com/discord](https://go.pipelex.com/discord) + - Request your key in the appropriate channel + +2. Create a `.env` file in your project root: + ```bash + echo "PIPELEX_INFERENCE_API_KEY=your-key-here" > .env + ``` -Pipelex can be installed from PyPI. We encourage the use of [uv](https://github.com/astral-sh/uv) for faster installs and dependency management: +That's it! Your pipelines can now access any supported LLM. + +### Option 2: Bring Your Own API Keys + +Use your existing API keys from LLM providers. 
This is ideal if you: + +- Already have API keys from providers +- Need to use specific accounts for billing +- Have negotiated rates or enterprise agreements + +**Setup:** + +Create a `.env` file in your project root with your provider keys: ```bash -uv pip install pipelex +# OpenAI +OPENAI_API_KEY=sk-... + +# Anthropic +ANTHROPIC_API_KEY=sk-ant-... + +# Google +GOOGLE_API_KEY=... + +# Mistral +MISTRAL_API_KEY=... + +# FAL (for image generation) +FAL_API_KEY=... + +# XAI +XAI_API_KEY=... + +# Azure OpenAI +AZURE_API_KEY=... +AZURE_API_BASE=... +AZURE_API_VERSION=... + +# AWS Bedrock +AWS_ACCESS_KEY_ID=... +AWS_SECRET_ACCESS_KEY=... +AWS_REGION=... ``` -Otherwise use pip: -```bash -pip install pipelex +You only need to add keys for the providers you plan to use. + +**Configure the Inference Backend:** + +When using your own keys, you need to tell Pipelex which backends to enable: + +1. Initialize your configuration: + ```bash + pipelex init config + ``` + +2. Edit `.pipelex/inference/backends.toml` to enable your providers + +For example, to use Google models: + +```toml +[google] +enabled = true ``` -- **Make sure you have a .env** file at the root of your project that contains the following fields +Learn more in our [Inference Backend Configuration](../configuration/config-technical/inference-backend-config.md) guide. + +### Option 3: Local AI (No API Keys Required) + +Run AI models locally without any API keys. This is perfect if you: + +- Want complete privacy and control +- Have capable hardware (GPU recommended) +- Need offline capabilities +- Want to avoid API costs + +**Supported Local Options:** + +**Ollama** (Recommended): + +1. Install [Ollama](https://ollama.ai/) +2. Pull a model: `ollama pull llama2` +3. No API key needed! 
Configure the Ollama backend in `.pipelex/inference/backends.toml` + +**Other Local Providers:** + +- **vLLM**: High-performance inference server +- **LM Studio**: User-friendly local model interface +- **llama.cpp**: Lightweight C++ inference + +Configure these in `.pipelex/inference/backends.toml`. See our [Inference Backend Configuration](../configuration/config-technical/inference-backend-config.md) for details. + +--- + +## Initialize Configuration + +To set up Pipelex configuration files, run: ```bash -OPENAI_API_KEY=sk_... +pipelex init config ``` -All the secret keys used by `pipelex` are specified in the `.env.example` file. However, by default, only the `OPENAI_API_KEY` is required. - +This creates a `.pipelex/` directory with: -- **Initialize configuration:** +- `pipelex.toml`: Feature flags, logging, cost reporting +- `inference/`: LLM configuration and model presets -To set up the Pipelex configuration files, run this command at the root of your project: +Learn more in our [Configuration documentation](../configuration/index.md). -- `pipelex init config`: This CLI command will create a `.pipelex/` directory with configuration files including `pipelex.toml`. This configuration file contains settings for feature flags, logging, cost reporting, and more. Learn more in our [Configuration documentation](../configuration/index.md) +--- -- **Create your pipelines:** +## Project Organization -You can now create `.plx` pipeline files **anywhere** in your project. Pipelex automatically discovers them (excluding `.venv`, `.git`, `node_modules`, etc.). +Pipelex automatically discovers `.plx` pipeline files anywhere in your project (excluding `.venv`, `.git`, `node_modules`, etc.). 
-**Keep pipelines with related code** - that's usually the best organization: +**Recommended: Keep pipelines with related code** ```bash your_project/ @@ -59,15 +171,42 @@ your_project/ │ ├── services.py │ ├── contracts.plx # Pipeline with legal code │ └── contracts_struct.py -├── .pipelex/ # Config at repo root (created by init config) +├── .pipelex/ # Config at repo root │ └── pipelex.toml +├── .env # API keys (git-ignored) └── requirements.txt ``` -Or centralize if you prefer: `my_project/pipelines/*.plx` +**Alternative: Centralize pipelines** + +```bash +your_project/ +├── pipelines/ +│ ├── invoices.plx +│ ├── contracts.plx +│ └── structures.py +└── .pipelex/ + └── pipelex.toml +``` + +Learn more in our [Project Structure documentation](../build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md). + +--- + +## Prerequisites + +- **Python**: Version 3.10 or above +- **API Access**: One of the three options above (Pipelex Inference, your own keys, or local AI) + +--- + +## Next Steps -Learn more about flexible organization in our [Project Structure documentation](../build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md) +Now that you have Pipelex installed and configured: +1. **Start generating pipelines**: [Quick-start Guide](../quick-start/index.md) +2. **Explore examples**: [Cookbook Repository](https://github.com/Pipelex/pipelex-cookbook) +3. **Learn the concepts**: [The Pipelex Paradigm](../pipelex-paradigm-for-repeatable-ai-workflows/index.md) +4. **Deep dive**: [Build Reliable AI Workflows](../build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md) -💡 _Any troubles? Have a look at our [Cookbook](https://github.com/Pipelex/pipelex-cookbook)! - +💡 Need help? Check out our [Cookbook](https://github.com/Pipelex/pipelex-cookbook) for practical examples! 
diff --git a/docs/pages/quick-start/index.md b/docs/pages/quick-start/index.md index 834c31680..7a6ea8258 100644 --- a/docs/pages/quick-start/index.md +++ b/docs/pages/quick-start/index.md @@ -1,78 +1,139 @@ # Quick-start -This guide shows the basics of Pipelex for the simplest use-cases: LLM calling and structured outputs. +Welcome to Pipelex! This guide will get you started building AI workflows in minutes. -You can **find more powerful examples** in the [Cookbook Examples](../cookbook-examples/index.md) section of the docs or dive directly into the [Cookbook repository](https://github.com/Pipelex/pipelex-cookbook). [![GitHub](https://img.shields.io/badge/Cookbook-5a0dad?logo=github&logoColor=white&style=flat)](https://github.com/Pipelex/pipelex-cookbook/) +## What is Pipelex? + +Pipelex is an open-source Python framework for building **repeatable AI workflows**. Instead of cramming everything into one complex prompt, you break tasks into focused steps—each pipe handling one clear transformation. These workflows process information from **extraction** to **analysis** to **decision making**. The result? **Deterministic structure with adaptive intelligence**: the reliability of software with the flexibility of AI. + +Each pipe processes information: it takes structured inputs and produces structured outputs. Pipelex uses **Concepts** (typing with meaning) to ensure your pipelines make sense. A "non-compete clause" and a "flower description" are both text, but they represent different concepts. Pipelex validates that pipes connect properly, catching errors before they happen. + +The `.plx` language is simple and readable—even for non-technical users. You can generate pipelines with AI or write them yourself. + +Learn more about the philosophy in [The Pipelex Paradigm](../pipelex-paradigm-for-repeatable-ai-workflows/index.md). --- -## Setting up API Keys +## Installation + +Install Pipelex with pip: + +```bash +pip install pipelex +``` + +That's it! 
Now let's generate your first pipeline. + +--- + +## Generate Your First Pipeline with AI + +The fastest way to get started is to generate pipelines using natural language. Pipelex can create complete, multi-step workflows for you. + +### Step 1: Generate with Pipe Builder + +The Pipe Builder transforms natural language descriptions into working `.plx` files. Here are some real-world examples: + +**Example 1: Expense Report Validation** + +```bash +pipelex build pipe "Given an expense report, apply company rules" -o expense_pipeline.plx +``` + +**Example 2: CV/Job Matching** + +```bash +pipelex build pipe "Take a CV in a PDF file, a Job offer text, and analyze if they match" -o cv_matcher.plx +``` + +**Example 3: Compliance Analysis** + +```bash +pipelex build pipe "Given an RFP PDF, build a compliance matrix" -o compliance_pipeline.plx +``` + +These commands generate complete `.plx` files containing: + +- Domain definitions +- Concept declarations +- Multi-step pipe workflows + +The Pipe Builder handles complexity automatically, creating pipelines with proper structure and validation. + +!!! info "Pipe Builder is in Beta" + The Pipe Builder is currently in beta and improving rapidly. Expect frequent enhancements! + +### Step 2: Install Agent Rules for Iteration + +Once you have a generated pipeline, you can refine it using any AI coding assistant: + +```bash +pipelex kit rules +``` + +This command installs Pipelex rules for: -Before you can make LLM calls with Pipelex, you need to configure API keys. You have two options: +- **Cursor** +- **Claude Code** +- **OpenAI Codex** +- **GitHub Copilot** +- **Windsurf** +- **Blackbox AI** -### Option 1: Use Pipelex Inference (Recommended for Getting Started) +These rules teach your AI assistant Pipelex syntax, concepts, and best practices. -Get **free access** to all well-known commercial and open-source LLMs with a single API key: +### Step 3: Iterate with Your AI Assistant -1. 
**Join our Discord community to get your free Pipelex Inference key** (no credit card required, limited time offer) - - Visit [https://go.pipelex.com/discord](https://go.pipelex.com/discord) to join - - Request your API key in the appropriate channel once you're in +With the rules installed, use natural language to refine your pipeline: -2. **Set up your environment**: - ```bash - # Create a .env file in your project root - echo "PIPELEX_INFERENCE_API_KEY=your-key-here" > .env - ``` +- "Add a validation step to check for negative amounts" +- "Include confidence scores in the match analysis" +- "Add error handling for missing PDF files" +- "Create a summary report at the end" -With Pipelex Inference, you get instant access to models from OpenAI, Anthropic, Google, Mistral, and more - all through a single API key! +Your AI assistant will understand Pipelex and make the changes correctly. -### Option 2: Use Your Own API Keys +### Step 4: Test Your Pipeline -If you already have API keys from LLM providers, you can use them directly: +Run your pipeline from the command line or Python: + +**CLI:** ```bash -# Add to your .env file -# To directly use models on OpenAI, you will need to set the following variable -OPENAI_API_KEY=your-openai-key -# To directly use models on Anthropic, you will need to set the following variable -ANTHROPIC_API_KEY=your-anthropic-key -# To directly use models on Google, you will need to set the following variable -GOOGLE_API_KEY=your-google-key -# To directly use models on Mistral, you will need to set the following variable -MISTRAL_API_KEY=your-mistral-key -# To directly use models on FAL, you will need to set the following variable -FAL_API_KEY=your-fal-key -# To directly use models on XAI, you will need to set the following variable -XAI_API_KEY=your-xai-key - -# To use models via Ollama, you will need to set the following variables -OLLAMA_API_KEY=your-ollama-key -# To use models via BlackboxAI, you will need to set the following variables 
-BLACKBOX_API_KEY=your-blackboxai-key -# To use models via Azure OpenAI, you will need to set the following variables -AZURE_API_KEY=your-azure-key -AZURE_API_BASE=your-azure-endpoint -AZURE_API_VERSION=your-azure-version -# To use models via AWS Bedrock, you will need to set the following variables -AWS_ACCESS_KEY_ID=your-aws-key -AWS_SECRET_ACCESS_KEY=your-aws-secret -AWS_REGION=your-aws-region +pipelex run --input-memory-from-json input.json ``` -Adding those env variables is not enough. You also need to configure the inference backend to choose where to route the AI calls. -See the [Inference Backend Configuration](../configuration/config-technical/inference-backend-config.md#inference-backends) documentation. +**Python:** + +```python +import asyncio +from pipelex.pipeline.execute import execute_pipeline +from pipelex.pipelex import Pipelex + +async def run_pipeline(): + pipe_output = await execute_pipeline(pipe_code="your_pipe_code") + print(pipe_output.main_stuff_as_str) + +Pipelex.make() +asyncio.run(run_pipeline()) +``` + +!!! tip "Need API Access?" + To run these pipelines with LLMs, you'll need API access. We offer **free credits** for testing and developing, or you can bring your own keys, or run local AI. See the [Installation & Configuration](../installation/index.md#api-configuration) guide for all options. -For example, if you want to use a gemini model via GOOGLE, enable the google backend in `.pipelex/inference/backends.toml` and set the API key in your env. --- -## Your first LLM call with Pipelex +## Understanding How It Works + +Ready to dive deeper? This section shows you how to manually create pipelines and understand the `.plx` language. + +### Your First LLM Call with Pipelex -Let's start by running your very first LLM call using Pipelex. -For illustration purposes, let's build **a character generator**. Each example relies on asynchronous execution and typed models for reliable prompts. 
+Let's build a **character generator** to understand the basics. -### Write your first pipeline +#### Write Your First Pipeline -Create a `.plx` file to store your pipe definition. You can place it anywhere in your project - we recommend creating a `pipelines` directory for organization. +Create a `.plx` file anywhere in your project (we recommend a `pipelines` directory): `character.plx` ```plx @@ -87,9 +148,16 @@ prompt = """You are a book writer. Your task is to create a character. Think of it and then output the character description.""" ``` -### Run your first Pipelex script +This pipeline: -Now, create a `.py` python file to run your script. You can save it anywhere in your repository. +- Declares a `characters` domain +- Defines a `create_character` pipe of type `PipeLLM` +- Outputs plain `Text` +- Uses a simple prompt + +#### Run Your First Pipelex Script + +Create a Python file to execute the pipeline: `character.py` ```python @@ -112,7 +180,7 @@ Pipelex.make() asyncio.run(create_character()) ``` -### Get your first Pipelex result +#### Get Your First Pipelex Result ```bash python character.py @@ -120,9 +188,9 @@ python character.py ![Example of a generated character sheet](character_sheet.png) -## How to use a specific LLM or LLM provider +### Using Specific LLMs -### Indicate your LLM selection explicitly using the `llm` attribute +#### Indicate Your LLM Selection Explicitly ```plx [pipe.create_character] @@ -134,7 +202,7 @@ prompt = """You are a book writer. Your task is to create a character. Think of it and then output the character description.""" ``` -### Or use an LLM preset from the LLM deck +#### Or Use an LLM Preset from the LLM Deck ```plx [pipe.create_character] @@ -150,18 +218,17 @@ Think of it and then output the character description.""" # it's a base preset that we provide. you can add your own presets, too. 
``` -💡 We have a lot of [LLM presets available by default](https://github.com/Pipelex/pipelex/tree/main/.pipelex/inference/deck/base_deck.toml). -Make sure you have credentials for the underlying LLM provider (and added your API key to the `.env`) and select the one you want! +💡 We have many [LLM presets available by default](https://github.com/Pipelex/pipelex/tree/main/.pipelex/inference/deck/base_deck.toml). -Learn more about LLM presets, LLM handles and LLM deck in our [LLM Configuration Guide](../build-reliable-ai-workflows-with-pipelex/configure-ai-llm-to-optimize-workflows.md) +Learn more in our [LLM Configuration Guide](../build-reliable-ai-workflows-with-pipelex/configure-ai-llm-to-optimize-workflows.md). -### Generate a structured output +### Generate Structured Outputs -Let's say that we no longer want plain text as output but a rigorously structured Character object. +Let's create a rigorously structured `Character` object instead of plain text. -### Define the model +#### Define the Structure -Using the [Pydantic BaseModel](https://docs.pydantic.dev/latest/) syntax, define your object structure as a Python class in your project: +Using [Pydantic BaseModel](https://docs.pydantic.dev/latest/) syntax: `characters.py` ```python @@ -181,7 +248,7 @@ class Character(StructuredContent): 💡 **Alternative: Inline Structure Definition** -Instead of creating a separate Python file, you can define structures directly in your `.plx` file using TOML syntax: +Define structures directly in your `.plx` file: ```plx [concept.Character] @@ -194,20 +261,18 @@ gender = "The character's gender" description = "A description of the character" ``` -Learn more about inline structures and when to use them in [Structuring Concepts](../build-reliable-ai-workflows-with-pipelex/structuring-concepts.md). - -### Improve the pipeline +Learn more in [Structuring Concepts](../build-reliable-ai-workflows-with-pipelex/structuring-concepts.md). 
-It's time to specify that your output be a `Character` instance. Use the `output` field for that purpose. +#### Improve the Pipeline -💡 Here, the concept name matches the class name (ie. `Character`), the `Character` class will automatically be considered as the structure to output. +Specify that your output is a `Character` instance: `characters.plx` ```plx domain = "characters" [concept] -Character = "A character is a fiction story" # <- Define here your output concept so that it is linked to the class name +Character = "A character in a fiction story" # <- Define your output concept [pipe] [pipe.create_character] @@ -218,25 +283,23 @@ prompt = """You are a book writer. Your task is to create a character. Think of it and then output the character description.""" ``` -💡 Defining the `Character` concept as "A character is a fiction story" might seem obvious but… think of it: "character" can also mean a letter or symbol in a text. Defining concepts is the best way to avoid any ambiguity and make sure the LLMs understand what you mean. +💡 The concept name matches the class name (`Character`), so Pipelex automatically links them. -### Run your pipeline +💡 Defining concepts removes ambiguity—"character" could mean a letter or symbol, but here it clearly means a fictional person. -As you can see, the output is a `Character` instance. - -![Example of a generated character sheet with structure in JSON](structured_character_sheet_json.png) +#### Run Your Pipeline +The output is now a structured `Character` instance: -## Generate using information in a prompt template +![Example of a generated character sheet with structure in JSON](structured_character_sheet_json.png) -What if you want to pass some data into a prompt? -You can do that using a prompt template. +### Using Prompt Templates -In this example, we no longer want to generate characters. We want to process existing ones, especially their description attributes. +Pass data into prompts using templates. 
-We want to extract structured information from the description field. Thus we have a `Character` input and a `CharacterMetadata` output. +Let's process existing characters and extract metadata from their descriptions. -### Define the output structure +#### Define the Output Structure ```python # character_model.py @@ -257,9 +320,9 @@ class CharacterMetadata(StructuredContent): height: float ``` -### **Let's use a template to fill prompts with data** +#### Use a Template to Fill Prompts with Data -💡 Our template syntax is based on [Jinja2 syntax](https://jinja.palletsprojects.com/en/stable/). You can include a variable using the **classic** `{{ double.curly.braces }}`, and to make it simpler, we've added the possibility to just prefix your variable with the `@` symbol (recommended). Pipes declare their required inputs explicitly with the `inputs` table: +💡 Our template syntax is based on [Jinja2](https://jinja.palletsprojects.com/en/stable/). Use `{{ double.curly.braces }}` or the simpler `@` prefix (recommended). ```plx [concept] @@ -270,7 +333,7 @@ CharacterMetadata = "Metadata regarding a character." [pipe.extract_character_1] type = "PipeLLM" description = "Get character information from a description." -inputs = { character = "Character" } # <- These are the inputs of your pipe, usable in the prompt_template +inputs = { character = "Character" } # <- These inputs are usable in the prompt output = "CharacterMetadata" prompt = """ You are given a text description of a character. @@ -280,11 +343,11 @@ Your task is to extract specific data from the following description. """ ``` -💡 `@character.description` is substituted by grabbing the stuff named `character`in the working memory and using its `description`attribute +💡 `@character.description` grabs the `character` stuff from working memory and uses its `description` attribute. -Learn more about how we use Jinja in the [PipeLLM documentation](../build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md). 
+Learn more about Jinja in the [PipeLLM documentation](../build-reliable-ai-workflows-with-pipelex/pipe-operators/PipeLLM.md). -### **This is how you do it from the code side** +#### Execute from Python ```python import asyncio @@ -328,7 +391,7 @@ async def process_existing_character(): working_memory=working_memory, ) - # Get the result as a porperly typed instance + # Get the result as a properly typed instance extracted_metadata = pipe_output.main_stuff_as(content_type=CharacterMetadata) # <- This is the output of your pipe, properly typed print(extracted_metadata) @@ -338,6 +401,32 @@ Pipelex.make() asyncio.run(process_existing_character()) ``` -### **Get result** +#### Get Result ![Example of extracted character metadata](extracted_character_metadata.png) + +--- + +## Next Steps + +Now that you understand the basics, explore more: + +**Learn More:** + +- [Cookbook Examples](../cookbook-examples/index.md) - Real-world examples and patterns +- [Build Reliable AI Workflows](../build-reliable-ai-workflows-with-pipelex/kick-off-a-knowledge-pipeline-project.md) - Deep dive into pipeline design +- [Pipe Operators](../build-reliable-ai-workflows-with-pipelex/pipe-operators/index.md) - PipeLLM, PipeExtract, PipeCompose, and more +- [Pipe Controllers](../build-reliable-ai-workflows-with-pipelex/pipe-controllers/index.md) - PipeSequence, PipeParallel, PipeBatch, PipeCondition + +**Explore Tools:** + +- [Pipe Builder](../tools/pipe-builder.md) - Generate pipelines from natural language +- [Kit Commands](../tools/kit.md) - Manage agent rules and migrations +- [CLI Commands](../tools/cli.md) - Command-line interface reference + +**Configure:** + +- [LLM Configuration](../build-reliable-ai-workflows-with-pipelex/configure-ai-llm-to-optimize-workflows.md) - Optimize cost and quality +- [Inference Backend](../configuration/config-technical/inference-backend-config.md) - Configure model providers + 
+[![Cookbook](https://img.shields.io/badge/Cookbook-5a0dad?logo=github&logoColor=white&style=flat)](https://github.com/Pipelex/pipelex-cookbook/) diff --git a/docs/pages/tools/kit.md b/docs/pages/tools/kit.md new file mode 100644 index 000000000..7787ad8f9 --- /dev/null +++ b/docs/pages/tools/kit.md @@ -0,0 +1,136 @@ +# Pipelex Kit Commands + +The Pipelex Kit provides commands for managing agent rules and migration instructions. These commands help you integrate Pipelex guidelines into your AI coding assistants and keep track of breaking changes across versions. + +## Available Commands + +### Install Agent Rules + +Install Pipelex agent rules for AI coding assistants: + +```bash +pipelex kit rules +``` + +This command: + +1. Exports agent markdown files to Cursor `.mdc` files with YAML front-matter in `.cursor/rules` +2. Builds merged agent documentation and updates target files for other AI assistants + +**Supported AI Assistants:** + +- **Cursor** (`.cursor/rules/`) +- **Claude Code** (`CLAUDE.md`) +- **OpenAI Codex** (`AGENTS.md`) +- **GitHub Copilot** (`.github/copilot-instructions.md`) +- **Windsurf** (`.windsurfrules.md`) +- **Blackbox AI** (`BLACKBOX_RULES.md`) + +**Options:** + +- `--repo-root PATH`: Repository root directory (default: current directory) +- `--cursor/--no-cursor`: Export Cursor rules (default: enabled) +- `--single-files/--no-single-files`: Update single-file agent documentation targets (default: enabled) +- `--dry-run`: Show what would be done without making changes +- `--diff`: Show unified diff of changes +- `--backup SUFFIX`: Create backups with the specified suffix (e.g., `.bak`) + +**Examples:** + +```bash +# Install rules for all supported AI assistants +pipelex kit rules + +# Preview changes without applying them +pipelex kit rules --dry-run --diff + +# Install only Cursor rules +pipelex kit rules --no-single-files + +# Create backups before updating +pipelex kit rules --backup .bak +``` + +### Remove Agent Rules + +Remove Pipelex 
agent rules from your project: + +```bash +pipelex kit remove-rules +``` + +This command: + +1. Deletes the Cursor `.mdc` rule files (generated from the agent markdown files) from `.cursor/rules` +2. Removes marked sections from target files (or deletes entire files with `--delete-files`) + +**Options:** + +- `--repo-root PATH`: Repository root directory (default: current directory) +- `--cursor/--no-cursor`: Remove Cursor rules (default: enabled) +- `--single-files/--no-single-files`: Remove agent documentation from target files (default: enabled) +- `--delete-files`: Delete entire target files instead of just removing marked sections +- `--dry-run`: Show what would be done without making changes +- `--diff`: Show unified diff of changes +- `--backup SUFFIX`: Create backups with the specified suffix (e.g., `.bak`) + +**Examples:** + +```bash +# Remove all agent rules +pipelex kit remove-rules + +# Preview what would be removed +pipelex kit remove-rules --dry-run + +# Remove only Cursor rules +pipelex kit remove-rules --no-single-files + +# Delete entire target files +pipelex kit remove-rules --delete-files +``` + +### Sync Migration Instructions + +Sync migration instructions from the Pipelex kit to your project: + +```bash +pipelex kit migrations +``` + +This command copies migration documentation files from the `pipelex.kit` package to your project's `.pipelex/migrations` directory. These files provide detailed instructions for migrating between Pipelex versions. 
+ +**Options:** + +- `--repo-root PATH`: Repository root directory (default: current directory) +- `--dry-run`: Show what would be done without making changes + +**Examples:** + +```bash +# Sync migration instructions +pipelex kit migrations + +# Preview what would be copied +pipelex kit migrations --dry-run +``` + +## Agent Rules Overview + +The agent rules installed by `pipelex kit rules` include: + +- **`write_pipelex.md`**: Guidelines for writing Pipelex pipelines, including syntax, best practices, and common patterns +- **`run_pipelex.md`**: Guidelines for running and testing Pipelex pipelines +- **`llms.md`**: LLM configuration and usage guidelines +- **`python_standards.md`**: Python coding standards and best practices +- **`docs.md`**: Guidelines for writing documentation +- **`pytest_standards.md`**: Guidelines for writing unit tests +- **`tdd.md`**: Test-driven development guidelines + +These rules are configured in `pipelex/kit/index.toml`. + +## Related Documentation + +- [Pipe Builder](pipe-builder.md) - Generate pipelines from natural language +- [CLI Commands](cli.md) - General Pipelex CLI documentation + diff --git a/mkdocs.yml b/mkdocs.yml index 281bc256e..a400b27e0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -85,8 +85,8 @@ markdown_extensions: nav: - Home: - Welcome: index.md - - Installation: pages/installation/index.md - Quick-start: pages/quick-start/index.md + - Installation: pages/installation/index.md - Understand Pipelex: - The Knowledge Pipeline Manifesto: manifesto.md - The Pipelex Paradigm: pages/pipelex-paradigm-for-repeatable-ai-workflows/index.md @@ -148,6 +148,7 @@ nav: - Changelog: changelog.md - Tools: - CLI: pages/tools/cli.md + - Kit Commands: pages/tools/kit.md - Pipe Builder: pages/tools/pipe-builder.md - Logging: pages/tools/logging.md - Advanced Customizations: From b67502b8ce60230d6b7333914e4ede3e57608fa4 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 03:25:15 +0200 Subject: [PATCH 067/115] cookbook docs 
--- docs/pages/cookbook-examples/extract-dpe.md | 24 ++++++------ docs/pages/cookbook-examples/extract-gantt.md | 20 ++++------ .../cookbook-examples/extract-generic.md | 11 ++---- .../extract-proof-of-purchase.md | 13 +++---- docs/pages/cookbook-examples/extract-table.md | 12 +++--- docs/pages/cookbook-examples/hello-world.md | 8 ++-- .../cookbook-examples/invoice-extractor.md | 35 +++++++++-------- docs/pages/cookbook-examples/simple-ocr.md | 17 ++++----- .../cookbook-examples/write-screenplay.md | 20 ++++------ docs/pages/cookbook-examples/write-tweet.md | 38 +++++++------------ 10 files changed, 85 insertions(+), 113 deletions(-) diff --git a/docs/pages/cookbook-examples/extract-dpe.md b/docs/pages/cookbook-examples/extract-dpe.md index 297670c5f..4af51b595 100644 --- a/docs/pages/cookbook-examples/extract-dpe.md +++ b/docs/pages/cookbook-examples/extract-dpe.md @@ -12,14 +12,11 @@ The pipeline `power_extractor_dpe` is designed to recognize and extract the key ```python async def extract_dpe(pdf_url: str) -> Dpe: - working_memory = WorkingMemoryFactory.make_from_pdf( - pdf_url=pdf_url, - concept_string="PDF", - name="pdf", - ) pipe_output = await execute_pipeline( pipe_code="power_extractor_dpe", - working_memory=working_memory, + input_memory={ + "document": PDFContent(url=pdf_url), + }, ) working_memory = pipe_output.working_memory dpe: Dpe = working_memory.get_list_stuff_first_item_as(name="dpe", item_type=Dpe) @@ -62,21 +59,26 @@ The pipeline uses a `PipeLLM` with a very specific prompt to extract the informa [pipe.write_markdown_from_page_content_dpe] type = "PipeLLM" description = "Write markdown from page content of a 'Diagnostic de Performance Energetique'" -inputs = { page_content = "Page" } -output = "Dpe" # The output is structured as a Dpe object +inputs = { "page_content.page_view" = "Image", page_content = "Page" } +output = "Dpe" model = "llm_for_img_to_text" structuring_method = "preliminary_text" -system_prompt = """You are a multimodal LLM, 
expert in converting images into perfect markdown.""" +system_prompt = """You are a multimodal LLM, expert at converting images into perfect markdown.""" prompt = """ -You are given an image of a French 'Diagnostic de Performance Energetique'. +You are given an image of a French 'Diagnostic de Performance Energetique': $page_content.page_view Your role is to convert the image into perfect markdown. To help you do so, you are given the text extracted from the page by an OCR model. @page_content.text_and_images.text.text - It is very important that you collect every element, especially if they are related to the energy performance of the building. +- Pay attention to all the pieces of information that may be included in images, graphs, charts, or tables. - We value letters like "A, B, C, D, E, F, G" as they are energy performance classes. -# ... (prompt continues) +- Pay attention to the text alignment, it might have been misaligned by the OCR. +- The OCR extraction may be highly incomplete. It is your job to complete the text and add the missing information using the image. +- Output only the markdown, nothing else. No need for "```markdown" or "```". +- You can use HTML if it helps you. +- You can use tables if it is relevant. """ ``` This is a great example of how to create a highly specialized extraction pipeline by combining a custom data model with a detailed, guiding prompt. 
\ No newline at end of file diff --git a/docs/pages/cookbook-examples/extract-gantt.md b/docs/pages/cookbook-examples/extract-gantt.md index b18b03856..9d8a5f99e 100644 --- a/docs/pages/cookbook-examples/extract-gantt.md +++ b/docs/pages/cookbook-examples/extract-gantt.md @@ -12,19 +12,16 @@ The pipeline takes an image as input, creates a working memory, and then execute ```python async def extract_gantt(image_url: str) -> GanttChart: - # Create Working Memory - working_memory = WorkingMemoryFactory.make_from_image( - image_url=image_url, - concept_string="gantt.GanttImage", - name="gantt_chart_image", - ) - # Run the pipe pipe_output = await execute_pipeline( pipe_code="extract_gantt_by_steps", - working_memory=working_memory, + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttChartImage", + "content": ImageContent(url=image_url), + } + }, ) - # Output the result return pipe_output.main_stuff_as(content_type=GanttChart) ``` @@ -74,16 +71,15 @@ steps = [ { pipe = "gather_in_a_gantt_chart", result = "gantt_chart" }, ] -# This is the pipe that extracts the details for a single task [pipe.extract_details_of_task] type = "PipeLLM" description = "Extract the precise dates of the task, start_date and end_date" inputs = { gantt_chart_image = "GanttChartImage", gantt_timescale = "GanttTimescaleDescription", gantt_task_name = "GanttTaskName" } -output = "GanttTaskDetails" # The output is structured as a GanttTaskDetails object +output = "GanttTaskDetails" structuring_method = "preliminary_text" model = "llm_to_extract_diagram" prompt = """ -I am sharing an image of a Gantt chart. +I am sharing an image of a Gantt chart: $gantt_chart_image. Please analyse the image and for a given task name (and only this task), extract the information of the task, if relevant. 
Be careful, the time unit is this: diff --git a/docs/pages/cookbook-examples/extract-generic.md b/docs/pages/cookbook-examples/extract-generic.md index a08e5e586..e62da82cc 100644 --- a/docs/pages/cookbook-examples/extract-generic.md +++ b/docs/pages/cookbook-examples/extract-generic.md @@ -12,14 +12,11 @@ The `power_extractor` pipeline is at the heart of this example. After its execut ```python async def extract_generic(pdf_url: str) -> TextAndImagesContent: - working_memory = WorkingMemoryFactory.make_from_pdf( - pdf_url=pdf_url, - concept_string="PDF", - name="pdf", - ) pipe_output = await execute_pipeline( pipe_code="power_extractor", - working_memory=working_memory, + input_memory={ + "document": PDFContent(url=pdf_url), + }, ) working_memory = pipe_output.working_memory markdown_and_images: TextAndImagesContent = merge_markdown_and_images(working_memory) @@ -38,7 +35,7 @@ def merge_markdown_and_images(working_memory: WorkingMemory) -> TextAndImagesCon # ... (check for length equality) # Concatenate the markdown text - concatenated_markdown_text: str = "\\n".join([page_markdown.text for page_markdown in page_markdown_list.items]) + concatenated_markdown_text: str = "\n".join([page_markdown.text for page_markdown in page_markdown_list.items]) # Aggregate the images from the page contents image_contents: List[ImageContent] = [] diff --git a/docs/pages/cookbook-examples/extract-proof-of-purchase.md b/docs/pages/cookbook-examples/extract-proof-of-purchase.md index 9cf496d27..141dbf294 100644 --- a/docs/pages/cookbook-examples/extract-proof-of-purchase.md +++ b/docs/pages/cookbook-examples/extract-proof-of-purchase.md @@ -12,14 +12,11 @@ The pipeline `power_extractor_proof_of_purchase` is specifically designed to han ```python async def extract_proof_of_purchase(pdf_url: str) -> ProofOfPurchase: - working_memory = WorkingMemoryFactory.make_from_pdf( - pdf_url=pdf_url, - concept_string="PDF", - name="pdf", - ) pipe_output = await execute_pipeline( 
pipe_code="power_extractor_proof_of_purchase", - working_memory=working_memory, + input_memory={ + "document": PDFContent(url=pdf_url), + }, ) working_memory = pipe_output.working_memory proof_of_purchase: ProofOfPurchase = working_memory.get_list_stuff_first_item_as(name="proof_of_purchase", item_type=ProofOfPurchase) @@ -58,8 +55,8 @@ The pipeline uses a powerful `PipeLLM` to extract the structured data from the d [pipe.write_markdown_from_page_content_proof_of_purchase] type = "PipeLLM" description = "Write markdown from page content" -inputs = { "page_content.page_view" = "Page" } # The LLM receives the image of the page -output = "ProofOfPurchase" # The LLM is forced to output a ProofOfPurchase object +inputs = { "page_content.page_view" = "Image", page_content = "Page" } +output = "ProofOfPurchase" model = "llm_for_img_to_text" structuring_method = "preliminary_text" system_prompt = """You are a multimodal LLM, expert at converting images into perfect markdown.""" diff --git a/docs/pages/cookbook-examples/extract-table.md b/docs/pages/cookbook-examples/extract-table.md index f490619ab..80791a2ad 100644 --- a/docs/pages/cookbook-examples/extract-table.md +++ b/docs/pages/cookbook-examples/extract-table.md @@ -12,14 +12,14 @@ The pipeline `extract_html_table_and_review` takes an image of a table, processe ```python async def extract_table(table_screenshot: str) -> HtmlTable: - working_memory = WorkingMemoryFactory.make_from_image( - image_url=table_screenshot, - concept_string="tables.TableScreenshot", - name="table_screenshot", - ) pipe_output = await execute_pipeline( pipe_code="extract_html_table_and_review", - working_memory=working_memory, + input_memory={ + "table_screenshot": { + "concept": "tables.TableScreenshot", + "content": ImageContent(url=table_screenshot), + } + }, ) html_table = pipe_output.main_stuff_as(content_type=HtmlTable) return html_table diff --git a/docs/pages/cookbook-examples/hello-world.md 
b/docs/pages/cookbook-examples/hello-world.md index 443690e5f..e9fd824b4 100644 --- a/docs/pages/cookbook-examples/hello-world.md +++ b/docs/pages/cookbook-examples/hello-world.md @@ -17,14 +17,16 @@ The `hello_world` function demonstrates the simplest possible Pipelex pipeline. ```python import asyncio -from pipelex.tools.misc.pretty import pretty_print +from pipelex import pretty_print from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline async def hello_world(): - - # Execute the pipeline + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe pipe_output = await execute_pipeline( pipe_code="hello_world", ) diff --git a/docs/pages/cookbook-examples/invoice-extractor.md b/docs/pages/cookbook-examples/invoice-extractor.md index 5f3c8d8f7..8e37231ff 100644 --- a/docs/pages/cookbook-examples/invoice-extractor.md +++ b/docs/pages/cookbook-examples/invoice-extractor.md @@ -11,17 +11,12 @@ This example provides a comprehensive pipeline for processing invoices. It takes The `process_invoice` pipeline is a complete workflow for invoice processing. ```python -async def process_expense_report() -> ListContent[Invoice]: - invoice_pdf_path = "assets/invoice_extractor/invoice_1.pdf" - - # Create Stuff objects - working_memory = WorkingMemoryFactory.make_from_pdf( - pdf_url=invoice_pdf_path, - name="invoice_pdf", - ) +async def process_invoice(pdf_url: str) -> ListContent[Invoice]: pipe_output = await execute_pipeline( pipe_code="process_invoice", - working_memory=working_memory, + input_memory={ + "document": PDFContent(url=pdf_url), + }, ) return pipe_output.main_stuff_as_list(item_type=Invoice) @@ -62,27 +57,30 @@ class Invoice(StructuredContent): The entire workflow is defined in a PLX file. This declarative approach makes the pipeline easy to understand and modify. 
Here's a snippet from `invoice.plx`: ```plx -# The main pipeline, a sequence of steps [pipe.process_invoice] type = "PipeSequence" description = "Process relevant information from an invoice" -inputs = { invoice_pdf = "PDF" } +inputs = { document = "PDF" } output = "Invoice" steps = [ - # First, run OCR on the PDF { pipe = "extract_text_from_image", result = "invoice_pages" }, - # Then, run the invoice extraction on each page { pipe = "extract_invoice", batch_over = "invoice_pages", batch_as = "invoice_page", result = "invoice" }, ] -# A sub-pipeline that uses an LLM to extract the data +[pipe.extract_text_from_image] +type = "PipeExtract" +description = "Extract page contents from an image" +inputs = { document = "PDF" } +output = "Page" +page_views = true +model = "base_extract_mistral" + [pipe.extract_invoice_data] type = "PipeLLM" description = "Extract invoice information from an invoice text transcript" -inputs = { "invoice_page.page_view" = "Page", invoice_details = "InvoiceDetails" } +inputs = { "invoice_page.page_view" = "Image", invoice_details = "InvoiceDetails", invoice_page = "Page" } output = "Invoice" -# The output is constrained to the "Invoice" model -model = "llm_to_extract_invoice" +model = "llm_to_extract_invoice" prompt = """ Extract invoice information from this invoice: @@ -91,7 +89,8 @@ The category of this invoice is: $invoice_details.category. @invoice_page.text_and_images.text.text """ ``` -This shows how a complex workflow, including OCR and LLM calls, can be defined in a simple, readable format. The `model = "llm_to_extract_invoice"` line is particularly powerful, as it tells the LLM to structure its output according to the `Invoice` model. + +This shows how a complex workflow, including text extraction with `PipeExtract` and LLM calls, can be defined in a simple, readable format. The `model = "llm_to_extract_invoice"` line is particularly powerful, as it tells the LLM to structure its output according to the `Invoice` model. 
## The Pipeline Flowchart diff --git a/docs/pages/cookbook-examples/simple-ocr.md b/docs/pages/cookbook-examples/simple-ocr.md index f606cffb2..5ec655a87 100644 --- a/docs/pages/cookbook-examples/simple-ocr.md +++ b/docs/pages/cookbook-examples/simple-ocr.md @@ -6,25 +6,22 @@ This is a fundamental building block for many document processing workflows. ## Get the code -[**➡️ View on GitHub: examples/simple_extract.py**](https://github.com/Pipelex/pipelex-cookbook/blob/main/examples/simple_ocr.py) +[**➡️ View on GitHub: examples/_quick_start/simple_ocr.py**](https://github.com/Pipelex/pipelex-cookbook/blob/main/examples/_quick_start/simple_ocr.py) ## The Pipeline Explained -The core of this example is a simple function that creates a "working memory" from a PDF and then executes a pre-defined pipeline called `extract_page_contents_from_pdf`. +The core of this example is a simple function that executes a pre-defined pipeline called `extract_page_contents_from_pdf`. ```python -async def simple_ocr(pdf_url: str): - working_memory = WorkingMemoryFactory.make_from_pdf( - pdf_url=pdf_url, - concept_string="PDF", - name="pdf", - ) +async def simple_ocr(pdf_url: str) -> ListContent[PageContent]: pipe_output = await execute_pipeline( pipe_code="extract_page_contents_from_pdf", - working_memory=working_memory, + input_memory={ + "document": PDFContent(url=pdf_url), + }, ) page_content_list: ListContent[PageContent] = pipe_output.main_stuff_as_list(item_type=PageContent) return page_content_list ``` -This showcases how easy it is to kick off a complex process with just a few lines of code. \ No newline at end of file +This showcases how easy it is to kick off a complex process with just a few lines of code. The `input_memory` dictionary simply maps the input name to the PDF content, and the pipeline handles the rest. 
\ No newline at end of file diff --git a/docs/pages/cookbook-examples/write-screenplay.md b/docs/pages/cookbook-examples/write-screenplay.md index 0f1ef8b19..b9c809949 100644 --- a/docs/pages/cookbook-examples/write-screenplay.md +++ b/docs/pages/cookbook-examples/write-screenplay.md @@ -8,26 +8,20 @@ This example demonstrates how to use Pipelex for creative text generation. It ta ## The Pipeline Explained -The `generate_screenplay` function takes a pitch as a string, creates a `Stuff` object with the `screenplay.Pitch` concept, and then runs the `generate_screenplay` pipeline. +The `generate_screenplay` function takes a pitch as a string and executes the `generate_screenplay` pipeline, passing the pitch through the `input_memory` dictionary with the concept specification. ```python async def generate_screenplay(pitch: str): """Generate a screenplay from a pitch using the pipeline.""" - # Create Stuff object for the pitch - pitch_stuff = StuffFactory.make_from_concept_string( - concept_string="screenplay.Pitch", - content=TextContent(text=pitch), - name="pitch", - ) - - # Create Working Memory - working_memory = WorkingMemoryFactory.make_from_single_stuff(pitch_stuff) - - # Run the pipe pipe_output = await execute_pipeline( pipe_code="generate_screenplay", - working_memory=working_memory, + input_memory={ + "pitch": { + "concept": "screenplay.Pitch", + "content": pitch, + } + }, ) pretty_print(pipe_output, title="Pipe Output") ``` diff --git a/docs/pages/cookbook-examples/write-tweet.md b/docs/pages/cookbook-examples/write-tweet.md index 4e8bdd6d5..f91d853f3 100644 --- a/docs/pages/cookbook-examples/write-tweet.md +++ b/docs/pages/cookbook-examples/write-tweet.md @@ -8,38 +8,26 @@ This example demonstrates how to create a pipeline that takes a draft of a tweet ## The Pipeline Explained -The `optimize_tweet` function is the core of this example. 
It takes two strings, `draft_tweet_str` and `writing_style_str`, creates two `Stuff` objects with the concepts `tech_tweet.DraftTweet` and `tech_tweet.WritingStyle`, and then runs the `optimize_tweet_sequence` pipeline. +The `optimize_tweet` function is the core of this example. It takes two strings, `draft_tweet` and `writing_style`, and executes the `optimize_tweet_sequence` pipeline, passing both inputs through the `input_memory` dictionary with their respective concept specifications. ```python -async def optimize_tweet(draft_tweet_str: str, writing_style_str: str) -> OptimizedTweet: - # Create the draft tweet stuff - draft_tweet = StuffFactory.make_from_concept_string( - concept_string="tech_tweet.DraftTweet", - content=TextContent(text=draft_tweet_str), - name="draft_tweet", - ) - writing_style = StuffFactory.make_from_concept_string( - concept_string="tech_tweet.WritingStyle", - content=TextContent(text=writing_style_str), - name="writing_style", - ) - - # Create working memory - working_memory = WorkingMemoryFactory.make_from_multiple_stuffs( - [ - draft_tweet, - writing_style, - ] - ) - - # Run the sequence pipe +async def optimize_tweet(draft_tweet: str, writing_style: str) -> str: pipe_output = await execute_pipeline( pipe_code="optimize_tweet_sequence", - working_memory=working_memory, + input_memory={ + "draft_tweet": { + "concept": "tech_tweet.DraftTweet", + "content": draft_tweet, + }, + "writing_style": { + "concept": "tech_tweet.WritingStyle", + "content": writing_style, + }, + }, ) # Get the optimized tweet - optimized_tweet = pipe_output.main_stuff_as(content_type=OptimizedTweet) + optimized_tweet = pipe_output.main_stuff_as_str return optimized_tweet ``` From 70c1f0eb22ae468c2353194681c396e3d0f52a6f Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 03:26:24 +0200 Subject: [PATCH 068/115] Rules --- .cursor/rules/docs.mdc | 14 + .cursor/rules/llms.mdc | 85 +++ .cursor/rules/pytest_standards.mdc | 164 ++++ 
.cursor/rules/python_standards.mdc | 143 ++++ .cursor/rules/run_pipelex.mdc | 231 ++++++ .cursor/rules/tdd.mdc | 28 + .cursor/rules/write_pipelex.mdc | 837 ++++++++++++++++++++ .github/copilot-instructions.md | 1135 ++++++++++++++++++++++++++++ .windsurfrules.md | 1135 ++++++++++++++++++++++++++++ AGENTS.md | 1135 ++++++++++++++++++++++++++++ BLACKBOX_RULES.md | 1135 ++++++++++++++++++++++++++++ CLAUDE.md | 1135 ++++++++++++++++++++++++++++ 12 files changed, 7177 insertions(+) create mode 100644 .cursor/rules/docs.mdc create mode 100644 .cursor/rules/llms.mdc create mode 100644 .cursor/rules/pytest_standards.mdc create mode 100644 .cursor/rules/python_standards.mdc create mode 100644 .cursor/rules/run_pipelex.mdc create mode 100644 .cursor/rules/tdd.mdc create mode 100644 .cursor/rules/write_pipelex.mdc create mode 100644 .github/copilot-instructions.md create mode 100644 .windsurfrules.md create mode 100644 AGENTS.md create mode 100644 BLACKBOX_RULES.md create mode 100644 CLAUDE.md diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc new file mode 100644 index 000000000..1400c5cd6 --- /dev/null +++ b/.cursor/rules/docs.mdc @@ -0,0 +1,14 @@ +--- +alwaysApply: false +description: Guidelines for writing documentation +globs: +- docs/**/*.md +--- +Write docs and answer questions about writing docs. + +We use Material for MkDocs. All markdown in our docs must be compatible with Material for MkDocs and done using best practices to get the best results with Material for MkDocs. + +## MkDocs Markdown Requirements + +- Always add a blank line before any bullet lists or numbered lists in MkDocs markdown. + diff --git a/.cursor/rules/llms.mdc b/.cursor/rules/llms.mdc new file mode 100644 index 000000000..a21831a30 --- /dev/null +++ b/.cursor/rules/llms.mdc @@ -0,0 +1,85 @@ +--- +alwaysApply: false +description: LLM configuration and usage guidelines +globs: +- '*.plx' +- '*.toml' +--- +# Rules to choose LLM models used in PipeLLMs. 
+ +## LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +## LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. **An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +## Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +## LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. 
+ +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc new file mode 100644 index 000000000..13265cb95 --- /dev/null +++ b/.cursor/rules/pytest_standards.mdc @@ -0,0 +1,164 @@ +--- +alwaysApply: false +description: Guidelines for writing unit tests +globs: +- tests/**/*.py +--- +# Writing unit tests + +## Unit test generalities + +NEVER USE unittest.mock or MagicMock. YOU MUST USE pytest-mock instead. 
+ +### Test file structure + +- Name test files with `test_` prefix +- Use descriptive names that match the functionality being tested +- Place test files in the appropriate test category directory: + - `tests/unit/` - for unit tests that test individual functions/classes in isolation + - `tests/integration/` - for integration tests that test component interactions + - `tests/e2e/` - for end-to-end tests that test complete workflows + - `tests/test_pipelines/` - for test pipeline definitions (PLX files and their structuring python files) +- Fixtures are defined in conftest.py modules at different levels of the hierarchy, their scope is handled by pytest +- Test data is placed inside test_data.py at different levels of the hierarchy, they must be imported with package paths from the root like `tests.pipelex.test_data`. Their content is all constants, regrouped inside classes to keep things tidy. +- Always put tests inside Test classes. +- The pipelex pipelines should be stored in `tests/test_pipelines` as well as the related structured Output classes that inherit from `StructuredContent` + +### Markers + +Apply the appropriate markers: +- "llm: uses an LLM to generate text or objects" +- "img_gen: uses an image generation AI" +- "extract: uses text/image extraction from documents" +- "inference: uses either an LLM or an image generation AI" +- "gha_disabled: will not be able to run properly on GitHub Actions" + +Several markers may be applied. For instance, if the test uses an LLM, then it uses inference, so you must mark with both `inference` and `llm`. + +### Important rules + +- Never use unittest.mock. Use pytest-mock. 
+ +### Test Class Structure + +Always group the tests of a module into a test class: + +```python +@pytest.mark.llm +@pytest.mark.inference +@pytest.mark.asyncio(loop_scope="class") +class TestFooBar: + @pytest.mark.parametrize( + "topic, test_case_blueprint", + [ + TestCases.CASE_1, + TestCases.CASE_2, + ], + ) + async def test_pipe_processing( + self, + request: FixtureRequest, + topic: str, + test_case_blueprint: StuffBlueprint, + ): + # Test implementation +``` + +Sometimes it can be convenient to access the test's name in its body, for instance to include it in a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. + +## Writing integration tests to test pipes + +### Required imports for pipe tests + +```python +import pytest +from pytest import FixtureRequest +from pipelex import log, pretty_print +from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory +from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory +from pipelex.hub import get_report_delegate +from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt +from pipelex.config_pipelex import get_config + +from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe +from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.core.pipes.pipe_run_params import PipeRunParams +from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol +``` + +### Pipe test implementation steps + +1. Create Stuff from blueprint: + +```python +stuff = StuffFactory.make_stuff( + concept_code="RetrievedExcerpt", + domain="retrieve", + content=RetrievedExcerpt(text="", justification=""), + name="retrieved_text", +) +``` + +2. 
Create Working Memory: + +```python +working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) +``` + +3. Run the pipe: + +```python +pipe_output = await pipe_router.run_pipe( + pipe_code="pipe_name", + pipe_run_params=PipeRunParamsFactory.make_run_params(), + working_memory=working_memory, + job_metadata=JobMetadata(), +) +``` + +4. Basic assertions: + +```python +assert pipe_output is not None +assert pipe_output.working_memory is not None +assert pipe_output.main_stuff is not None +``` + +### Test Data Organization + +- If it's not already there, create a `test_data.py` file in the test directory +- Define test cases using `StuffBlueprint`: + +```python +class TestCases: + CASE_BLUEPRINT_1 = StuffBlueprint( + name="test_case_1", + concept_code="domain.ConceptName1", + value="test_value" + ) + CASE_BLUEPRINT_2 = StuffBlueprint( + name="test_case_2", + concept_code="domain.ConceptName2", + value="test_value" + ) + + CASE_BLUEPRINTS: ClassVar[list[tuple[str, StuffBlueprint]]] = [ # topic, blueprint + ("topic1", CASE_BLUEPRINT_1), + ("topic2", CASE_BLUEPRINT_2), + ] +``` + +Note how we avoid initializing a default mutable value within a class instance, instead we use ClassVar. +Also note that we provide a topic for the test case, which is purely for convenience. 
+ +## Best Practices for Testing + +- Use parametrize for multiple test cases +- Test both success and failure cases +- Verify working memory state +- Check output structure and content +- Use meaningful test case names +- Include docstrings explaining test purpose +- Log outputs for debugging +- Generate reports for cost tracking diff --git a/.cursor/rules/python_standards.mdc b/.cursor/rules/python_standards.mdc new file mode 100644 index 000000000..150864d1e --- /dev/null +++ b/.cursor/rules/python_standards.mdc @@ -0,0 +1,143 @@ +--- +alwaysApply: false +description: Python coding standards and best practices +globs: +- '**/*.py' +--- +# Coding Standards & Best Practices for Python Code + +This document outlines the core coding standards, best practices, and quality control procedures for the codebase. + +## Type Hints + +1. **Always Use Type Hints** + + - Every function parameter must be typed + - Every function return must be typed + - Use type hints for all variables where type is not obvious + - Use dict, list, tuple types with lowercase first letter: dict[], list[], tuple[] + - Use type hints for all fields + - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals (not `Optional[]`) + - Use Field(default_factory=...) for mutable defaults + +2. **BaseModel / Pydantic Standards** + + - Use `BaseModel` and respect Pydantic v2 standards + - Use the modern `ConfigDict` when needed, e.g. 
`model_config = ConfigDict(extra="forbid", strict=True)` + - Keep models focused and single-purpose + - For list fields with non-string items in BaseModels, use `empty_list_factory_of()` to avoid linter complaints: + ```python + from pydantic import BaseModel, Field + from pipelex.tools.typing.pydantic_utils import empty_list_factory_of + + class MyModel(BaseModel): + names: list[str] = Field(default_factory=list) # OK for strings + numbers: list[int] = Field(default_factory=empty_list_factory_of(int), description="A list of numbers") + items: list[MyItem] = Field(default_factory=empty_list_factory_of(MyItem), description="A list of items") + ``` + +3. **StrEnum** + - Import from `pipelex.types`: + ```python + from pipelex.types import StrEnum + ``` + +4. **Self type** + - Import from `pipelex.types`: + ```python + from pipelex.types import Self + ``` + +## Factory Pattern + + - Use Factory Pattern for object creation when dealing with multiple implementations + - Our factory methods are named `make_from_...` and such + +## Error Handling + + - Always catch exceptions at the place where you can add useful context to it. + - Use try/except blocks with specific exceptions + - Convert third-party exceptions to our custom ones + - Never catch Exception, only catch specific exceptions + - Always add `from exc` to the exception + + ```python + try: + self.models_manager.setup() + except RoutingProfileLibraryNotFoundError as exc: + msg = "The routing library could not be found, please call `pipelex init config` to create it" + raise PipelexSetupError(msg) from exc + ``` + + **Note**: Following Ruff rules, we set the error message as a variable before raising it, for cleaner error traces. + +## Documentation + +1. **Docstring Format** + ```python + def process_image(image_path: str, size: tuple[int, int]) -> bytes: + """Process and resize an image. 
+ + Args: + image_path: Path to the source image + size: Tuple of (width, height) for resizing + + Returns: + Processed image as bytes + """ + pass + ``` + +2. **Class Documentation** + ```python + class ImageProcessor: + """Handles image processing operations. + + Provides methods for resizing, converting, and optimizing images. + """ + ``` + +## Code Quality Checks + +### Linting and Type Checking + +Before finalizing a task, run: +```bash +make fix-unused-imports +make check +``` + +This runs multiple code quality tools: +- Pyright: Static type checking +- Ruff: Fast Python linter +- Mypy: Static type checker + +Always fix any issues reported by these tools before proceeding. + +### Running Tests + +1. **Quick Test Run** (no LLM/image generation): + ```bash + make tp + ``` + Runs tests with markers: `(dry_runnable or not (inference or llm or img_gen or extract)) and not (needs_output or pipelex_api)` + +2. **Specific Tests**: + ```bash + make tp TEST=TestClassName + # or + make tp TEST=test_function_name + ``` + Note: Matches names starting with the provided string. + +**Important**: Never run `make ti`, `make test-inference`, `make te`, `make test-extract`, `make tg`, or `make test-img-gen` - these use costly inference. + +## Pipelines + +- Always validate pipelines after creation/edit with `make validate`. + Iterate if there are errors. 
+ +## Project Structure + +- **Tests**: `tests/` directory +- **Documentation**: `docs/` directory diff --git a/.cursor/rules/run_pipelex.mdc b/.cursor/rules/run_pipelex.mdc new file mode 100644 index 000000000..11e6c668f --- /dev/null +++ b/.cursor/rules/run_pipelex.mdc @@ -0,0 +1,231 @@ +--- +alwaysApply: false +description: Guidelines for running Pipelex pipelines +globs: +- examples/**/*.py +--- +# Guide to execute a pipeline and write example code + +## Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +# start Pipelex +Pipelex.make() +# run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +## Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent + +from my_project.gantt.gantt_struct import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +# start Pipelex +Pipelex.make() + +# run sample using asyncio +gantt_chart = 
asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +## Setting up the input memory + +### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str +ImplicitMemory = dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +# Here we have a single input and it's a Text. +# If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +# Here we have a single input and it's a PDF. +# Because PDFContent is a native concept, we can use it directly as a value, +# the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +# Here we have a single input and it's an Image. 
+# Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +# Here we have a single input, it's an image but +# it's actually a more specific concept gantt.GanttImage which refines Image, +# so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +# Here is a more complex example with multiple inputs assigned in different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +## Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + diff --git a/.cursor/rules/tdd.mdc b/.cursor/rules/tdd.mdc new file mode 100644 index 000000000..4b4f058b5 --- /dev/null +++ b/.cursor/rules/tdd.mdc @@ -0,0 +1,28 @@ +--- +alwaysApply: false +description: Guidelines for writing test-driven development code +--- +# Test-Driven Development Guide + +This document outlines our test-driven development (TDD) process and the tools available for testing. + +## TDD Cycle + +1. **Write a Test First** +[pytest.mdc](pytest.mdc) + +2. **Write the Code** + - Implement the minimum amount of code needed to pass the test + - Follow the project's coding standards + - Keep it simple - don't write more than needed + +3. **Run Linting and Type Checking** +[coding_standards.mdc](coding_standards.mdc) + +4. 
**Refactor if needed** +If the code needs refactoring, with the best practices [coding_standards.mdc](coding_standards.mdc) + +5. **Validate tests** + +Remember: The key to TDD is writing the test first and letting it drive your implementation. Always run the full test suite and quality checks before considering a feature complete. + diff --git a/.cursor/rules/write_pipelex.mdc b/.cursor/rules/write_pipelex.mdc new file mode 100644 index 000000000..fb1b64cfd --- /dev/null +++ b/.cursor/rules/write_pipelex.mdc @@ -0,0 +1,837 @@ +--- +alwaysApply: false +description: Guidelines for writing Pipelex pipelines +globs: +- '**/*.plx' +- '**/pipelines/**/*.py' +--- +# Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +## Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for code defining the data structures +- Use descriptive names in `snake_case` + +## Pipeline File Outline +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +### Concept Definitions + +Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
+ +```plx +[concept] +ConceptName = "Description of the concept" +``` + +**Naming Rules:** +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex +- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page) + +**Native Concepts:** +Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`. Use these directly or refine them when appropriate. + +**Refining Native Concepts:** +To create a concept that specializes a native concept without adding fields: + +```plx +[concept.Landscape] +description = "A scenic outdoor photograph" +refines = "Image" +``` + +For details on how to structure concepts with fields, see the "Structuring Models" section below. + +### Pipe Definitions + +## Pipe Base Definition + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +The pipes will all have at least this base definition. +- `inputs`: Dictionary with each key being the variable name used in the prompts, and each value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'`` +That means that the pipe validate_expense is missing the input `invoice` because one of the subpipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: + +## Structuring Models + +Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. + +### Three Ways to Structure Concepts + +**1. No Structure Needed** + +If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. + +**2. Inline Structure Definition (RECOMMENDED for most cases)** + +For concepts with structured fields, define them inline using TOML syntax: + +```plx +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items", required = false } +``` + +**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` + +**Field properties:** `type`, `description`, `required` (default: true), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) + +**Simple syntax** (creates required text field): +```plx +field_name = "Field description" +``` + +**Detailed syntax** (with explicit properties): +```plx +field_name = { type = "text", description = "Field description", required = false, default_value = "default" } +``` + +**3. 
Python StructuredContent Class (For Advanced Features)** + +Create a Python class when you need: +- Custom validation logic (@field_validator, @model_validator) +- Computed properties (@property methods) +- Custom methods or class methods +- Complex cross-field validation +- Reusable structures across multiple domains + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + invoice_number: str = Field(description="The unique invoice identifier") + total_amount: float = Field(ge=0, description="The total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. + +### Decision Rules for Agents + +**If concept already exists:** +- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class +- If it's already a Python class → KEEP IT as Python class + +**If creating new concept:** +1. Does it only refine a native concept without adding fields? → Use concept-only declaration +2. Does it need custom validation, computed properties, or methods? → Use Python class +3. 
Otherwise → Use inline structure (fastest and simplest) + +**When to suggest conversion to Python class:** +- User needs validation logic beyond type checking +- User needs computed properties or custom methods +- Structure needs to be reused across multiple domains +- Complex type relationships or inheritance required + +### Inline Structure Limitations + +Inline structures: +- ✅ Support all common field types (text, number, date, list, dict, etc.) +- ✅ Support required/optional fields, defaults, choices +- ✅ Generate full Pydantic models with validation +- ❌ Cannot have custom validators or complex validation logic +- ❌ Cannot have computed properties or custom methods +- ❌ Cannot refine custom (non-native) concepts +- ❌ Limited IDE autocomplete compared to explicit Python classes + + +## Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +## PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
+ +### Basic Definition +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +## PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." 
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +## PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. 
+ + +## PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, PipeExtract outputs a `ListContent` of `Page` + +```python +class TextAndImagesContent(StuffContent): + text: TextContent | None + images: list[ImageContent] | None + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: ImageContent | None = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +## PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +# Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +## PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +## PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000..0ef0bbe83 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,1135 @@ + +## Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +### Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for code defining the data structures +- Use descriptive names in `snake_case` + +### Pipeline File Outline +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +#### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +#### Concept Definitions + +Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
+ +```plx +[concept] +ConceptName = "Description of the concept" +``` + +**Naming Rules:** +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex +- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page) + +**Native Concepts:** +Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`. Use these directly or refine them when appropriate. + +**Refining Native Concepts:** +To create a concept that specializes a native concept without adding fields: + +```plx +[concept.Landscape] +description = "A scenic outdoor photograph" +refines = "Image" +``` + +For details on how to structure concepts with fields, see the "Structuring Models" section below. + +#### Pipe Definitions + +### Pipe Base Definition + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +The pipes will all have at least this base definition. +- `inputs`: Dictionary with each key being the variable name used in the prompts, and each value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'`` +That means that the pipe validate_expense is missing the input `invoice` because one of the subpipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: + +### Structuring Models + +Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. + +#### Three Ways to Structure Concepts + +**1. No Structure Needed** + +If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. + +**2. Inline Structure Definition (RECOMMENDED for most cases)** + +For concepts with structured fields, define them inline using TOML syntax: + +```plx +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items", required = false } +``` + +**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` + +**Field properties:** `type`, `description`, `required` (default: true), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) + +**Simple syntax** (creates required text field): +```plx +field_name = "Field description" +``` + +**Detailed syntax** (with explicit properties): +```plx +field_name = { type = "text", description = "Field description", required = false, default_value = "default" } +``` + +**3. 
Python StructuredContent Class (For Advanced Features)** + +Create a Python class when you need: +- Custom validation logic (@field_validator, @model_validator) +- Computed properties (@property methods) +- Custom methods or class methods +- Complex cross-field validation +- Reusable structures across multiple domains + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + invoice_number: str = Field(description="The unique invoice identifier") + total_amount: float = Field(ge=0, description="The total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. + +#### Decision Rules for Agents + +**If concept already exists:** +- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class +- If it's already a Python class → KEEP IT as Python class + +**If creating new concept:** +1. Does it only refine a native concept without adding fields? → Use concept-only declaration +2. Does it need custom validation, computed properties, or methods? → Use Python class +3. 
Otherwise → Use inline structure (fastest and simplest) + +**When to suggest conversion to Python class:** +- User needs validation logic beyond type checking +- User needs computed properties or custom methods +- Structure needs to be reused across multiple domains +- Complex type relationships or inheritance required + +#### Inline Structure Limitations + +Inline structures: +- ✅ Support all common field types (text, number, date, list, dict, etc.) +- ✅ Support required/optional fields, defaults, choices +- ✅ Generate full Pydantic models with validation +- ❌ Cannot have custom validators or complex validation logic +- ❌ Cannot have computed properties or custom methods +- ❌ Cannot refine custom (non-native) concepts +- ❌ Limited IDE autocomplete compared to explicit Python classes + + +### Pipe Controllers and Pipe Operators + +Look at the available pipes listed below and adapt them to your needs. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (including Vision LLMs) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +### PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps.
+ +#### Basic Definition +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +#### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +#### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +### PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +#### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." 
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +#### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +#### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +#### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +#### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence.
+ + +### PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +#### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +Each `Page` corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: TextContent | None + images: list[ImageContent] | None + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: ImageContent | None = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +### PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more.
+ +#### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +## Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +#### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +### PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +#### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +#### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +#### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +### PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +#### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +#### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +#### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +#### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +#### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +### Rules to choose LLM models used in PipeLLMs. + +#### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +#### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +#### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +#### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +## Guide to execute a pipeline and write example code + +### Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +## start Pipelex +Pipelex.make() +## run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +### Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent + +from my_project.gantt.gantt_struct import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +## start Pipelex +Pipelex.make() + +## run sample using asyncio 
+gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +### Setting up the input memory + +#### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str +ImplicitMemory = dict[str, StuffContentOrData] +``` +As you can see, the stuff can be defined in several ways, using either structured content or plain data. + +#### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +## Here we have a single input and it's an Image.
+## Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +## Here is a more complex example with multiple inputs assigned in different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +### Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ...
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +#### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2.
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/.windsurfrules.md b/.windsurfrules.md new file mode 100644 index 000000000..0ef0bbe83 --- /dev/null +++ b/.windsurfrules.md @@ -0,0 +1,1135 @@ + +## Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +### Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for code defining the data structures +- Use descriptive names in `snake_case` + +### Pipeline File Outline +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +#### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +#### Concept Definitions + +Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
+ +```plx +[concept] +ConceptName = "Description of the concept" +``` + +**Naming Rules:** +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex +- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page) + +**Native Concepts:** +Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`. Use these directly or refine them when appropriate. + +**Refining Native Concepts:** +To create a concept that specializes a native concept without adding fields: + +```plx +[concept.Landscape] +description = "A scenic outdoor photograph" +refines = "Image" +``` + +For details on how to structure concepts with fields, see the "Structuring Models" section below. + +#### Pipe Definitions + +### Pipe Base Definition + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +The pipes will all have at least this base definition. +- `inputs`: Dictionary whose keys are the variable names used in the prompts and whose values are the corresponding ConceptNames. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: + +### Structuring Models + +Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. + +#### Three Ways to Structure Concepts + +**1. No Structure Needed** + +If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. + +**2. Inline Structure Definition (RECOMMENDED for most cases)** + +For concepts with structured fields, define them inline using TOML syntax: + +```plx +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items", required = false } +``` + +**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` + +**Field properties:** `type`, `description`, `required` (default: true), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) + +**Simple syntax** (creates required text field): +```plx +field_name = "Field description" +``` + +**Detailed syntax** (with explicit properties): +```plx +field_name = { type = "text", description = "Field description", required = false, default_value = "default" } +``` + +**3. 
Python StructuredContent Class (For Advanced Features)** + +Create a Python class when you need: +- Custom validation logic (@field_validator, @model_validator) +- Computed properties (@property methods) +- Custom methods or class methods +- Complex cross-field validation +- Reusable structures across multiple domains + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + invoice_number: str = Field(description="The unique invoice identifier") + total_amount: float = Field(ge=0, description="The total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. + +#### Decision Rules for Agents + +**If concept already exists:** +- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class +- If it's already a Python class → KEEP IT as Python class + +**If creating new concept:** +1. Does it only refine a native concept without adding fields? → Use concept-only declaration +2. Does it need custom validation, computed properties, or methods? → Use Python class +3. 
Otherwise → Use inline structure (fastest and simplest) + +**When to suggest conversion to Python class:** +- User needs validation logic beyond type checking +- User needs computed properties or custom methods +- Structure needs to be reused across multiple domains +- Complex type relationships or inheritance required + +#### Inline Structure Limitations + +Inline structures: +- ✅ Support all common field types (text, number, date, list, dict, etc.) +- ✅ Support required/optional fields, defaults, choices +- ✅ Generate full Pydantic models with validation +- ❌ Cannot have custom validators or complex validation logic +- ❌ Cannot have computed properties or custom methods +- ❌ Cannot refine custom (non-native) concepts +- ❌ Limited IDE autocomplete compared to explicit Python classes + + +### Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (include Vision LLM) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +### PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
+ +#### Basic Definition +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +#### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +#### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +### PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +#### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." 
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +#### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +#### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +#### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +#### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. 
+ + +### PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +#### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract is outputting a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: TextContent | None + images: list[ImageContent] | None + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: ImageContent | None = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +### PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +#### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +## Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +#### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +### PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +#### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +#### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +#### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +### PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +#### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +#### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +#### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +#### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +#### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +### Rules to choose LLM models used in PipeLLMs. + +#### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +#### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +#### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +#### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +## Guide to execute a pipeline and write example code + +### Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +## start Pipelex +Pipelex.make() +## run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +### Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent + +from my_project.gantt.gantt_struct import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +## start Pipelex +Pipelex.make() + +## run sample using asyncio 
+gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +### Setting up the input memory + +#### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str +ImplicitMemory = dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +#### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +## Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +### Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +#### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..0ef0bbe83 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,1135 @@ + +## Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +### Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for code defining the data structures +- Use descriptive names in `snake_case` + +### Pipeline File Outline +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +#### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +#### Concept Definitions + +Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
+ +```plx +[concept] +ConceptName = "Description of the concept" +``` + +**Naming Rules:** +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex +- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page) + +**Native Concepts:** +Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`. Use these directly or refine them when appropriate. + +**Refining Native Concepts:** +To create a concept that specializes a native concept without adding fields: + +```plx +[concept.Landscape] +description = "A scenic outdoor photograph" +refines = "Image" +``` + +For details on how to structure concepts with fields, see the "Structuring Models" section below. + +#### Pipe Definitions + +### Pipe Base Definition + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +The pipes will all have at least this base definition. +- `inputs`: Dictionary with each key being a variable used in the prompts, and each value being its ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of the sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: + +### Structuring Models + +Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. + +#### Three Ways to Structure Concepts + +**1. No Structure Needed** + +If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. + +**2. Inline Structure Definition (RECOMMENDED for most cases)** + +For concepts with structured fields, define them inline using TOML syntax: + +```plx +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items", required = false } +``` + +**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` + +**Field properties:** `type`, `description`, `required` (default: true), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) + +**Simple syntax** (creates required text field): +```plx +field_name = "Field description" +``` + +**Detailed syntax** (with explicit properties): +```plx +field_name = { type = "text", description = "Field description", required = false, default_value = "default" } +``` + +**3. 
Python StructuredContent Class (For Advanced Features)** + +Create a Python class when you need: +- Custom validation logic (@field_validator, @model_validator) +- Computed properties (@property methods) +- Custom methods or class methods +- Complex cross-field validation +- Reusable structures across multiple domains + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + invoice_number: str = Field(description="The unique invoice identifier") + total_amount: float = Field(ge=0, description="The total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. + +#### Decision Rules for Agents + +**If concept already exists:** +- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class +- If it's already a Python class → KEEP IT as Python class + +**If creating new concept:** +1. Does it only refine a native concept without adding fields? → Use concept-only declaration +2. Does it need custom validation, computed properties, or methods? → Use Python class +3. 
Otherwise → Use inline structure (fastest and simplest) + +**When to suggest conversion to Python class:** +- User needs validation logic beyond type checking +- User needs computed properties or custom methods +- Structure needs to be reused across multiple domains +- Complex type relationships or inheritance required + +#### Inline Structure Limitations + +Inline structures: +- ✅ Support all common field types (text, number, date, list, dict, etc.) +- ✅ Support required/optional fields, defaults, choices +- ✅ Generate full Pydantic models with validation +- ❌ Cannot have custom validators or complex validation logic +- ❌ Cannot have computed properties or custom methods +- ❌ Cannot refine custom (non-native) concepts +- ❌ Limited IDE autocomplete compared to explicit Python classes + + +### Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt them. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (includes Vision LLMs) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +### PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
+ +#### Basic Definition +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +#### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +#### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +### PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +#### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." 
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +#### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +#### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +#### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +#### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is expected to be a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. 
+ + +### PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +#### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: TextContent | None + images: list[ImageContent] | None + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: ImageContent | None = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +### PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +#### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +## Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +#### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +### PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +#### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +#### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +#### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +### PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +#### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +#### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +#### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +#### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +#### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +### Rules to choose LLM models used in PipeLLMs. + +#### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +#### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +#### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +#### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +## Guide to execute a pipeline and write example code + +### Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +## start Pipelex +Pipelex.make() +## run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +### Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent + +from my_project.gantt.gantt_struct import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +## start Pipelex +Pipelex.make() + +## run sample using asyncio 
+gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +### Setting up the input memory + +#### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str +ImplicitMemory = dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +#### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +## Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +### Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +#### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/BLACKBOX_RULES.md b/BLACKBOX_RULES.md new file mode 100644 index 000000000..0ef0bbe83 --- /dev/null +++ b/BLACKBOX_RULES.md @@ -0,0 +1,1135 @@ + +## Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty line at end of file, no trailing whitespace, etc.) + +### Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for code defining the data structures +- Use descriptive names in `snake_case` + +### Pipeline File Outline +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +#### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +#### Concept Definitions + +Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
+ +```plx +[concept] +ConceptName = "Description of the concept" +``` + +**Naming Rules:** +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex +- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page) + +**Native Concepts:** +Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`. Use these directly or refine them when appropriate. + +**Refining Native Concepts:** +To create a concept that specializes a native concept without adding fields: + +```plx +[concept.Landscape] +description = "A scenic outdoor photograph" +refines = "Image" +``` + +For details on how to structure concepts with fields, see the "Structuring Models" section below. + +#### Pipe Definitions + +### Pipe Base Definition + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +The pipes will all have at least this base definition. +- `inputs`: Dictionary where each key is the variable name used in the prompts and each value is the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: + +### Structuring Models + +Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. + +#### Three Ways to Structure Concepts + +**1. No Structure Needed** + +If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. + +**2. Inline Structure Definition (RECOMMENDED for most cases)** + +For concepts with structured fields, define them inline using TOML syntax: + +```plx +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items", required = false } +``` + +**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` + +**Field properties:** `type`, `description`, `required` (default: true), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) + +**Simple syntax** (creates required text field): +```plx +field_name = "Field description" +``` + +**Detailed syntax** (with explicit properties): +```plx +field_name = { type = "text", description = "Field description", required = false, default_value = "default" } +``` + +**3. 
Python StructuredContent Class (For Advanced Features)** + +Create a Python class when you need: +- Custom validation logic (@field_validator, @model_validator) +- Computed properties (@property methods) +- Custom methods or class methods +- Complex cross-field validation +- Reusable structures across multiple domains + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + invoice_number: str = Field(description="The unique invoice identifier") + total_amount: float = Field(ge=0, description="The total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. + +#### Decision Rules for Agents + +**If concept already exists:** +- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class +- If it's already a Python class → KEEP IT as Python class + +**If creating new concept:** +1. Does it only refine a native concept without adding fields? → Use concept-only declaration +2. Does it need custom validation, computed properties, or methods? → Use Python class +3. 
Otherwise → Use inline structure (fastest and simplest) + +**When to suggest conversion to Python class:** +- User needs validation logic beyond type checking +- User needs computed properties or custom methods +- Structure needs to be reused across multiple domains +- Complex type relationships or inheritance required + +#### Inline Structure Limitations + +Inline structures: +- ✅ Support all common field types (text, number, date, list, dict, etc.) +- ✅ Support required/optional fields, defaults, choices +- ✅ Generate full Pydantic models with validation +- ❌ Cannot have custom validators or complex validation logic +- ❌ Cannot have computed properties or custom methods +- ❌ Cannot refine custom (non-native) concepts +- ❌ Limited IDE autocomplete compared to explicit Python classes + + +### Pipe Controllers and Pipe Operators + +Look at the Pipes we have and adapt them to your needs. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on an expression evaluated on stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (including with Vision LLMs) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +### PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
+ +#### Basic Definition +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +#### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +#### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +### PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +#### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." 
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditonal pipe to decide wheter..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +#### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2. 
Process images with Vision LLMs + +#### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +#### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +#### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence. 
+ + +### PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +#### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: TextContent | None + images: list[ImageContent] | None + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: ImageContent | None = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +### PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
+ +#### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +## Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +#### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +### PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +#### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +#### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +#### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +### PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +#### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +#### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +#### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +#### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +#### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +### Rules to choose LLM models used in PipeLLMs. + +#### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +#### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +#### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +#### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +## Guide to execute a pipeline and write example code + +### Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +## start Pipelex +Pipelex.make() +## run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +### Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent + +from my_project.gantt.gantt_struct import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +## start Pipelex +Pipelex.make() + +## run sample using asyncio 
+gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +### Setting up the input memory + +#### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str +ImplicitMemory = dict[str, StuffContentOrData] +``` +As you can see, we made it so that there are different ways to define that stuff, using structured content or data. + +#### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +## Here we have a single input and it's an Image.
+## Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +## Here is a more complex example with multiple inputs assigned in different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +### Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ...
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +#### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2.
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..0ef0bbe83 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,1135 @@ + +## Guide to write or edit pipelines using the Pipelex language in .plx files + +- Always first write your "plan" in natural language, then transcribe it in pipelex. +- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) + +### Pipeline File Naming +- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.py` for code defining the data structures +- Use descriptive names in `snake_case` + +### Pipeline File Outline +A pipeline file has three main sections: +1. Domain statement +2. Concept definitions +3. Pipe definitions + +#### Domain Statement +```plx +domain = "domain_name" +description = "Description of the domain" # Optional +``` +Note: The domain name usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. + +#### Concept Definitions + +Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured.
+ +```plx +[concept] +ConceptName = "Description of the concept" +``` + +**Naming Rules:** +- Use PascalCase for concept names +- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex +- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents +- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page) + +**Native Concepts:** +Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`. Use these directly or refine them when appropriate. + +**Refining Native Concepts:** +To create a concept that specializes a native concept without adding fields: + +```plx +[concept.Landscape] +description = "A scenic outdoor photograph" +refines = "Image" +``` + +For details on how to structure concepts with fields, see the "Structuring Models" section below. + +#### Pipe Definitions + +### Pipe Base Definition + +```plx +[pipe.your_pipe_name] +type = "PipeLLM" +description = "A description of what your pipe does" +inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } +output = "ConceptName" +``` + +The pipes will all have at least this base definition. +- `inputs`: Dictionary where each key is the variable name used in the prompts and each value is the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). +So if you have this error: +`StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • +variable='['invoice']'` +That means that the pipe validate_expense is missing the input `invoice` because one of the sub-pipes needs it. + +NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: + +```plx +inputs = { + input_1 = "ConceptName1", + input_2 = "ConceptName2" +} +``` + + +- `output`: The name of the concept to output.
The `ConceptName` should have the same name as the python class if you want structured output: + +### Structuring Models + +Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. + +#### Three Ways to Structure Concepts + +**1. No Structure Needed** + +If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. + +**2. Inline Structure Definition (RECOMMENDED for most cases)** + +For concepts with structured fields, define them inline using TOML syntax: + +```plx +[concept.Invoice] +description = "A commercial document issued by a seller to a buyer" + +[concept.Invoice.structure] +invoice_number = "The unique invoice identifier" +issue_date = { type = "date", description = "The date the invoice was issued", required = true } +total_amount = { type = "number", description = "The total invoice amount", required = true } +vendor_name = "The name of the vendor" +line_items = { type = "list", item_type = "text", description = "List of items", required = false } +``` + +**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` + +**Field properties:** `type`, `description`, `required` (default: true), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) + +**Simple syntax** (creates required text field): +```plx +field_name = "Field description" +``` + +**Detailed syntax** (with explicit properties): +```plx +field_name = { type = "text", description = "Field description", required = false, default_value = "default" } +``` + +**3. 
Python StructuredContent Class (For Advanced Features)** + +Create a Python class when you need: +- Custom validation logic (@field_validator, @model_validator) +- Computed properties (@property methods) +- Custom methods or class methods +- Complex cross-field validation +- Reusable structures across multiple domains + +```python +from pipelex.core.stuffs.structured_content import StructuredContent +from pydantic import Field, field_validator + +class Invoice(StructuredContent): + """A commercial invoice with validation.""" + + invoice_number: str = Field(description="The unique invoice identifier") + total_amount: float = Field(ge=0, description="The total invoice amount") + tax_amount: float = Field(ge=0, description="Tax amount") + + @field_validator('tax_amount') + @classmethod + def validate_tax(cls, v, info): + """Ensure tax doesn't exceed total.""" + total = info.data.get('total_amount', 0) + if v > total: + raise ValueError('Tax amount cannot exceed total amount') + return v +``` + +**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. + +#### Decision Rules for Agents + +**If concept already exists:** +- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class +- If it's already a Python class → KEEP IT as Python class + +**If creating new concept:** +1. Does it only refine a native concept without adding fields? → Use concept-only declaration +2. Does it need custom validation, computed properties, or methods? → Use Python class +3. 
Otherwise → Use inline structure (fastest and simplest) + +**When to suggest conversion to Python class:** +- User needs validation logic beyond type checking +- User needs computed properties or custom methods +- Structure needs to be reused across multiple domains +- Complex type relationships or inheritance required + +#### Inline Structure Limitations + +Inline structures: +- ✅ Support all common field types (text, number, date, list, dict, etc.) +- ✅ Support required/optional fields, defaults, choices +- ✅ Generate full Pydantic models with validation +- ❌ Cannot have custom validators or complex validation logic +- ❌ Cannot have computed properties or custom methods +- ❌ Cannot refine custom (non-native) concepts +- ❌ Limited IDE autocomplete compared to explicit Python classes + + +### Pipe Controllers and Pipe Operators + +Look at the Pipes we have in order to adapt them. Pipes are organized in two categories: + +1. **Controllers** - For flow control: + - `PipeSequence` - For creating a sequence of multiple steps + - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory + - `PipeParallel` - For parallelizing pipes + +2. **Operators** - For specific tasks: + - `PipeLLM` - Generate Text and Objects (includes Vision LLMs) + - `PipeExtract` - Extract text and images from an image or a PDF + - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. + - `PipeImgGen` - Generate Images + - `PipeFunc` - For running classic python scripts + +### PipeSequence controller + +Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps.
+ +#### Basic Definition +```plx +[pipe.your_sequence_name] +type = "PipeSequence" +description = "Description of what this sequence does" +inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps +output = "OutputType" +steps = [ + { pipe = "first_pipe", result = "first_result" }, + { pipe = "second_pipe", result = "second_result" }, + { pipe = "final_pipe", result = "final_result" } +] +``` + +#### Key Components + +1. **Steps Array**: List of pipes to execute in sequence + - `pipe`: Name of the pipe to execute + - `result`: Name to assign to the pipe's output that will be in the working memory + +#### Using PipeBatch in Steps + +You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: + +```plx +steps = [ + { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" + } +] +``` + +1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. + - Must be a `ListContent` type containing the items to process + - Can reference inputs or results from previous steps + +2. **batch_as**: Defines the name that will be used to reference the current item being processed + - This name can be used in the pipe's input mappings + - Makes each item from the batch available as a single element + +The result of a batched step will be a `ListContent` containing the outputs from processing each item. + +### PipeCondition controller + +The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. + +#### Basic usage + +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..."
+inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression = "input_data.category" +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` +or +```plx +[pipe.conditional_operation] +type = "PipeCondition" +description = "A conditional pipe to decide whether..." +inputs = { input_data = "CategoryInput" } +output = "native.Text" +expression_template = "{{ input_data.category }}" # Jinja2 code +default_outcome = "process_medium" + +[pipe.conditional_operation.outcomes] +small = "process_small" +medium = "process_medium" +large = "process_large" +``` + +#### Key Parameters + +- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) +- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) +- `outcomes`: Dictionary mapping expression results to pipe codes: + 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` + 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger +- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found + +Example with fail as default: +```plx +[pipe.strict_validation] +type = "PipeCondition" +description = "Validate with strict matching" +inputs = { status = "Status" } +output = "Text" +expression = "status.value" +default_outcome = "fail" + +[pipe.strict_validation.outcomes] +approved = "process_approved" +rejected = "process_rejected" +``` + +### PipeLLM operator + +PipeLLM is used to: +1. Generate text or objects with LLMs +2.
Process images with Vision LLMs + +#### Basic Usage + +Simple Text Generation: +```plx +[pipe.write_story] +type = "PipeLLM" +description = "Write a short story" +output = "Text" +prompt = """ +Write a short story about a programmer. +""" +``` + +Structured Data Extraction: +```plx +[pipe.extract_info] +type = "PipeLLM" +description = "Extract information" +inputs = { text = "Text" } +output = "PersonInfo" +prompt = """ +Extract person information from this text: +@text +""" +``` + +Supports system instructions: +```plx +[pipe.expert_analysis] +type = "PipeLLM" +description = "Expert analysis" +output = "Analysis" +system_prompt = "You are a data analysis expert" +prompt = "Analyze this data" +``` + +#### Multiple Outputs + +Generate multiple outputs (fixed number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +nb_output = 3 # Generate exactly 3 ideas +``` + +Generate multiple outputs (variable number): +```plx +[pipe.generate_ideas] +type = "PipeLLM" +description = "Generate ideas" +output = "Idea" +multiple_output = true # Let the LLM decide how many to generate +``` + +#### Vision + +Process images with VLMs (image inputs must be tagged in the prompt): +```plx +[pipe.analyze_image] +type = "PipeLLM" +description = "Analyze image" +inputs = { image = "Image" } +output = "ImageAnalysis" +prompt = """ +Describe what you see in this image: + +$image +""" +``` + +You can also reference images inline in meaningful sentences to guide the Visual LLM: +```plx +[pipe.compare_images] +type = "PipeLLM" +description = "Compare two images" +inputs = { photo = "Image", painting = "Image" } +output = "Analysis" +prompt = "Analyze the colors in $photo and the shapes in $painting." 
+``` + +#### Writing prompts for PipeLLM + +**Insert stuff inside a tagged block** + +If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown document. To include stuff as a block, use the "@" prefix. + +Example template: +```plx +prompt = """ +Match the expense with its corresponding invoice: + +@expense + +@invoices +""" +``` +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. + +DO NOT write things like "Here is the expense: @expense". +DO write simply "@expense" alone in an isolated line. + +**Insert stuff inline** + +If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. + +Example template: +```plx +prompt = """ +Your goal is to summarize everything related to $topic in the provided text: + +@text + +Please provide only the summary, with no additional text or explanations. +Your summary should not be longer than 2 sentences. +""" +``` + +In the example above, $topic will be inserted inline, whereas @text will be a delimited block. +Be sure to make the proper choice of prefix for each insertion. + +DO NOT write "$topic" alone in an isolated line. +DO write things like "Write an essay about $topic" to include text into an actual sentence.
+ + +### PipeExtract operator + +The PipeExtract operator is used to extract text and images from an image or a PDF. + +#### Simple Text Extraction +```plx +[pipe.extract_info] +type = "PipeExtract" +description = "extract the information" +inputs = { document = "PDF" } # or { image = "Image" } if it's an image. This is the only input. +output = "Page" +``` + +Using Extract Model Settings: +```plx +[pipe.extract_with_model] +type = "PipeExtract" +description = "Extract with specific model" +inputs = { document = "PDF" } +output = "Page" +model = "base_extract_mistral" # Use predefined extract preset or model alias +``` + +Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. + +The output concept `Page` is a native concept, with the structure `PageContent`: +It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. + +```python +class TextAndImagesContent(StuffContent): + text: TextContent | None + images: list[ImageContent] | None + +class PageContent(StructuredContent): # CONCEPT IS "Page" + text_and_images: TextAndImagesContent + page_view: ImageContent | None = None +``` +- `text_and_images` are the text, and the related images found in the input image or PDF. +- `page_view` is the screenshot of the whole pdf page/image. + +### PipeCompose operator + +The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more.
+ +#### Basic Usage + +Simple Template Composition: +```plx +[pipe.compose_report] +type = "PipeCompose" +description = "Compose a report using template" +inputs = { data = "ReportData" } +output = "Text" +template = """ +## Report Summary + +Based on the analysis: +$data + +Generated on: {{ current_date }} +""" +``` + +Using Named Templates: +```plx +[pipe.use_template] +type = "PipeCompose" +description = "Use a predefined template" +inputs = { content = "Text" } +output = "Text" +template_name = "standard_report_template" +``` + +Using Nested Template Section (for more control): +```plx +[pipe.advanced_template] +type = "PipeCompose" +description = "Use advanced template settings" +inputs = { data = "ReportData" } +output = "Text" + +[pipe.advanced_template.template] +template = "Report: $data" +category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +``` + +CRM Email Template: +```plx +[pipe.compose_follow_up_email] +type = "PipeCompose" +description = "Compose a personalized follow-up email for CRM" +inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } +output = "Text" +template_category = "html" +templating_style = { tag_style = "square_brackets", text_format = "html" } +template = """ +Subject: Following up on our $deal.product_name discussion + +Hi $customer.first_name, + +I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. + +Based on our discussion, I understand that your key requirements are: $deal.customer_requirements + +I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: + +**Next Steps:** +- Schedule a demo tailored to your specific needs +- Provide you with a customized quote based on your requirements +- Connect you with our implementation team + +Would you be available for a 30-minute call this week? 
I have openings on: +{% for slot in available_slots %} +- {{ slot }} +{% endfor %} + +Looking forward to moving this forward together! + +Best regards, +$sales_rep.name +$sales_rep.title +$sales_rep.phone | $sales_rep.email +""" +``` + +#### Key Parameters + +- `template`: Inline template string (mutually exclusive with template_name) +- `template_name`: Name of a predefined template (mutually exclusive with template) +- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) +- `templating_style`: Styling options for template rendering +- `extra_context`: Additional context variables for template + +For more control, you can use a nested `template` section instead of the `template` field: +- `template.template`: The template string +- `template.category`: Template type +- `template.templating_style`: Styling options + +#### Template Variables + +Use the same variable insertion rules as PipeLLM: +- `@variable` for block insertion (multi-line content) +- `$variable` for inline insertion (short text) + +### PipeImgGen operator + +The PipeImgGen operator is used to generate images using AI image generation models. 
+ +#### Basic Usage + +Simple Image Generation: +```plx +[pipe.generate_image] +type = "PipeImgGen" +description = "Generate an image from prompt" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +``` + +Using Image Generation Settings: +```plx +[pipe.generate_photo] +type = "PipeImgGen" +description = "Generate a high-quality photo" +inputs = { prompt = "ImgGenPrompt" } +output = "Photo" +model = { model = "fast-img-gen" } +aspect_ratio = "16:9" +quality = "hd" +``` + +Multiple Image Generation: +```plx +[pipe.generate_variations] +type = "PipeImgGen" +description = "Generate multiple image variations" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +nb_output = 3 +seed = "auto" +``` + +Advanced Configuration: +```plx +[pipe.generate_custom] +type = "PipeImgGen" +description = "Generate image with custom settings" +inputs = { prompt = "ImgGenPrompt" } +output = "Image" +model = "img_gen_preset_name" # Use predefined preset +aspect_ratio = "1:1" +quality = "hd" +background = "transparent" +output_format = "png" +is_raw = false +safety_tolerance = 3 +``` + +#### Key Parameters + +**Image Generation Settings:** +- `model`: Model choice (preset name or inline settings with model name) +- `quality`: Image quality ("standard", "hd") + +**Output Configuration:** +- `nb_output`: Number of images to generate +- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) +- `output_format`: File format ("png", "jpeg", "webp") +- `background`: Background type ("default", "transparent") + +**Generation Control:** +- `seed`: Random seed (integer or "auto") +- `is_raw`: Whether to apply post-processing +- `is_moderated`: Enable content moderation +- `safety_tolerance`: Content safety level (1-6) + +#### Input Requirements + +PipeImgGen requires exactly one input that must be either: +- An `ImgGenPrompt` concept +- A concept that refines `ImgGenPrompt` + +The input can be named anything but must contain the prompt text for image generation. 
+ +### PipeFunc operator + +The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. + +#### Basic Usage + +Simple Function Call: +```plx +[pipe.process_data] +type = "PipeFunc" +description = "Process data using custom function" +inputs = { input_data = "DataType" } +output = "ProcessedData" +function_name = "process_data_function" +``` + +File Processing Example: +```plx +[pipe.read_file] +type = "PipeFunc" +description = "Read file content" +inputs = { file_path = "FilePath" } +output = "FileContent" +function_name = "read_file_content" +``` + +#### Key Parameters + +- `function_name`: Name of the Python function to call (must be registered in func_registry) + +#### Function Requirements + +The Python function must: + +1. **Be registered** in the `func_registry` +2. **Accept `working_memory`** as a parameter: + ```python + async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: + # Function implementation + pass + ``` + +3. 
**Return appropriate types**: + - `StuffContent`: Single content object + - `list[StuffContent]`: Multiple content objects (becomes ListContent) + - `str`: Simple string (becomes TextContent) + +#### Function Registration + +Functions must be registered in the function registry before use: + +```python +from pipelex.tools.func_registry import func_registry + +@func_registry.register("my_function_name") +async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: + # Access inputs from working memory + input_data = working_memory.get_stuff("input_name") + + # Process data + result = process_logic(input_data.content) + + # Return result + return MyResultContent(data=result) +``` + +#### Working Memory Access + +Inside the function, access pipeline inputs through working memory: + +```python +async def process_function(working_memory: WorkingMemory) -> TextContent: + # Get input stuff by name + input_stuff = working_memory.get_stuff("input_name") + + # Access the content + input_content = input_stuff.content + + # Process and return + processed_text = f"Processed: {input_content.text}" + return TextContent(text=processed_text) +``` + +--- + +### Rules to choose LLM models used in PipeLLMs. + +#### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +#### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +#### Using an LLM Handle in a PipeLLM + +Here is an example of using a model to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +#### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + +--- + +ALWAYS RUN `make validate` when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. +Then, create an example file to run the pipeline in the `examples` folder. +But don't write documentation unless asked explicitly to. + +## Guide to execute a pipeline and write example code + +### Example to execute a pipeline with text output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def hello_world() -> str: + """ + This function demonstrates the use of a super simple Pipelex pipeline to generate text. + """ + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="hello_world", + ) + + return pipe_output.main_stuff_as_str + + +## start Pipelex +Pipelex.make() +## run sample using asyncio +output_text = asyncio.run(hello_world()) +pretty_print(output_text, title="Your first Pipelex output") +``` + +### Example to execute a pipeline with structured output + +```python +import asyncio + +from pipelex import pretty_print +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline +from pipelex.core.stuffs.image_content import ImageContent + +from my_project.gantt.gantt_struct import GanttChart + +SAMPLE_NAME = "extract_gantt" +IMAGE_URL = "assets/gantt/gantt_tree_house.png" + + +async def extract_gantt(image_url: str) -> GanttChart: + # Run the pipe + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + # Output the result + return pipe_output.main_stuff_as(content_type=GanttChart) + + +## start Pipelex +Pipelex.make() + +## run sample using asyncio 
+gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) +pretty_print(gantt_chart, title="Gantt Chart") +``` + +### Setting up the input memory + +#### Explanation of input memory + +The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: +```python +StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str +ImplicitMemory = dict[str, StuffContentOrData] +``` +As you can see, we made it so different ways can be used to define that stuff using structured content or data. + +#### Different ways to set up the input memory + +So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: + +```python +## Here we have a single input and it's a Text. +## If you assign a string, by default it will be considered as a TextContent. + pipe_output = await execute_pipeline( + pipe_code="master_advisory_orchestrator", + input_memory={ + "user_input": problem_description, + }, + ) + +## Here we have a single input and it's a PDF. +## Because PDFContent is a native concept, we can use it directly as a value, +## the system knows what content it corresponds to: + pipe_output = await execute_pipeline( + pipe_code="power_extractor_dpe", + input_memory={ + "document": PDFContent(url=pdf_url), + }, + ) + +## Here we have a single input and it's an Image. 
+## Because ImageContent is a native concept, we can use it directly as a value: + pipe_output = await execute_pipeline( + pipe_code="fashion_variation_pipeline", + input_memory={ + "fashion_photo": ImageContent(url=image_url), + }, + ) + +## Here we have a single input, it's an image but +## it's actually a more specific concept gantt.GanttImage which refines Image, +## so we must provide it using a dict with the concept and the content: + pipe_output = await execute_pipeline( + pipe_code="extract_gantt_by_steps", + input_memory={ + "gantt_chart_image": { + "concept": "gantt.GanttImage", + "content": ImageContent(url=image_url), + } + }, + ) + +## Here is a more complex example with multiple inputs assigned using different ways: + pipe_output = await execute_pipeline( + pipe_code="retrieve_then_answer", + dynamic_output_concept_code="contracts.Fees", + input_memory={ + "text": load_text_from_path(path=text_path), + "question": { + "concept": "answer.Question", + "content": question, + }, + "client_instructions": client_instructions, + }, + ) +``` + +### Using the outputs of a pipeline + +All pipe executions return a `PipeOutput` object. +It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. +It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: + +```python + +class PipeOutput(BaseModel): + working_memory: WorkingMemory = Field(default_factory=WorkingMemory) + pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) + + @property + def main_stuff(self) -> Stuff: + ... + + def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: + ... + + def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: + ... + + def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: + ... 
+ + @property + def main_stuff_as_text(self) -> TextContent: + ... + + @property + def main_stuff_as_str(self) -> str: + ... + + @property + def main_stuff_as_image(self) -> ImageContent: + ... + + @property + def main_stuff_as_text_and_image(self) -> TextAndImagesContent: + ... + + @property + def main_stuff_as_number(self) -> NumberContent: + ... + + @property + def main_stuff_as_html(self) -> HtmlContent: + ... + + @property + def main_stuff_as_mermaid(self) -> MermaidContent: + ... +``` + +As you can see, you can extract any variable from the output working memory. + +#### Getting the main stuff as a specific type + +Simple text as a string: + +```python +result = pipe_output.main_stuff_as_str +``` +Structured object (BaseModel): + +```python +result = pipe_output.main_stuff_as(content_type=GanttChart) +``` + +If it's a list, you can get a `ListContent` of the specific type. + +```python +result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) +``` + +or if you want, you can get the actual items as a regular python list: + +```python +result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) +``` + +--- + +## Rules to choose LLM models used in PipeLLMs. + +### LLM Configuration System + +In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. +LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: + +- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` +- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` +- **Routing**: `.pipelex/inference/routing_profiles.toml` + +### LLM Handles + +An llm_handle can be either: +1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system +2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: + +```toml +[aliases] +base-claude = "claude-4.5-sonnet" +base-gpt = "gpt-5" +base-gemini = "gemini-2.5-flash" +base-mistral = "mistral-medium" +``` + +The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. + +### Using an LLM Handle in a PipeLLM + +Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: + +```plx +[pipe.hello_world] +type = "PipeLLM" +description = "Write text about Hello World." +output = "Text" +model = { model = "gpt-5", temperature = 0.9 } +prompt = """ +Write a haiku about Hello World. +""" +``` + +As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). + +### LLM Presets + +Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. + +Examples: +```toml +llm_to_reason = { model = "base-claude", temperature = 1 } +llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } +``` + +The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: + +```plx +[pipe.extract_invoice] +type = "PipeLLM" +description = "Extract invoice information from an invoice text transcript" +inputs = { invoice_text = "InvoiceText" } +output = "Invoice" +model = "llm_to_extract_invoice" +prompt = """ +Extract invoice information from this invoice: + +The category of this invoice is: $invoice_details.category. + +@invoice_text +""" +``` + +The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. +You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. + + +You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. + From 23cba07d73d619f14e67f250bbd39a321083bab0 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 03:33:11 +0200 Subject: [PATCH 069/115] config --- .pipelex/pipelex.toml | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/.pipelex/pipelex.toml b/.pipelex/pipelex.toml index 0f6eaf0a1..e246c84bb 100644 --- a/.pipelex/pipelex.toml +++ b/.pipelex/pipelex.toml @@ -1,20 +1,23 @@ -[pipelex] -[pipelex.observer_config] -observer_dir = "results/observer" +[pipelex.log_config] +default_log_level = "INFO" + +[pipelex.log_config.package_log_levels] +pipelex = "INFO" -[pipelex.aws_config] -api_key_method = "env" -# The possible values are "env" and "secret_provider". -# "env" means means that the env var are stored in your .env file. -# "secret_provider" means that the env var are stored in your Secret Manager (See the doc for injecting a secret provider). 
+[cogt.llm_config] +# is_dump_text_prompts_enabled = true +# is_dump_response_text_enabled = true -[cogt] +[cogt.llm_config.instructor_config] +# is_dump_kwargs_enabled = true +# is_dump_response_enabled = true +# is_dump_error_enabled = true -[cogt.extract_config] -page_output_text_file_name = "page_text.md" +[pipelex.observer_config] +observer_dir = "results/observer" [pipelex.feature_config] # WIP/Experimental feature flags -is_pipeline_tracking_enabled = false -is_activity_tracking_enabled = false +# is_pipeline_tracking_enabled = true +# is_activity_tracking_enabled = true is_reporting_enabled = true From b0f001db332a718e4bc3e0960d74d3ed5acf6ce1 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 03:58:41 +0200 Subject: [PATCH 070/115] Cleanup pytest rules --- .cursor/rules/pytest_standards.mdc | 59 --------------------- pipelex/kit/agent_rules/pytest_standards.md | 59 --------------------- 2 files changed, 118 deletions(-) diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc index 13265cb95..85b219cfb 100644 --- a/.cursor/rules/pytest_standards.mdc +++ b/.cursor/rules/pytest_standards.mdc @@ -66,65 +66,6 @@ class TestFooBar: Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. 
-## Writing integration test to test pipes - -### Required imports for pipe tests - -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol -``` - -### Pipe test implementation steps - -1. Create Stuff from blueprint: - -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` - -2. Create Working Memory: - -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) -``` - -3. Run the pipe: - -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` - -4. 
Basic assertions: - -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None -``` - ### Test Data Organization - If it's not already there, create a `test_data.py` file in the test directory diff --git a/pipelex/kit/agent_rules/pytest_standards.md b/pipelex/kit/agent_rules/pytest_standards.md index ac560a8bd..214015020 100644 --- a/pipelex/kit/agent_rules/pytest_standards.md +++ b/pipelex/kit/agent_rules/pytest_standards.md @@ -60,65 +60,6 @@ class TestFooBar: Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. -## Writing integration test to test pipes - -### Required imports for pipe tests - -```python -import pytest -from pytest import FixtureRequest -from pipelex import log, pretty_print -from pipelex.core.stuffs.stuff_factory import StuffBlueprint, StuffFactory -from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory -from pipelex.hub import get_report_delegate -from pipelex.libraries.pipelines.base_library.retrieve import RetrievedExcerpt -from pipelex.config_pipelex import get_config - -from pipelex.core.pipe import PipeAbstract, update_job_metadata_for_pipe -from pipelex.core.pipes.pipe_output import PipeOutput, PipeOutputType -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.core.pipes.pipe_run_params import PipeRunParams -from pipelex.pipe_works.pipe_router_protocol import PipeRouterProtocol -``` - -### Pipe test implementation steps - -1. Create Stuff from blueprint: - -```python -stuff = StuffFactory.make_stuff( - concept_code="RetrievedExcerpt", - domain="retrieve", - content=RetrievedExcerpt(text="", justification="") - name="retrieved_text", -) -``` - -2. 
Create Working Memory: - -```python -working_memory = WorkingMemoryFactory.make_from_single_stuff(stuff=stuff) -``` - -3. Run the pipe: - -```python -pipe_output = await pipe_router.run_pipe( - pipe_code="pipe_name", - pipe_run_params=PipeRunParamsFactory.make_run_params(), - working_memory=working_memory, - job_metadata=JobMetadata(), -) -``` - -4. Basic assertions: - -```python -assert pipe_output is not None -assert pipe_output.working_memory is not None -assert pipe_output.main_stuff is not None -``` - ### Test Data Organization - If it's not already there, create a `test_data.py` file in the test directory From 76715b09f07e9945733049f81299727c199d9b43 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 04:09:35 +0200 Subject: [PATCH 071/115] Enable client projects to get pipes from Pipelex package, useful for Pipe builder --- pipelex/kit/targets_update.py | 1 + pipelex/libraries/library_manager.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pipelex/kit/targets_update.py b/pipelex/kit/targets_update.py index 80c2b2f86..099ef2ba0 100644 --- a/pipelex/kit/targets_update.py +++ b/pipelex/kit/targets_update.py @@ -49,6 +49,7 @@ def demote_match(match: re.Match[str]) -> str: return re.sub(pattern, demote_match, md_content, flags=re.MULTILINE) +# TODO: fix bug which makes it not idempotent (because heading 1 gets deleted) def build_merged_rules(idx: KitIndex, agent_set: str | None = None) -> str: """Build merged agent documentation from ordered files. 
diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 8cde74323..19dc185ac 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -95,11 +95,6 @@ def reset(self) -> None: self.teardown() self.setup() - def _get_pipeline_library_dirs(self) -> list[Path]: - # Scan the entire project root for .plx files - project_root = Path(config_manager.local_root_dir) - return [project_root] - def _find_plx_files_in_dir(self, dir_path: str, pattern: str, is_recursive: bool) -> list[Path]: """Find PLX files matching a pattern in a directory, excluding problematic directories. @@ -128,7 +123,7 @@ def _find_plx_files_in_dir(self, dir_path: str, pattern: str, is_recursive: bool return filtered_files - def _get_pipelex_plx_files_from_dirs(self, dirs: list[Path]) -> list[Path]: + def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: """Get all valid Pipelex PLX files from the given directories.""" all_plx_paths: list[Path] = [] seen_files: set[str] = set() # Track by absolute path to avoid duplicates @@ -256,7 +251,13 @@ def load_libraries( library_dirs: list[Path] | None = None, library_file_paths: list[Path] | None = None, ) -> None: - dirs_to_use = library_dirs or self._get_pipeline_library_dirs() + # dirs_to_use = library_dirs or [Path(config_manager.local_root_dir)] + dirs_to_use: set[Path] = set() + if library_dirs: + dirs_to_use.update(library_dirs) + else: + dirs_to_use.add(Path(config_manager.local_root_dir)) + dirs_to_use.add(Path(config_manager.pipelex_root_dir)) valid_plx_paths: list[Path] if library_file_paths: From 718e39282a0a716458020334ee750c137fab8e8d Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 10:21:38 +0200 Subject: [PATCH 072/115] Fix finding the PLX from the Pipelex package (for Pipe Builder) --- pipelex/libraries/library_manager.py | 106 ++++++++++++++++++++++++--- 1 file changed, 95 insertions(+), 11 deletions(-) diff --git 
a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 19dc185ac..fc4b2463e 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -1,3 +1,5 @@ +from importlib.abc import Traversable +from importlib.resources import files from pathlib import Path from typing import ClassVar @@ -245,32 +247,101 @@ def _load_pipes_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[ pipes.append(pipe) return pipes + def _get_pipelex_plx_files_from_package(self) -> list[Path]: + """Get all PLX files from the pipelex package using importlib.resources. + + This works reliably whether pipelex is installed as a wheel, from source, + or as a relative path import. + + Returns: + List of Path objects to PLX files in pipelex package + """ + plx_files: list[Path] = [] + pipelex_package = files("pipelex") + + def _find_plx_in_traversable(traversable: Traversable, collected: list[Path]) -> None: + """Recursively find .plx files in a Traversable.""" + try: + if not traversable.is_dir(): + return + + for child in traversable.iterdir(): + if child.is_file() and child.name.endswith(".plx"): + # Convert to path string for validation + plx_path_str = str(child) + if PipelexInterpreter.is_pipelex_file(Path(plx_path_str)): + collected.append(Path(plx_path_str)) + log.debug(f"Found pipelex package PLX file: {plx_path_str}") + elif child.is_dir(): + # Skip excluded directories + excluded = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} + if child.name not in excluded: + _find_plx_in_traversable(child, collected) + except (PermissionError, OSError) as exc: + log.debug(f"Could not access {traversable}: {exc}") + + _find_plx_in_traversable(pipelex_package, plx_files) + log.debug(f"Found {len(plx_files)} PLX files in pipelex package") + return plx_files + + def _get_pipelex_package_dir_for_imports(self) -> Path | None: + """Get the pipelex package directory as a 
Path for importing Python modules. + + Returns: + Path to the pipelex package directory, or None if not accessible as filesystem + """ + pipelex_package = files("pipelex") + try: + # Try to convert to Path (works for filesystem paths) + pkg_path = Path(str(pipelex_package)) + if pkg_path.exists() and pkg_path.is_dir(): + return pkg_path + except (TypeError, ValueError, OSError) as exc: + log.debug(f"Could not convert importlib.resources Traversable to filesystem Path: {exc}") + return None + @override def load_libraries( self, library_dirs: list[Path] | None = None, library_file_paths: list[Path] | None = None, ) -> None: - # dirs_to_use = library_dirs or [Path(config_manager.local_root_dir)] - dirs_to_use: set[Path] = set() + # Collect directories to scan (user project directories) + user_dirs: set[Path] = set() if library_dirs: - dirs_to_use.update(library_dirs) + user_dirs.update(library_dirs) else: - dirs_to_use.add(Path(config_manager.local_root_dir)) - dirs_to_use.add(Path(config_manager.pipelex_root_dir)) + user_dirs.add(Path(config_manager.local_root_dir)) valid_plx_paths: list[Path] if library_file_paths: valid_plx_paths = library_file_paths else: - all_plx_paths: list[Path] = self._get_pipelex_plx_files_from_dirs(dirs_to_use) - # Remove failing pipelines from the list - # failing_pipelines_file_paths = get_config().pipelex.library_config.failing_pipelines_file_paths - # valid_plx_paths = [path for path in all_plx_paths if path not in failing_pipelines_file_paths] - valid_plx_paths = all_plx_paths + # Get PLX files from user directories + user_plx_paths: list[Path] = self._get_pipelex_plx_files_from_dirs(user_dirs) + + # Get PLX files from pipelex package using importlib.resources + # This works reliably in all installation modes (wheel, source, relative) + pipelex_plx_paths: list[Path] = self._get_pipelex_plx_files_from_package() + + # Combine and deduplicate + all_plx_paths = user_plx_paths + pipelex_plx_paths + seen_absolute_paths: set[str] = set() + 
valid_plx_paths = [] + for plx_path in all_plx_paths: + try: + absolute_path = str(plx_path.resolve()) + except (OSError, RuntimeError): + # For paths that can't be resolved (e.g., in zipped packages), use string representation + absolute_path = str(plx_path) + + if absolute_path not in seen_absolute_paths: + valid_plx_paths.append(plx_path) + seen_absolute_paths.add(absolute_path) # Import modules to load them into sys.modules (but don't register classes yet) - for library_dir in dirs_to_use: + # Import from user directories + for library_dir in user_dirs: # Only import files that contain StructuredContent subclasses (uses AST pre-check) ClassRegistryUtils.import_modules_in_folder( folder_path=str(library_dir), @@ -283,6 +354,19 @@ def load_libraries( require_decorator=True, ) + # Import from pipelex package if accessible as filesystem + if pipelex_pkg_dir := self._get_pipelex_package_dir_for_imports(): + log.debug(f"Importing pipelex package modules from: {pipelex_pkg_dir}") + ClassRegistryUtils.import_modules_in_folder( + folder_path=str(pipelex_pkg_dir), + base_class_names=[StructuredContent.__name__], + ) + FuncRegistryUtils.register_funcs_in_folder( + folder_path=str(pipelex_pkg_dir), + decorator_names=[pipe_func.__name__], + require_decorator=True, + ) + # Auto-discover and register all StructuredContent classes from sys.modules num_registered = ClassRegistryUtils.auto_register_all_subclasses(base_class=StructuredContent) log.debug(f"Auto-registered {num_registered} StructuredContent classes from loaded modules") From bfa06dc3dff21ecfcd0019bcadcd18b5d006ad16 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 10:35:28 +0200 Subject: [PATCH 073/115] Use pkgutil.walk_packages to find structures and pipe_funcs in Pipelex package (for builder) --- pipelex/cli/commands/build_cmd.py | 1 + pipelex/libraries/library_manager.py | 40 +++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git 
a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index 04f01a9ac..5a398ddef 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -28,6 +28,7 @@ pipelex build pipe "Take a photo as input, and render the opposite of the photo, don't structure anything, use only text content, be super concise" pipelex build pipe "Take a photo as input, and render the opposite of the photo" pipelex build pipe "Given an RDFP PDF, build a compliance matrix" +pipelex build pipe "Given an theme, write a Haiku" """ diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index fc4b2463e..a79d8b9d6 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -1,3 +1,5 @@ +import importlib +import pkgutil from importlib.abc import Traversable from importlib.resources import files from pathlib import Path @@ -300,6 +302,33 @@ def _get_pipelex_package_dir_for_imports(self) -> Path | None: log.debug(f"Could not convert importlib.resources Traversable to filesystem Path: {exc}") return None + def _import_pipelex_modules_directly(self) -> None: + """Import pipelex modules directly to register @pipe_func decorated functions. + + This ensures critical pipelex functions are registered regardless of how pipelex + is installed (wheel, source, relative path, etc.). + + Uses pkgutil.walk_packages to auto-discover all pipelex.builder modules. 
+ """ + import pipelex.builder # noqa: PLC0415 - intentional local import + + try: + # Walk all submodules in pipelex.builder to discover @pipe_func decorated functions + if hasattr(pipelex.builder, "__path__"): + for _importer, modname, _ispkg in pkgutil.walk_packages( + path=pipelex.builder.__path__, prefix="pipelex.builder.", onerror=lambda _: None + ): + try: + # Import each module to trigger @pipe_func decorator registration + importlib.import_module(modname) + log.debug(f"Imported {modname} for @pipe_func registration") + except Exception as exc: + log.debug(f"Could not import {modname}: {exc}") + else: + log.warning("Could not walk pipelex.builder package - no __path__ attribute") + except ImportError as exc: + log.warning(f"Could not import pipelex.builder package: {exc}") + @override def load_libraries( self, @@ -354,9 +383,14 @@ def load_libraries( require_decorator=True, ) - # Import from pipelex package if accessible as filesystem - if pipelex_pkg_dir := self._get_pipelex_package_dir_for_imports(): - log.debug(f"Importing pipelex package modules from: {pipelex_pkg_dir}") + # Import from pipelex package + # Always directly import critical builder modules first (works in all installation modes) + self._import_pipelex_modules_directly() + + # Then try filesystem-based scanning if package is accessible (for completeness) + pipelex_pkg_dir = self._get_pipelex_package_dir_for_imports() + if pipelex_pkg_dir: + log.debug(f"Additionally scanning pipelex package filesystem: {pipelex_pkg_dir}") ClassRegistryUtils.import_modules_in_folder( folder_path=str(pipelex_pkg_dir), base_class_names=[StructuredContent.__name__], From 53fd4258a386449c7cac26e9eae30b8d90c66462 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 10:46:33 +0200 Subject: [PATCH 074/115] Add extensive logging for @pipe_func registration debugging --- pipelex/libraries/library_manager.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git 
a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index a79d8b9d6..9dfb6569d 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -312,22 +312,29 @@ def _import_pipelex_modules_directly(self) -> None: """ import pipelex.builder # noqa: PLC0415 - intentional local import + log.info("Starting pipelex.builder module discovery for @pipe_func registration") + try: # Walk all submodules in pipelex.builder to discover @pipe_func decorated functions if hasattr(pipelex.builder, "__path__"): + log.info(f"pipelex.builder has __path__: {pipelex.builder.__path__}") + module_count = 0 for _importer, modname, _ispkg in pkgutil.walk_packages( path=pipelex.builder.__path__, prefix="pipelex.builder.", onerror=lambda _: None ): + module_count += 1 try: # Import each module to trigger @pipe_func decorator registration importlib.import_module(modname) - log.debug(f"Imported {modname} for @pipe_func registration") + log.info(f"Successfully imported {modname} for @pipe_func registration") except Exception as exc: - log.debug(f"Could not import {modname}: {exc}") + log.warning(f"Could not import {modname}: {exc}") + + log.info(f"Discovered and attempted to import {module_count} modules in pipelex.builder") else: - log.warning("Could not walk pipelex.builder package - no __path__ attribute") + log.error("Could not walk pipelex.builder package - no __path__ attribute") except ImportError as exc: - log.warning(f"Could not import pipelex.builder package: {exc}") + log.error(f"Could not import pipelex.builder package: {exc}") @override def load_libraries( @@ -385,8 +392,19 @@ def load_libraries( # Import from pipelex package # Always directly import critical builder modules first (works in all installation modes) + log.info("About to import pipelex.builder modules for @pipe_func registration") self._import_pipelex_modules_directly() + # Verify critical functions were registered + from pipelex.tools.func_registry import 
func_registry # noqa: PLC0415 - intentional local import + + critical_functions = ["create_concept_spec", "assemble_pipelex_bundle_spec"] + for func_name in critical_functions: + if func_registry.has_function(func_name): + log.info(f"✓ Function '{func_name}' successfully registered") + else: + log.error(f"✗ Function '{func_name}' NOT registered - this will cause errors!") + # Then try filesystem-based scanning if package is accessible (for completeness) pipelex_pkg_dir = self._get_pipelex_package_dir_for_imports() if pipelex_pkg_dir: From fcb1c157bb0e8598583fe0767db8b8975a9f0f0a Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 10:55:13 +0200 Subject: [PATCH 075/115] force registration of pipe funcs --- pipelex/libraries/library_manager.py | 40 +++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 9dfb6569d..959fab9e5 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -1,9 +1,10 @@ import importlib +import inspect import pkgutil from importlib.abc import Traversable from importlib.resources import files from pathlib import Path -from typing import ClassVar +from typing import Any, ClassVar from pydantic import ValidationError from typing_extensions import override @@ -311,6 +312,7 @@ def _import_pipelex_modules_directly(self) -> None: Uses pkgutil.walk_packages to auto-discover all pipelex.builder modules. 
""" import pipelex.builder # noqa: PLC0415 - intentional local import + from pipelex.tools.func_registry import func_registry # noqa: PLC0415 - intentional local import log.info("Starting pipelex.builder module discovery for @pipe_func registration") @@ -319,18 +321,44 @@ def _import_pipelex_modules_directly(self) -> None: if hasattr(pipelex.builder, "__path__"): log.info(f"pipelex.builder has __path__: {pipelex.builder.__path__}") module_count = 0 + functions_registered = 0 + for _importer, modname, _ispkg in pkgutil.walk_packages( path=pipelex.builder.__path__, prefix="pipelex.builder.", onerror=lambda _: None ): module_count += 1 try: - # Import each module to trigger @pipe_func decorator registration - importlib.import_module(modname) - log.info(f"Successfully imported {modname} for @pipe_func registration") + # Import the module + module = importlib.import_module(modname) + log.info(f"Successfully imported {modname}") + + # Find @pipe_func decorated functions in this module + for _name, obj in inspect.getmembers(module, inspect.isfunction): + # Skip functions imported from other modules + if obj.__module__ != modname: + continue + + # Only process functions marked with @pipe_func + if not func_registry.is_marked_pipe_func(obj): + continue + + # Check for custom name from decorator + custom_name = getattr(obj, "_pipe_func_name", None) + func_name = custom_name if custom_name is not None else obj.__name__ + + # Register the function + func_registry.register_function( + func=obj, + name=func_name, + should_warn_if_already_registered=False, + ) + functions_registered += 1 + log.info(f"Registered @pipe_func: {func_name} from {modname}") + except Exception as exc: - log.warning(f"Could not import {modname}: {exc}") + log.warning(f"Could not process {modname}: {exc}") - log.info(f"Discovered and attempted to import {module_count} modules in pipelex.builder") + log.info(f"Discovered {module_count} modules and registered {functions_registered} @pipe_func functions") 
else: log.error("Could not walk pipelex.builder package - no __path__ attribute") except ImportError as exc: From 34e81eb7cf97a11803e2934cb37ecb46e9206af7 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 11:18:31 +0200 Subject: [PATCH 076/115] Cleanup - move code to util module --- pipelex/libraries/library_manager.py | 158 +++------------------------ pipelex/libraries/library_utils.py | 150 +++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 144 deletions(-) create mode 100644 pipelex/libraries/library_utils.py diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 959fab9e5..1b8212720 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -1,10 +1,5 @@ -import importlib -import inspect -import pkgutil -from importlib.abc import Traversable -from importlib.resources import files from pathlib import Path -from typing import Any, ClassVar +from typing import ClassVar from pydantic import ValidationError from typing_extensions import override @@ -37,11 +32,16 @@ PipeLoadingError, ) from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract +from pipelex.libraries.library_utils import ( + find_plx_files_in_dir, + get_pipelex_package_dir_for_imports, + get_pipelex_plx_files_from_package, + register_pipe_funcs_from_package, +) from pipelex.tools.class_registry_utils import ClassRegistryUtils from pipelex.tools.config.manager import config_manager from pipelex.tools.func_registry import pipe_func from pipelex.tools.func_registry_utils import FuncRegistryUtils -from pipelex.tools.misc.file_utils import find_files_in_dir from pipelex.types import StrEnum @@ -100,34 +100,6 @@ def reset(self) -> None: self.teardown() self.setup() - def _find_plx_files_in_dir(self, dir_path: str, pattern: str, is_recursive: bool) -> list[Path]: - """Find PLX files matching a pattern in a directory, excluding problematic directories. 
- - Args: - dir_path: Directory path to search in - pattern: File pattern to match (e.g. "*.plx") - is_recursive: Whether to search recursively in subdirectories - - Returns: - List of matching Path objects, filtered to exclude problematic directories - - """ - # Get all files using the base utility - all_files = find_files_in_dir(dir_path, pattern, is_recursive) - - # Directories to exclude from scanning to avoid loading invalid PLX files - exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} - - # Filter out files in excluded directories - filtered_files: list[Path] = [] - for file_path in all_files: - # Check if any parent directory is in the exclude list - should_exclude = any(part in exclude_dirs for part in file_path.parts) - if not should_exclude: - filtered_files.append(file_path) - - return filtered_files - def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: """Get all valid Pipelex PLX files from the given directories.""" all_plx_paths: list[Path] = [] @@ -139,7 +111,7 @@ def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: continue # Find all .plx files in the directory, excluding problematic directories - plx_files = self._find_plx_files_in_dir( + plx_files = find_plx_files_in_dir( dir_path=str(dir_path), pattern="*.plx", is_recursive=True, @@ -250,119 +222,17 @@ def _load_pipes_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[ pipes.append(pipe) return pipes - def _get_pipelex_plx_files_from_package(self) -> list[Path]: - """Get all PLX files from the pipelex package using importlib.resources. - - This works reliably whether pipelex is installed as a wheel, from source, - or as a relative path import. 
- - Returns: - List of Path objects to PLX files in pipelex package - """ - plx_files: list[Path] = [] - pipelex_package = files("pipelex") - - def _find_plx_in_traversable(traversable: Traversable, collected: list[Path]) -> None: - """Recursively find .plx files in a Traversable.""" - try: - if not traversable.is_dir(): - return - - for child in traversable.iterdir(): - if child.is_file() and child.name.endswith(".plx"): - # Convert to path string for validation - plx_path_str = str(child) - if PipelexInterpreter.is_pipelex_file(Path(plx_path_str)): - collected.append(Path(plx_path_str)) - log.debug(f"Found pipelex package PLX file: {plx_path_str}") - elif child.is_dir(): - # Skip excluded directories - excluded = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} - if child.name not in excluded: - _find_plx_in_traversable(child, collected) - except (PermissionError, OSError) as exc: - log.debug(f"Could not access {traversable}: {exc}") - - _find_plx_in_traversable(pipelex_package, plx_files) - log.debug(f"Found {len(plx_files)} PLX files in pipelex package") - return plx_files - - def _get_pipelex_package_dir_for_imports(self) -> Path | None: - """Get the pipelex package directory as a Path for importing Python modules. - - Returns: - Path to the pipelex package directory, or None if not accessible as filesystem - """ - pipelex_package = files("pipelex") - try: - # Try to convert to Path (works for filesystem paths) - pkg_path = Path(str(pipelex_package)) - if pkg_path.exists() and pkg_path.is_dir(): - return pkg_path - except (TypeError, ValueError, OSError) as exc: - log.debug(f"Could not convert importlib.resources Traversable to filesystem Path: {exc}") - return None - def _import_pipelex_modules_directly(self) -> None: - """Import pipelex modules directly to register @pipe_func decorated functions. + """Import pipelex modules to register @pipe_func decorated functions. 
This ensures critical pipelex functions are registered regardless of how pipelex is installed (wheel, source, relative path, etc.). - - Uses pkgutil.walk_packages to auto-discover all pipelex.builder modules. """ import pipelex.builder # noqa: PLC0415 - intentional local import - from pipelex.tools.func_registry import func_registry # noqa: PLC0415 - intentional local import - - log.info("Starting pipelex.builder module discovery for @pipe_func registration") - try: - # Walk all submodules in pipelex.builder to discover @pipe_func decorated functions - if hasattr(pipelex.builder, "__path__"): - log.info(f"pipelex.builder has __path__: {pipelex.builder.__path__}") - module_count = 0 - functions_registered = 0 - - for _importer, modname, _ispkg in pkgutil.walk_packages( - path=pipelex.builder.__path__, prefix="pipelex.builder.", onerror=lambda _: None - ): - module_count += 1 - try: - # Import the module - module = importlib.import_module(modname) - log.info(f"Successfully imported {modname}") - - # Find @pipe_func decorated functions in this module - for _name, obj in inspect.getmembers(module, inspect.isfunction): - # Skip functions imported from other modules - if obj.__module__ != modname: - continue - - # Only process functions marked with @pipe_func - if not func_registry.is_marked_pipe_func(obj): - continue - - # Check for custom name from decorator - custom_name = getattr(obj, "_pipe_func_name", None) - func_name = custom_name if custom_name is not None else obj.__name__ - - # Register the function - func_registry.register_function( - func=obj, - name=func_name, - should_warn_if_already_registered=False, - ) - functions_registered += 1 - log.info(f"Registered @pipe_func: {func_name} from {modname}") - - except Exception as exc: - log.warning(f"Could not process {modname}: {exc}") - - log.info(f"Discovered {module_count} modules and registered {functions_registered} @pipe_func functions") - else: - log.error("Could not walk pipelex.builder package - no 
__path__ attribute") - except ImportError as exc: - log.error(f"Could not import pipelex.builder package: {exc}") + log.info("Registering @pipe_func functions from pipelex.builder") + functions_count = register_pipe_funcs_from_package("pipelex.builder", pipelex.builder) + log.info(f"Registered {functions_count} @pipe_func functions from pipelex.builder") @override def load_libraries( @@ -386,7 +256,7 @@ def load_libraries( # Get PLX files from pipelex package using importlib.resources # This works reliably in all installation modes (wheel, source, relative) - pipelex_plx_paths: list[Path] = self._get_pipelex_plx_files_from_package() + pipelex_plx_paths: list[Path] = get_pipelex_plx_files_from_package() # Combine and deduplicate all_plx_paths = user_plx_paths + pipelex_plx_paths @@ -434,7 +304,7 @@ def load_libraries( log.error(f"✗ Function '{func_name}' NOT registered - this will cause errors!") # Then try filesystem-based scanning if package is accessible (for completeness) - pipelex_pkg_dir = self._get_pipelex_package_dir_for_imports() + pipelex_pkg_dir = get_pipelex_package_dir_for_imports() if pipelex_pkg_dir: log.debug(f"Additionally scanning pipelex package filesystem: {pipelex_pkg_dir}") ClassRegistryUtils.import_modules_in_folder( diff --git a/pipelex/libraries/library_utils.py b/pipelex/libraries/library_utils.py new file mode 100644 index 000000000..f1455325d --- /dev/null +++ b/pipelex/libraries/library_utils.py @@ -0,0 +1,150 @@ +"""Utility functions for library management.""" + +import importlib +import inspect +import pkgutil +from importlib.abc import Traversable +from importlib.resources import files +from pathlib import Path +from typing import Any + +from pipelex import log +from pipelex.core.interpreter import PipelexInterpreter +from pipelex.tools.func_registry import func_registry +from pipelex.tools.misc.file_utils import find_files_in_dir + + +def get_pipelex_plx_files_from_package() -> list[Path]: + """Get all PLX files from the pipelex 
package using importlib.resources. + + This works reliably whether pipelex is installed as a wheel, from source, + or as a relative path import. + + Returns: + List of Path objects to PLX files in pipelex package + """ + plx_files: list[Path] = [] + pipelex_package = files("pipelex") + + def _find_plx_in_traversable(traversable: Traversable, collected: list[Path]) -> None: + """Recursively find .plx files in a Traversable.""" + try: + if not traversable.is_dir(): + return + + for child in traversable.iterdir(): + if child.is_file() and child.name.endswith(".plx"): + # Convert to path string for validation + plx_path_str = str(child) + if PipelexInterpreter.is_pipelex_file(Path(plx_path_str)): + collected.append(Path(plx_path_str)) + log.debug(f"Found pipelex package PLX file: {plx_path_str}") + elif child.is_dir(): + # Skip excluded directories + excluded = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} + if child.name not in excluded: + _find_plx_in_traversable(child, collected) + except (PermissionError, OSError) as exc: + log.debug(f"Could not access {traversable}: {exc}") + + _find_plx_in_traversable(pipelex_package, plx_files) + log.debug(f"Found {len(plx_files)} PLX files in pipelex package") + return plx_files + + +def get_pipelex_package_dir_for_imports() -> Path | None: + """Get the pipelex package directory as a Path for importing Python modules. 
+ + Returns: + Path to the pipelex package directory, or None if not accessible as filesystem + """ + pipelex_package = files("pipelex") + try: + # Try to convert to Path (works for filesystem paths) + pkg_path = Path(str(pipelex_package)) + if pkg_path.exists() and pkg_path.is_dir(): + return pkg_path + except (TypeError, ValueError, OSError) as exc: + log.debug(f"Could not convert importlib.resources Traversable to filesystem Path: {exc}") + return None + + +def find_plx_files_in_dir(dir_path: str, pattern: str, is_recursive: bool) -> list[Path]: + """Find PLX files matching a pattern in a directory, excluding problematic directories. + + Args: + dir_path: Directory path to search in + pattern: File pattern to match (e.g. "*.plx") + is_recursive: Whether to search recursively in subdirectories + + Returns: + List of matching Path objects, filtered to exclude problematic directories + """ + # Get all files using the base utility + all_files = find_files_in_dir(dir_path, pattern, is_recursive) + + # Directories to exclude from scanning to avoid loading invalid PLX files + exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} + + # Filter out files in excluded directories + filtered_files: list[Path] = [] + for file_path in all_files: + # Check if any parent directory is in the exclude list + should_exclude = any(part in exclude_dirs for part in file_path.parts) + if not should_exclude: + filtered_files.append(file_path) + + return filtered_files + + +def register_pipe_funcs_from_package(package_name: str, package: Any) -> int: + """Register all @pipe_func decorated functions from a package. + + Args: + package_name: Full name of the package (e.g. 
"pipelex.builder") + package: The imported package object + + Returns: + Number of functions registered + """ + functions_registered = 0 + + if not hasattr(package, "__path__"): + log.warning(f"Package {package_name} has no __path__ attribute, cannot walk modules") + return 0 + + log.debug(f"Walking package {package_name} at {package.__path__}") + + for _importer, modname, _ispkg in pkgutil.walk_packages(path=package.__path__, prefix=f"{package_name}.", onerror=lambda _: None): + try: + # Import the module + module = importlib.import_module(modname) + log.debug(f"Imported {modname}") + + # Find @pipe_func decorated functions in this module + for _name, obj in inspect.getmembers(module, inspect.isfunction): + # Skip functions imported from other modules + if obj.__module__ != modname: + continue + + # Only process functions marked with @pipe_func + if not func_registry.is_marked_pipe_func(obj): + continue + + # Check for custom name from decorator + custom_name = getattr(obj, "_pipe_func_name", None) + func_name = custom_name if custom_name is not None else obj.__name__ + + # Register the function + func_registry.register_function( + func=obj, + name=func_name, + should_warn_if_already_registered=False, + ) + functions_registered += 1 + log.debug(f"Registered @pipe_func: {func_name} from {modname}") + + except Exception as exc: + log.debug(f"Could not process {modname}: {exc}") + + return functions_registered From f7151ff548a331819060bd2358d9e9045b09a818 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 11:35:40 +0200 Subject: [PATCH 077/115] Cleanup --- pipelex/libraries/library_manager.py | 5 -- pipelex/tools/func_registry_utils.py | 72 +++++++------------ .../pipelex/tools/test_func_registry_utils.py | 10 +++ 3 files changed, 37 insertions(+), 50 deletions(-) diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 1b8212720..8d0455abf 100644 --- a/pipelex/libraries/library_manager.py +++ 
b/pipelex/libraries/library_manager.py @@ -40,7 +40,6 @@ ) from pipelex.tools.class_registry_utils import ClassRegistryUtils from pipelex.tools.config.manager import config_manager -from pipelex.tools.func_registry import pipe_func from pipelex.tools.func_registry_utils import FuncRegistryUtils from pipelex.types import StrEnum @@ -284,8 +283,6 @@ def load_libraries( # Only import files that contain @pipe_func decorated functions (uses AST pre-check) FuncRegistryUtils.register_funcs_in_folder( folder_path=str(library_dir), - decorator_names=[pipe_func.__name__], - require_decorator=True, ) # Import from pipelex package @@ -313,8 +310,6 @@ def load_libraries( ) FuncRegistryUtils.register_funcs_in_folder( folder_path=str(pipelex_pkg_dir), - decorator_names=[pipe_func.__name__], - require_decorator=True, ) # Auto-discover and register all StructuredContent classes from sys.modules diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index fa1a0bf11..cc7f8a11d 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -4,11 +4,10 @@ from typing import Any from pipelex import log -from pipelex.tools.func_registry import func_registry +from pipelex.tools.func_registry import func_registry, pipe_func from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir from pipelex.tools.typing.module_inspector import ( ModuleFileError, - import_module_from_file, import_module_from_file_if_has_decorated_functions, ) @@ -19,30 +18,23 @@ def register_funcs_in_folder( cls, folder_path: str, is_recursive: bool = True, - decorator_names: list[str] | None = None, - require_decorator: bool = False, ) -> None: """Discovers and attempts to register all functions in Python files within a folder. 
Only functions that meet the eligibility criteria will be registered: - Must be an async function - Exactly 1 parameter named "working_memory" with type WorkingMemory - Return type that is a subclass of StuffContent - - Optionally must be marked with a decorator (if decorator_names provided) + - Must be marked with the @pipe_func decorator - If decorator_names is provided, uses AST parsing to first check if files - contain decorated functions before importing them. This avoids executing - module-level code in files that don't contain the functions you're looking for. + Uses AST parsing to first check if files contain @pipe_func decorated functions + before importing them. This avoids executing module-level code in files that + don't contain the functions you're looking for. - The function name is used as the registry key. + The function name is used as the registry key (or custom name if provided to decorator). Args: folder_path: Path to folder containing Python files is_recursive: Whether to search recursively in subdirectories - decorator_names: Optional list of decorator names (e.g. ["pipe_func"]). - If provided, only imports files that contain functions with these decorators. - If None, imports all Python files. - require_decorator: If True, only functions with decorators in decorator_names are registered. - Only used if decorator_names is provided. """ python_files = cls._find_files_in_dir( @@ -52,45 +44,34 @@ def register_funcs_in_folder( ) for python_file in python_files: - cls._register_funcs_in_file( - file_path=str(python_file), - decorator_names=decorator_names, - require_decorator=require_decorator, - ) + cls._register_funcs_in_file(file_path=str(python_file)) @classmethod def _register_funcs_in_file( cls, file_path: str, - decorator_names: list[str] | None = None, - require_decorator: bool = False, ) -> None: - """Processes a Python file to find and register eligible functions. 
+ """Processes a Python file to find and register eligible @pipe_func decorated functions. + + Uses AST parsing to check if the file contains @pipe_func decorated functions before + importing. Only functions marked with @pipe_func decorator are registered. Args: file_path: Path to the Python file - decorator_names: Optional list of decorator names to filter by - require_decorator: If True, only functions with the specified decorators are registered """ try: - # Import the module (potentially with AST pre-check if decorator_names provided) - if decorator_names is not None: - module = import_module_from_file_if_has_decorated_functions( - file_path, - decorator_names=decorator_names, - ) - # If no decorated functions found, module will be None - if module is None: - return - else: - module = import_module_from_file(file_path) + # Import the module only if it has @pipe_func decorated functions + module = import_module_from_file_if_has_decorated_functions( + file_path, + decorator_names=[pipe_func.__name__], + ) + # If no decorated functions found, module will be None + if module is None: + return # Find functions that match criteria - functions_to_register = cls._find_functions_in_module( - module, - require_decorator=require_decorator, - ) + functions_to_register = cls._find_functions_in_module(module) for func in functions_to_register: # Check for custom name from decorator @@ -118,16 +99,17 @@ def _register_funcs_in_file( def _find_functions_in_module( cls, module: Any, - require_decorator: bool = False, ) -> list[Callable[..., Any]]: - """Finds all functions in a module (eligibility will be checked during registration). + """Finds all @pipe_func decorated functions in a module. + + Only functions marked with @pipe_func decorator are included. + Full eligibility (signature, return type) will be checked during registration. 
Args: module: The module to search for functions - require_decorator: If True, only functions marked with @pipe_func are included Returns: - List of functions found in the module + List of @pipe_func decorated functions found in the module """ functions: list[Callable[..., Any]] = [] @@ -139,8 +121,8 @@ def _find_functions_in_module( if obj.__module__ != module_name: continue - # If decorator is required, check for it - if require_decorator and not func_registry.is_marked_pipe_func(obj): + # Only include functions marked with @pipe_func + if not func_registry.is_marked_pipe_func(obj): continue # Add function - full eligibility will be checked by func_registry.register_function diff --git a/tests/unit/pipelex/tools/test_func_registry_utils.py b/tests/unit/pipelex/tools/test_func_registry_utils.py index 8b4880def..fe77f0aaf 100644 --- a/tests/unit/pipelex/tools/test_func_registry_utils.py +++ b/tests/unit/pipelex/tools/test_func_registry_utils.py @@ -23,6 +23,9 @@ class CodebaseFileContent(StructuredContent): class TestCases: VALID_ASYNC_FUNCTION = """ +from pipelex.tools.func_registry import pipe_func + +@pipe_func() async def read_file_content(working_memory: WorkingMemory) -> ListContent[CodebaseFileContent]: '''Read the content of related codebase files.''' @@ -43,7 +46,10 @@ async def read_file_content(working_memory: WorkingMemory) -> ListContent[Codeba """ VALID_SYNC_FUNCTION = """ +from pipelex.tools.func_registry import pipe_func + # Sync function - should be accepted +@pipe_func() def sync_function(working_memory: WorkingMemory) -> StructuredContent: '''This should be registered - sync functions are now eligible.''' pass @@ -172,7 +178,9 @@ def test_recursive_folder_search(self): root_file.write_text(""" from pipelex.core.memory.working_memory import WorkingMemory from pipelex.core.stuffs.text_content import TextContent +from pipelex.tools.func_registry import pipe_func +@pipe_func() async def root_function(working_memory: WorkingMemory) -> TextContent: 
return TextContent(text="root") """) @@ -182,7 +190,9 @@ async def root_function(working_memory: WorkingMemory) -> TextContent: nested_file.write_text(""" from pipelex.core.memory.working_memory import WorkingMemory from pipelex.core.stuffs.text_content import TextContent +from pipelex.tools.func_registry import pipe_func +@pipe_func() async def nested_function(working_memory: WorkingMemory) -> TextContent: return TextContent(text="nested") """) From 530d4466c7562073d0d3b0c4175be2d69a759686 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 11:51:32 +0200 Subject: [PATCH 078/115] Cleanup logs --- pipelex/kit/agent_rules/python_standards.md | 3 +- pipelex/libraries/library_utils.py | 66 ++++++++++----------- pipelex/tools/func_registry.py | 11 ++-- pipelex/tools/func_registry_utils.py | 2 +- 4 files changed, 39 insertions(+), 43 deletions(-) diff --git a/pipelex/kit/agent_rules/python_standards.md b/pipelex/kit/agent_rules/python_standards.md index 8471ef3be..d7ad37a75 100644 --- a/pipelex/kit/agent_rules/python_standards.md +++ b/pipelex/kit/agent_rules/python_standards.md @@ -52,7 +52,8 @@ This document outlines the core coding standards, best practices, and quality co - Always catch exceptions at the place where you can add useful context to it. 
- Use try/except blocks with specific exceptions - Convert third-party exceptions to our custom ones - - Never catch Exception, only catch specific exceptions + - NEVER catch the generic Exception, only catch specific exceptions, except at the root of CLI commands + - NEVER raise generic exceptions like ValueError or TypeError, create new error classes and raise them instead - Always add `from exc` to the exception ```python diff --git a/pipelex/libraries/library_utils.py b/pipelex/libraries/library_utils.py index f1455325d..21663fd6b 100644 --- a/pipelex/libraries/library_utils.py +++ b/pipelex/libraries/library_utils.py @@ -38,17 +38,17 @@ def _find_plx_in_traversable(traversable: Traversable, collected: list[Path]) -> plx_path_str = str(child) if PipelexInterpreter.is_pipelex_file(Path(plx_path_str)): collected.append(Path(plx_path_str)) - log.debug(f"Found pipelex package PLX file: {plx_path_str}") + log.verbose(f"Found pipelex package PLX file: {plx_path_str}") elif child.is_dir(): # Skip excluded directories excluded = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} if child.name not in excluded: _find_plx_in_traversable(child, collected) except (PermissionError, OSError) as exc: - log.debug(f"Could not access {traversable}: {exc}") + log.warning(f"Could not access {traversable}: {exc}") _find_plx_in_traversable(pipelex_package, plx_files) - log.debug(f"Found {len(plx_files)} PLX files in pipelex package") + log.verbose(f"Found {len(plx_files)} PLX files in pipelex package") return plx_files @@ -65,7 +65,7 @@ def get_pipelex_package_dir_for_imports() -> Path | None: if pkg_path.exists() and pkg_path.is_dir(): return pkg_path except (TypeError, ValueError, OSError) as exc: - log.debug(f"Could not convert importlib.resources Traversable to filesystem Path: {exc}") + log.warning(f"Could not convert importlib.resources Traversable to filesystem Path: {exc}") return None @@ -113,38 +113,34 @@ def
register_pipe_funcs_from_package(package_name: str, package: Any) -> int: log.warning(f"Package {package_name} has no __path__ attribute, cannot walk modules") return 0 - log.debug(f"Walking package {package_name} at {package.__path__}") + log.verbose(f"Walking package {package_name} at {package.__path__}") for _importer, modname, _ispkg in pkgutil.walk_packages(path=package.__path__, prefix=f"{package_name}.", onerror=lambda _: None): - try: - # Import the module - module = importlib.import_module(modname) - log.debug(f"Imported {modname}") - - # Find @pipe_func decorated functions in this module - for _name, obj in inspect.getmembers(module, inspect.isfunction): - # Skip functions imported from other modules - if obj.__module__ != modname: - continue - - # Only process functions marked with @pipe_func - if not func_registry.is_marked_pipe_func(obj): - continue - - # Check for custom name from decorator - custom_name = getattr(obj, "_pipe_func_name", None) - func_name = custom_name if custom_name is not None else obj.__name__ - - # Register the function - func_registry.register_function( - func=obj, - name=func_name, - should_warn_if_already_registered=False, - ) - functions_registered += 1 - log.debug(f"Registered @pipe_func: {func_name} from {modname}") - - except Exception as exc: - log.debug(f"Could not process {modname}: {exc}") + # Import the module + module = importlib.import_module(modname) + log.verbose(f"Imported {modname}") + + # Find @pipe_func decorated functions in this module + for _name, obj in inspect.getmembers(module, inspect.isfunction): + # Skip functions imported from other modules + if obj.__module__ != modname: + continue + + # Only process functions marked with @pipe_func + if not func_registry.is_marked_pipe_func(obj): + continue + + # Check for custom name from decorator + custom_name = getattr(obj, "_pipe_func_name", None) + func_name = custom_name if custom_name is not None else obj.__name__ + + # Register the function + 
func_registry.register_function( + func=obj, + name=func_name, + should_raise_if_already_registered=False, + ) + functions_registered += 1 + log.verbose(f"Registered @pipe_func: {func_name} from {modname}") return functions_registered diff --git a/pipelex/tools/func_registry.py b/pipelex/tools/func_registry.py index ab8009be7..54335a698 100644 --- a/pipelex/tools/func_registry.py +++ b/pipelex/tools/func_registry.py @@ -77,7 +77,7 @@ def register_function( self, func: Callable[..., Any], name: str | None = None, - should_warn_if_already_registered: bool = True, + should_raise_if_already_registered: bool = False, ) -> None: """Registers a function in the registry with a name if it meets eligibility criteria.""" if not self.is_eligible_function(func): @@ -85,11 +85,10 @@ def register_function( key = name or func.__name__ if key in self.root: - if should_warn_if_already_registered: - self.log(f"Function '{key}' already exists in registry") - else: + if should_raise_if_already_registered: msg = f"Function '{key}' already exists in registry" raise FuncRegistryError(msg) + self.log(f"Function '{key}' already exists in registry") else: self.log(f"Registered new single function '{key}' in registry") self.root[key] = func @@ -113,12 +112,12 @@ def unregister_function_by_name(self, name: str) -> None: def register_functions_dict(self, functions: dict[str, Callable[..., Any]]) -> None: """Registers multiple functions in the registry with names if they meet eligibility criteria.""" for name, func in functions.items(): - self.register_function(func=func, name=name, should_warn_if_already_registered=False) + self.register_function(func=func, name=name, should_raise_if_already_registered=False) def register_functions(self, functions: list[Callable[..., Any]]) -> None: """Registers multiple functions in the registry with names if they meet eligibility criteria.""" for func in functions: - self.register_function(func=func, should_warn_if_already_registered=False) + 
self.register_function(func=func, should_raise_if_already_registered=False) def get_function(self, name: str) -> Callable[..., Any] | None: """Retrieves a function from the registry by its name. Returns None if not found.""" diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index cc7f8a11d..f6495b564 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -81,7 +81,7 @@ def _register_funcs_in_file( func_registry.register_function( func=func, name=func_name, - should_warn_if_already_registered=True, + should_raise_if_already_registered=False, ) except ModuleFileError: # Expected: file validation issues (directories with .py extension, etc.) From 6d2f96c491af0f421477a6859d85f211cee02857 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 11:51:44 +0200 Subject: [PATCH 079/115] Cleanup redundant code --- pipelex/libraries/library_manager.py | 3 +- pipelex/libraries/library_utils.py | 63 +-------------------- pipelex/tools/class_registry_utils.py | 6 +- pipelex/tools/func_registry_utils.py | 74 ++++++++++++++++++++++--- pipelex/tools/misc/common_exclusions.py | 17 ++++++ 5 files changed, 89 insertions(+), 74 deletions(-) create mode 100644 pipelex/tools/misc/common_exclusions.py diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 8d0455abf..828fd16a2 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -36,7 +36,6 @@ find_plx_files_in_dir, get_pipelex_package_dir_for_imports, get_pipelex_plx_files_from_package, - register_pipe_funcs_from_package, ) from pipelex.tools.class_registry_utils import ClassRegistryUtils from pipelex.tools.config.manager import config_manager @@ -230,7 +229,7 @@ def _import_pipelex_modules_directly(self) -> None: import pipelex.builder # noqa: PLC0415 - intentional local import log.info("Registering @pipe_func functions from pipelex.builder") - functions_count = 
register_pipe_funcs_from_package("pipelex.builder", pipelex.builder) + functions_count = FuncRegistryUtils.register_pipe_funcs_from_package("pipelex.builder", pipelex.builder) log.info(f"Registered {functions_count} @pipe_func functions from pipelex.builder") @override diff --git a/pipelex/libraries/library_utils.py b/pipelex/libraries/library_utils.py index 21663fd6b..d406ed8dc 100644 --- a/pipelex/libraries/library_utils.py +++ b/pipelex/libraries/library_utils.py @@ -1,16 +1,12 @@ """Utility functions for library management.""" -import importlib -import inspect -import pkgutil from importlib.abc import Traversable from importlib.resources import files from pathlib import Path -from typing import Any from pipelex import log from pipelex.core.interpreter import PipelexInterpreter -from pipelex.tools.func_registry import func_registry +from pipelex.tools.misc.common_exclusions import EXCLUDED_SCAN_DIRS from pipelex.tools.misc.file_utils import find_files_in_dir @@ -41,8 +37,7 @@ def _find_plx_in_traversable(traversable: Traversable, collected: list[Path]) -> log.verbose(f"Found pipelex package PLX file: {plx_path_str}") elif child.is_dir(): # Skip excluded directories - excluded = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} - if child.name not in excluded: + if child.name not in EXCLUDED_SCAN_DIRS: _find_plx_in_traversable(child, collected) except (PermissionError, OSError) as exc: log.warning(f"Could not access {traversable}: {exc}") @@ -83,64 +78,12 @@ def find_plx_files_in_dir(dir_path: str, pattern: str, is_recursive: bool) -> li # Get all files using the base utility all_files = find_files_in_dir(dir_path, pattern, is_recursive) - # Directories to exclude from scanning to avoid loading invalid PLX files - exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} - # Filter out files in excluded directories 
filtered_files: list[Path] = [] for file_path in all_files: # Check if any parent directory is in the exclude list - should_exclude = any(part in exclude_dirs for part in file_path.parts) + should_exclude = any(part in EXCLUDED_SCAN_DIRS for part in file_path.parts) if not should_exclude: filtered_files.append(file_path) return filtered_files - - -def register_pipe_funcs_from_package(package_name: str, package: Any) -> int: - """Register all @pipe_func decorated functions from a package. - - Args: - package_name: Full name of the package (e.g. "pipelex.builder") - package: The imported package object - - Returns: - Number of functions registered - """ - functions_registered = 0 - - if not hasattr(package, "__path__"): - log.warning(f"Package {package_name} has no __path__ attribute, cannot walk modules") - return 0 - - log.verbose(f"Walking package {package_name} at {package.__path__}") - - for _importer, modname, _ispkg in pkgutil.walk_packages(path=package.__path__, prefix=f"{package_name}.", onerror=lambda _: None): - # Import the module - module = importlib.import_module(modname) - log.verbose(f"Imported {modname}") - - # Find @pipe_func decorated functions in this module - for _name, obj in inspect.getmembers(module, inspect.isfunction): - # Skip functions imported from other modules - if obj.__module__ != modname: - continue - - # Only process functions marked with @pipe_func - if not func_registry.is_marked_pipe_func(obj): - continue - - # Check for custom name from decorator - custom_name = getattr(obj, "_pipe_func_name", None) - func_name = custom_name if custom_name is not None else obj.__name__ - - # Register the function - func_registry.register_function( - func=obj, - name=func_name, - should_raise_if_already_registered=False, - ) - functions_registered += 1 - log.verbose(f"Registered @pipe_func: {func_name} from {modname}") - - return functions_registered diff --git a/pipelex/tools/class_registry_utils.py b/pipelex/tools/class_registry_utils.py index 
4c600c876..7add0b70a 100644 --- a/pipelex/tools/class_registry_utils.py +++ b/pipelex/tools/class_registry_utils.py @@ -7,6 +7,7 @@ from kajson.kajson_manager import KajsonManager +from pipelex.tools.misc.common_exclusions import EXCLUDED_SCAN_DIRS from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir if TYPE_CHECKING: @@ -92,14 +93,11 @@ def find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> l # Get all files using the base utility all_files = base_find_files_in_dir(dir_path, pattern, is_recursive) - # Directories to exclude from scanning to avoid import issues - exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} - # Filter out files in excluded directories filtered_files: list[Path] = [] for file_path in all_files: # Check if any parent directory is in the exclude list - should_exclude = any(part in exclude_dirs for part in file_path.parts) + should_exclude = any(part in EXCLUDED_SCAN_DIRS for part in file_path.parts) if not should_exclude: filtered_files.append(file_path) diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index f6495b564..e3bb8928d 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -1,10 +1,13 @@ +import importlib import inspect +import pkgutil from collections.abc import Callable from pathlib import Path from typing import Any from pipelex import log from pipelex.tools.func_registry import func_registry, pipe_func +from pipelex.tools.misc.common_exclusions import EXCLUDED_SCAN_DIRS from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir from pipelex.tools.typing.module_inspector import ( ModuleFileError, @@ -13,6 +16,50 @@ class FuncRegistryUtils: + @classmethod + def register_pipe_funcs_from_package(cls, package_name: str, package: Any) -> int: + """Register all @pipe_func decorated functions from a 
package. + + Args: + package_name: Full name of the package (e.g. "pipelex.builder") + package: The imported package object + + Returns: + Number of functions registered + + """ + functions_registered = 0 + + if not hasattr(package, "__path__"): + log.warning(f"Package {package_name} has no __path__ attribute, cannot walk modules") + return 0 + + log.verbose(f"Walking package {package_name} at {package.__path__}") + + for _importer, modname, _ispkg in pkgutil.walk_packages( + path=package.__path__, + prefix=f"{package_name}.", + onerror=lambda _: None, + ): + # Import the module + module = importlib.import_module(modname) + log.verbose(f"Imported {modname}") + + # Find @pipe_func decorated functions in this module + functions_to_register = cls._find_functions_in_module(module) + + for func in functions_to_register: + func_name = cls._get_function_registration_name(func) + func_registry.register_function( + func=func, + name=func_name, + should_raise_if_already_registered=False, + ) + functions_registered += 1 + log.verbose(f"Registered @pipe_func: {func_name} from {modname}") + + return functions_registered + @classmethod def register_funcs_in_folder( cls, @@ -74,10 +121,7 @@ def _register_funcs_in_file( functions_to_register = cls._find_functions_in_module(module) for func in functions_to_register: - # Check for custom name from decorator - custom_name = getattr(func, "_pipe_func_name", None) - func_name = custom_name if custom_name is not None else func.__name__ - + func_name = cls._get_function_registration_name(func) func_registry.register_function( func=func, name=func_name, @@ -130,6 +174,23 @@ def _find_functions_in_module( return functions + @classmethod + def _get_function_registration_name(cls, func: Callable[..., Any]) -> str: + """Extract the registration name for a function. + + If the function has a custom name from the @pipe_func decorator, use that. + Otherwise, use the function's __name__. 
+ + Args: + func: The function to get the registration name for + + Returns: + The name to use when registering the function + + """ + custom_name = getattr(func, "_pipe_func_name", None) + return custom_name if custom_name is not None else func.__name__ + @classmethod def _find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> list[Path]: """Find files matching a pattern in a directory, excluding common build/cache directories. @@ -146,14 +207,11 @@ def _find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> # Get all files using the base utility all_files = base_find_files_in_dir(dir_path, pattern, is_recursive) - # Directories to exclude from scanning to avoid import issues - exclude_dirs = {".venv", ".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", "node_modules", ".env", "results"} - # Filter out files in excluded directories filtered_files: list[Path] = [] for file_path in all_files: # Check if any parent directory is in the exclude list - should_exclude = any(part in exclude_dirs for part in file_path.parts) + should_exclude = any(part in EXCLUDED_SCAN_DIRS for part in file_path.parts) if not should_exclude: filtered_files.append(file_path) diff --git a/pipelex/tools/misc/common_exclusions.py b/pipelex/tools/misc/common_exclusions.py new file mode 100644 index 000000000..d8c20fc15 --- /dev/null +++ b/pipelex/tools/misc/common_exclusions.py @@ -0,0 +1,17 @@ +"""Common exclusion patterns for directory scanning operations.""" + +# Directories to exclude when scanning for Python files or PLX files +# These directories are typically build artifacts, caches, or environment folders +EXCLUDED_SCAN_DIRS = frozenset( + { + ".venv", + ".git", + "__pycache__", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + "node_modules", + ".env", + "results", + } +) From 89628d88239a5e6e1d30411991ca3e31101f9eb8 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 11:57:04 +0200 Subject: [PATCH 080/115] 
Remove useless arg --- pipelex/tools/func_registry.py | 8 ++------ pipelex/tools/func_registry_utils.py | 2 -- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/pipelex/tools/func_registry.py b/pipelex/tools/func_registry.py index 54335a698..7c89582ab 100644 --- a/pipelex/tools/func_registry.py +++ b/pipelex/tools/func_registry.py @@ -77,7 +77,6 @@ def register_function( self, func: Callable[..., Any], name: str | None = None, - should_raise_if_already_registered: bool = False, ) -> None: """Registers a function in the registry with a name if it meets eligibility criteria.""" if not self.is_eligible_function(func): @@ -85,9 +84,6 @@ def register_function( key = name or func.__name__ if key in self.root: - if should_raise_if_already_registered: - msg = f"Function '{key}' already exists in registry" - raise FuncRegistryError(msg) self.log(f"Function '{key}' already exists in registry") else: self.log(f"Registered new single function '{key}' in registry") @@ -112,12 +108,12 @@ def unregister_function_by_name(self, name: str) -> None: def register_functions_dict(self, functions: dict[str, Callable[..., Any]]) -> None: """Registers multiple functions in the registry with names if they meet eligibility criteria.""" for name, func in functions.items(): - self.register_function(func=func, name=name, should_raise_if_already_registered=False) + self.register_function(func=func, name=name) def register_functions(self, functions: list[Callable[..., Any]]) -> None: """Registers multiple functions in the registry with names if they meet eligibility criteria.""" for func in functions: - self.register_function(func=func, should_raise_if_already_registered=False) + self.register_function(func=func) def get_function(self, name: str) -> Callable[..., Any] | None: """Retrieves a function from the registry by its name. 
Returns None if not found.""" diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index e3bb8928d..4f4d16474 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -53,7 +53,6 @@ def register_pipe_funcs_from_package(cls, package_name: str, package: Any) -> in func_registry.register_function( func=func, name=func_name, - should_raise_if_already_registered=False, ) functions_registered += 1 log.verbose(f"Registered @pipe_func: {func_name} from {modname}") @@ -125,7 +124,6 @@ def _register_funcs_in_file( func_registry.register_function( func=func, name=func_name, - should_raise_if_already_registered=False, ) except ModuleFileError: # Expected: file validation issues (directories with .py extension, etc.) From cd3bc0ae8e66ccf7ff5db42f22d19b1f12479390 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 12:02:43 +0200 Subject: [PATCH 081/115] Proofread agent rules --- .cursor/rules/pytest_standards.mdc | 2 +- .cursor/rules/python_standards.mdc | 3 ++- .cursor/rules/run_pipelex.mdc | 2 +- .cursor/rules/write_pipelex.mdc | 12 ++++++------ .github/copilot-instructions.md | 14 +++++++------- .windsurfrules.md | 14 +++++++------- AGENTS.md | 14 +++++++------- BLACKBOX_RULES.md | 14 +++++++------- CLAUDE.md | 14 +++++++------- pipelex/kit/agent_rules/pytest_standards.md | 2 +- pipelex/kit/agent_rules/python_standards.md | 2 +- pipelex/kit/agent_rules/run_pipelex.md | 2 +- pipelex/kit/agent_rules/write_pipelex.md | 12 ++++++------ 13 files changed, 54 insertions(+), 53 deletions(-) diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc index 85b219cfb..cfd03064f 100644 --- a/.cursor/rules/pytest_standards.mdc +++ b/.cursor/rules/pytest_standards.mdc @@ -49,7 +49,7 @@ Always group the tests of a module into a test class: @pytest.mark.asyncio(loop_scope="class") class TestFooBar: @pytest.mark.parametrize( - "topic test_case_blueprint", + "topic, 
test_case_blueprint", [ TestCases.CASE_1, TestCases.CASE_2, diff --git a/.cursor/rules/python_standards.mdc b/.cursor/rules/python_standards.mdc index 150864d1e..aa4357c0a 100644 --- a/.cursor/rules/python_standards.mdc +++ b/.cursor/rules/python_standards.mdc @@ -58,7 +58,8 @@ This document outlines the core coding standards, best practices, and quality co - Always catch exceptions at the place where you can add useful context to it. - Use try/except blocks with specific exceptions - Convert third-party exceptions to our custom ones - - Never catch Exception, only catch specific exceptions + - NEVER catch the generic Exception, only catch specific exceptions, except at the root of CLI commands + - NEVER raise generic exceptions like ValueError or TypeError, create new error classes and raise them instead - Always add `from exc` to the exception ```python diff --git a/.cursor/rules/run_pipelex.mdc b/.cursor/rules/run_pipelex.mdc index 11e6c668f..5f29c45ab 100644 --- a/.cursor/rules/run_pipelex.mdc +++ b/.cursor/rules/run_pipelex.mdc @@ -200,7 +200,7 @@ class PipeOutput(BaseModel): ... ``` -As you can see, you can extarct any variable from the output working memory. +As you can see, you can extract any variable from the output working memory. ### Getting the main stuff as a specific type diff --git a/.cursor/rules/write_pipelex.mdc b/.cursor/rules/write_pipelex.mdc index fb1b64cfd..46cab53b3 100644 --- a/.cursor/rules/write_pipelex.mdc +++ b/.cursor/rules/write_pipelex.mdc @@ -7,9 +7,9 @@ globs: --- # Guide to write or edit pipelines using the Pipelex language in .plx files -- Always first write your "plan" in natural langage, then transcribe it in pipelex. +- Always first write your "plan" in natural language, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. 
(enmpty lines, no trailing whitespaces, etc.) +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) ## Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) @@ -71,7 +71,7 @@ output = "ConceptName" ``` The pipes will all have at least this base definition. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). +- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • variable='['invoice']'`` @@ -260,7 +260,7 @@ The PipeCondition controller allows you to implement conditional logic in your p ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" @@ -275,7 +275,7 @@ or ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." 
inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code @@ -415,7 +415,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 0ef0bbe83..21c273719 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,9 +1,9 @@ ## Guide to write or edit pipelines using the Pipelex language in .plx files -- Always first write your "plan" in natural langage, then transcribe it in pipelex. +- Always first write your "plan" in natural language, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) 
### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) @@ -65,7 +65,7 @@ output = "ConceptName" ``` The pipes will all have at least this base definition. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). +- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • variable='['invoice']'`` @@ -254,7 +254,7 @@ The PipeCondition controller allows you to implement conditional logic in your p ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" @@ -269,7 +269,7 @@ or ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code @@ -409,7 +409,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. 
Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. @@ -1026,7 +1026,7 @@ class PipeOutput(BaseModel): ... ``` -As you can see, you can extarct any variable from the output working memory. +As you can see, you can extract any variable from the output working memory. #### Getting the main stuff as a specific type diff --git a/.windsurfrules.md b/.windsurfrules.md index 0ef0bbe83..21c273719 100644 --- a/.windsurfrules.md +++ b/.windsurfrules.md @@ -1,9 +1,9 @@ ## Guide to write or edit pipelines using the Pipelex language in .plx files -- Always first write your "plan" in natural langage, then transcribe it in pipelex. +- Always first write your "plan" in natural language, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) @@ -65,7 +65,7 @@ output = "ConceptName" ``` The pipes will all have at least this base definition. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. 
It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). +- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • variable='['invoice']'`` @@ -254,7 +254,7 @@ The PipeCondition controller allows you to implement conditional logic in your p ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" @@ -269,7 +269,7 @@ or ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code @@ -409,7 +409,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. 
DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. @@ -1026,7 +1026,7 @@ class PipeOutput(BaseModel): ... ``` -As you can see, you can extarct any variable from the output working memory. +As you can see, you can extract any variable from the output working memory. #### Getting the main stuff as a specific type diff --git a/AGENTS.md b/AGENTS.md index 0ef0bbe83..21c273719 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,9 +1,9 @@ ## Guide to write or edit pipelines using the Pipelex language in .plx files -- Always first write your "plan" in natural langage, then transcribe it in pipelex. +- Always first write your "plan" in natural language, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) @@ -65,7 +65,7 @@ output = "ConceptName" ``` The pipes will all have at least this base definition. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). +- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). 
So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • variable='['invoice']'`` @@ -254,7 +254,7 @@ The PipeCondition controller allows you to implement conditional logic in your p ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" @@ -269,7 +269,7 @@ or ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code @@ -409,7 +409,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. @@ -1026,7 +1026,7 @@ class PipeOutput(BaseModel): ... ``` -As you can see, you can extarct any variable from the output working memory. +As you can see, you can extract any variable from the output working memory. 
#### Getting the main stuff as a specific type diff --git a/BLACKBOX_RULES.md b/BLACKBOX_RULES.md index 0ef0bbe83..21c273719 100644 --- a/BLACKBOX_RULES.md +++ b/BLACKBOX_RULES.md @@ -1,9 +1,9 @@ ## Guide to write or edit pipelines using the Pipelex language in .plx files -- Always first write your "plan" in natural langage, then transcribe it in pipelex. +- Always first write your "plan" in natural language, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) @@ -65,7 +65,7 @@ output = "ConceptName" ``` The pipes will all have at least this base definition. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). +- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • variable='['invoice']'`` @@ -254,7 +254,7 @@ The PipeCondition controller allows you to implement conditional logic in your p ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." 
inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" @@ -269,7 +269,7 @@ or ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code @@ -409,7 +409,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. @@ -1026,7 +1026,7 @@ class PipeOutput(BaseModel): ... ``` -As you can see, you can extarct any variable from the output working memory. +As you can see, you can extract any variable from the output working memory. #### Getting the main stuff as a specific type diff --git a/CLAUDE.md b/CLAUDE.md index 0ef0bbe83..21c273719 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,9 +1,9 @@ ## Guide to write or edit pipelines using the Pipelex language in .plx files -- Always first write your "plan" in natural langage, then transcribe it in pipelex. +- Always first write your "plan" in natural language, then transcribe it in pipelex. 
- You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) ### Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) @@ -65,7 +65,7 @@ output = "ConceptName" ``` The pipes will all have at least this base definition. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). +- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • variable='['invoice']'`` @@ -254,7 +254,7 @@ The PipeCondition controller allows you to implement conditional logic in your p ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" @@ -269,7 +269,7 @@ or ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." 
inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code @@ -409,7 +409,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. @@ -1026,7 +1026,7 @@ class PipeOutput(BaseModel): ... ``` -As you can see, you can extarct any variable from the output working memory. +As you can see, you can extract any variable from the output working memory. 
#### Getting the main stuff as a specific type diff --git a/pipelex/kit/agent_rules/pytest_standards.md b/pipelex/kit/agent_rules/pytest_standards.md index 214015020..c2386faa3 100644 --- a/pipelex/kit/agent_rules/pytest_standards.md +++ b/pipelex/kit/agent_rules/pytest_standards.md @@ -43,7 +43,7 @@ Always group the tests of a module into a test class: @pytest.mark.asyncio(loop_scope="class") class TestFooBar: @pytest.mark.parametrize( - "topic test_case_blueprint", + "topic, test_case_blueprint", [ TestCases.CASE_1, TestCases.CASE_2, diff --git a/pipelex/kit/agent_rules/python_standards.md b/pipelex/kit/agent_rules/python_standards.md index d7ad37a75..ca3aea237 100644 --- a/pipelex/kit/agent_rules/python_standards.md +++ b/pipelex/kit/agent_rules/python_standards.md @@ -53,7 +53,7 @@ This document outlines the core coding standards, best practices, and quality co - Use try/except blocks with specific exceptions - Convert third-party exceptions to our custom ones - NEVER catch the generic Exception, only catch specific exceptions, except at the root of CLI commands - - NEVER raise geenric exceptions like ValueError or TypeError, create new error classes and raise them instead + - NEVER raise generic exceptions like ValueError or TypeError, create new error classes and raise them instead - Always add `from exc` to the exception ```python diff --git a/pipelex/kit/agent_rules/run_pipelex.md b/pipelex/kit/agent_rules/run_pipelex.md index 93bac64e6..21787573f 100644 --- a/pipelex/kit/agent_rules/run_pipelex.md +++ b/pipelex/kit/agent_rules/run_pipelex.md @@ -194,7 +194,7 @@ class PipeOutput(BaseModel): ... ``` -As you can see, you can extarct any variable from the output working memory. +As you can see, you can extract any variable from the output working memory. 
### Getting the main stuff as a specific type diff --git a/pipelex/kit/agent_rules/write_pipelex.md b/pipelex/kit/agent_rules/write_pipelex.md index f5d16f512..8fe8d4a8f 100644 --- a/pipelex/kit/agent_rules/write_pipelex.md +++ b/pipelex/kit/agent_rules/write_pipelex.md @@ -1,8 +1,8 @@ # Guide to write or edit pipelines using the Pipelex language in .plx files -- Always first write your "plan" in natural langage, then transcribe it in pipelex. +- Always first write your "plan" in natural language, then transcribe it in pipelex. - You should ALWAYS RUN the terminal command `make validate` when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. -- Please use POSIX standard for files. (enmpty lines, no trailing whitespaces, etc.) +- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) ## Pipeline File Naming - Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) @@ -64,7 +64,7 @@ output = "ConceptName" ``` The pipes will all have at least this base definition. -- `inputs`: Dictionnary of key behing the variable used in the prompts, and the value behing the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditionnal pipes (if PipeCondition). +- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). So If you have this error: `StaticValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • variable='['invoice']'`` @@ -253,7 +253,7 @@ The PipeCondition controller allows you to implement conditional logic in your p ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." 
+description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression = "input_data.category" @@ -268,7 +268,7 @@ or ```plx [pipe.conditional_operation] type = "PipeCondition" -description = "A conditonal pipe to decide wheter..." +description = "A conditional pipe to decide whether..." inputs = { input_data = "CategoryInput" } output = "native.Text" expression_template = "{{ input_data.category }}" # Jinja2 code @@ -408,7 +408,7 @@ Match the expense with its corresponding invoice: @invoices """ ``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doens't need to be explictly written in the prompt. +In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. DO NOT write things like "Here is the expense: @expense". DO write simply "@expense" alone in an isolated line. 
From 80e80e661f94b261abbd4037c31978ac1709e26b Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 13:06:13 +0200 Subject: [PATCH 082/115] More python standards --- .cursor/rules/pytest_standards.mdc | 7 ++- .cursor/rules/python_standards.mdc | 62 ++++++++++++++++----- pipelex/kit/agent_rules/pytest_standards.md | 7 ++- pipelex/kit/agent_rules/python_standards.md | 62 ++++++++++++++++----- 4 files changed, 102 insertions(+), 36 deletions(-) diff --git a/.cursor/rules/pytest_standards.mdc b/.cursor/rules/pytest_standards.mdc index cfd03064f..74eba2ad8 100644 --- a/.cursor/rules/pytest_standards.mdc +++ b/.cursor/rules/pytest_standards.mdc @@ -41,7 +41,7 @@ Several markers may be applied. For instance, if the test uses an LLM, then it u ### Test Class Structure -Always group the tests of a module into a test class: +- Always group the tests of a module into a test class: ```python @pytest.mark.llm @@ -64,11 +64,12 @@ class TestFooBar: # Test implementation ``` -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. +- Never more than 1 class per test file. +- Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. 
### Test Data Organization -- If it's not already there, create a `test_data.py` file in the test directory +- If it's not already there, create a `test_data.py` file in the proper test directory - Define test cases using `StuffBlueprint`: ```python diff --git a/.cursor/rules/python_standards.mdc b/.cursor/rules/python_standards.mdc index aa4357c0a..f1064bbfc 100644 --- a/.cursor/rules/python_standards.mdc +++ b/.cursor/rules/python_standards.mdc @@ -8,9 +8,52 @@ globs: This document outlines the core coding standards, best practices, and quality control procedures for the codebase. -## Type Hints +## Variables, loops and indexes -1. **Always Use Type Hints** + - Variable names should have a minimum length of 3 characters. No exceptions: name your `for` loop indexes like `index_foobar`, your exceptions `exc` or more specific like `validation_error` when there are several layers of exceptions, and use `for key, value in ...` for key/value pairs. + - When looping on the keys of a dict, use `for key in the_dict` rather than `for key in the_dict.keys()` otherwise you won't pass linting. + - Avoid inline for loops, unless it's ultra-simple and fits on one line. + +## Enums and tests + + - When defining enums related to string values, always inherit from `StrEnum` + - Never test equality to an enum value: use match/case, even to single out 1 case out of 10 cases. To avoid heavy match/case code in awkward places, add methods to the enum class such as `is_foobar()`. This is to avoid bugs: when new enum values are added we want the linter to complain. Use the `|` operator to group cases + - As our match/case constructs over enums are always exhaustive, NEVER add a default `case _: ...`. Otherwise, you won't pass linting.
+ +## Imports + +### **Imports at the top of the file** + + - Import all necessary libraries at the top of the file + - Do not import libraries in functions or classes unless in very specific cases, to be discussed with the user, as they would require a `# noqa: ...` comment to pass linting + - Do not bother with ordering the imports, our Ruff linter will handle it for us. Same goes with removing unused imports. + +- **Logging and Pretty Printing**: + + - Both `log()` and `pretty_print()` can be imported from `pipelex` directly: + ```python + from pipelex import log, pretty_print + + log.info("Hello, world!") + ``` + - Both have a title arg which is handy when logging/printing objects: + + ```python + log.debug("Hello, world!", title="Your first Pipelex log") + pretty_print(output_object, title="Your first Pipelex output") + ``` + - Both handle formatting json using Rich, pretty_print makes it prettier. + +- **StrEnum and Self type**: + + - Both `StrEnum` and `Self` must be imported from `pipelex.types` (handles Python backward compatibility): + ```python + from pipelex.types import Self, StrEnum + ``` + +## Typing + +### **Always Use Type Hints** - Every function parameter must be typed - Every function return must be typed @@ -19,8 +62,9 @@ This document outlines the core coding standards, best practices, and quality co - Use type hints for all fields - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals (not `Optional[]`) - Use Field(default_factory=...) for mutable defaults + - Use `# pyright: ignore[specificError]` or `# type: ignore` only as a last resort. In particular, if you are sure about the type, you often solve issues by using cast() or creating a new typed variable. -2. **BaseModel / Pydantic Standards** +### **BaseModel / Pydantic Standards** - Use `BaseModel` and respect Pydantic v2 standards - Use the modern `ConfigDict` when needed, e.g.
`model_config = ConfigDict(extra="forbid", strict=True)` @@ -36,18 +80,6 @@ This document outlines the core coding standards, best practices, and quality co items: list[MyItem] = Field(default_factory=empty_list_factory_of(MyItem), description="A list of items") ``` -3. **StrEnum** - - Import from `pipelex.types`: - ```python - from pipelex.types import StrEnum - ``` - -4. **Self type** - - Import from `pipelex.types`: - ```python - from pipelex.types import Self - ``` - ## Factory Pattern - Use Factory Pattern for object creation when dealing with multiple implementations diff --git a/pipelex/kit/agent_rules/pytest_standards.md b/pipelex/kit/agent_rules/pytest_standards.md index c2386faa3..b4b4e35ef 100644 --- a/pipelex/kit/agent_rules/pytest_standards.md +++ b/pipelex/kit/agent_rules/pytest_standards.md @@ -35,7 +35,7 @@ Several markers may be applied. For instance, if the test uses an LLM, then it u ### Test Class Structure -Always group the tests of a module into a test class: +- Always group the tests of a module into a test class: ```python @pytest.mark.llm @@ -58,11 +58,12 @@ class TestFooBar: # Test implementation ``` -Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. +- Never more than 1 class per test file. +- Sometimes it can be convenient to access the test's name in its body, for instance to include into a job_id. To achieve that, add the argument `request: FixtureRequest` into the signature and then you can get the test name using `cast(str, request.node.originalname), # type: ignore`. 
### Test Data Organization -- If it's not already there, create a `test_data.py` file in the test directory +- If it's not already there, create a `test_data.py` file in the proper test directory - Define test cases using `StuffBlueprint`: ```python diff --git a/pipelex/kit/agent_rules/python_standards.md b/pipelex/kit/agent_rules/python_standards.md index ca3aea237..8ff3c842d 100644 --- a/pipelex/kit/agent_rules/python_standards.md +++ b/pipelex/kit/agent_rules/python_standards.md @@ -2,9 +2,52 @@ This document outlines the core coding standards, best practices, and quality control procedures for the codebase. -## Type Hints +## Variables, loops and indexes -1. **Always Use Type Hints** + - Variable names should have a minimum length of 3 characters. No exceptions: name your `for` loop indexes like `index_foobar`, your exceptions `exc` or more specific like `validation_error` when there are several layers of exceptions, and use `for key, value in ...` for key/value pairs. + - When looping on the keys of a dict, use `for key in the_dict` rather than `for key in the_dict.keys()` otherwise you won't pass linting. + - Avoid inline for loops, unless it's ultra-simple and fits on one line. + +## Enums and tests + + - When defining enums related to string values, always inherit from `StrEnum` + - Never test equality to an enum value: use match/case, even to single out 1 case out of 10 cases. To avoid heavy match/case code in awkward places, add methods to the enum class such as `is_foobar()`. This is to avoid bugs: when new enum values are added we want the linter to complain. Use the `|` operator to group cases + - As our match/case constructs over enums are always exhaustive, NEVER add a default `case _: ...`. Otherwise, you won't pass linting.
+ +## Imports + +### **Imports at the top of the file** + + - Import all necessary libraries at the top of the file + - Do not import libraries in functions or classes unless in very specific cases, to be discussed with the user, as they would require a `# noqa: ...` comment to pass linting + - Do not bother with ordering the imports, our Ruff linter will handle it for us. Same goes with removing unused imports. + +- **Logging and Pretty Printing**: + + - Both `log()` and `pretty_print()` can be imported from `pipelex` directly: + ```python + from pipelex import log, pretty_print + + log.info("Hello, world!") + ``` + - Both have a title arg which is handy when logging/printing objects: + + ```python + log.debug("Hello, world!", title="Your first Pipelex log") + pretty_print(output_object, title="Your first Pipelex output") + ``` + - Both handle formatting json using Rich, pretty_print makes it prettier. + +- **StrEnum and Self type**: + + - Both `StrEnum` and `Self` must be imported from `pipelex.types` (handles Python backward compatibility): + ```python + from pipelex.types import Self, StrEnum + ``` + +## Typing + +### **Always Use Type Hints** - Every function parameter must be typed - Every function return must be typed @@ -13,8 +56,9 @@ This document outlines the core coding standards, best practices, and quality co - Use type hints for all fields - Use the `|` syntax for union types (e.g `str | int`) and `| None` for optionals (not `Optional[]`) - Use Field(default_factory=...) for mutable defaults + - Use `# pyright: ignore[specificError]` or `# type: ignore` only as a last resort. In particular, if you are sure about the type, you often solve issues by using cast() or creating a new typed variable. -2. **BaseModel / Pydantic Standards** +### **BaseModel / Pydantic Standards** - Use `BaseModel` and respect Pydantic v2 standards - Use the modern `ConfigDict` when needed, e.g.
`model_config = ConfigDict(extra="forbid", strict=True)` @@ -30,18 +74,6 @@ This document outlines the core coding standards, best practices, and quality co items: list[MyItem] = Field(default_factory=empty_list_factory_of(MyItem), description="A list of items") ``` -3. **StrEnum** - - Import from `pipelex.types`: - ```python - from pipelex.types import StrEnum - ``` - -4. **Self type** - - Import from `pipelex.types`: - ```python - from pipelex.types import Self - ``` - ## Factory Pattern - Use Factory Pattern for object creation when dealing with multiple implementations From 0589fa8979369981c9befc0a857a76a551be3b64 Mon Sep 17 00:00:00 2001 From: Louis Choquel Date: Tue, 14 Oct 2025 13:28:26 +0200 Subject: [PATCH 083/115] ScanConfig to exclude directories from scanning. --- pipelex/config.py | 12 ++++ pipelex/core/concepts/concept.py | 6 +- pipelex/kit/configs/pipelex.toml | 41 ++++++++++++ pipelex/libraries/library_utils.py | 8 ++- pipelex/pipelex.toml | 17 +++++ pipelex/tools/class_registry_utils.py | 87 ++----------------------- pipelex/tools/func_registry_utils.py | 5 +- pipelex/tools/misc/common_exclusions.py | 17 ----- pipelex/tools/typing/class_utils.py | 79 ++++++++++++++++++++++ 9 files changed, 165 insertions(+), 107 deletions(-) delete mode 100644 pipelex/tools/misc/common_exclusions.py create mode 100644 pipelex/tools/typing/class_utils.py diff --git a/pipelex/config.py b/pipelex/config.py index a6437e5d8..6bf969a1c 100644 --- a/pipelex/config.py +++ b/pipelex/config.py @@ -94,6 +94,17 @@ class ObserverConfig(ConfigModel): observer_dir: str +class ScanConfig(ConfigModel): + excluded_dirs: frozenset[str] + + @field_validator("excluded_dirs", mode="before") + @classmethod + def validate_excluded_dirs(cls, value: list[str] | frozenset[str]) -> frozenset[str]: + if isinstance(value, frozenset): + return value + return frozenset(value) + + class Pipelex(ConfigModel): feature_config: FeatureConfig log_config: LogConfig @@ -109,6 +120,7 @@ class 
Pipelex(ConfigModel): pipe_run_config: PipeRunConfig reporting_config: ReportingConfig observer_config: ObserverConfig + scan_config: ScanConfig class MigrationConfig(ConfigModel): diff --git a/pipelex/core/concepts/concept.py b/pipelex/core/concepts/concept.py index 1720d8c1f..3b981555b 100644 --- a/pipelex/core/concepts/concept.py +++ b/pipelex/core/concepts/concept.py @@ -9,8 +9,8 @@ from pipelex.core.stuffs.image_field_search import search_for_nested_image_fields from pipelex.core.stuffs.stuff_content import StuffContent from pipelex.exceptions import PipelexUnexpectedError -from pipelex.tools.class_registry_utils import ClassRegistryUtils from pipelex.tools.misc.string_utils import pascal_case_to_sentence +from pipelex.tools.typing.class_utils import are_classes_equivalent, has_compatible_field class Concept(BaseModel): @@ -74,7 +74,7 @@ def are_concept_compatible(cls, concept_1: "Concept", concept_2: "Concept", stri if strict: # Check if classes are equivalent (same fields, types, descriptions) - return ClassRegistryUtils.are_classes_equivalent(concept_1_class, concept_2_class) + return are_classes_equivalent(concept_1_class, concept_2_class) # Check if concept_1 is a subclass of concept_2 try: if issubclass(concept_1_class, concept_2_class): @@ -83,7 +83,7 @@ def are_concept_compatible(cls, concept_1: "Concept", concept_2: "Concept", stri pass # Check if concept_1 has compatible fields with concept_2 - return ClassRegistryUtils.has_compatible_field(concept_1_class, concept_2_class) + return has_compatible_field(concept_1_class, concept_2_class) return False @classmethod diff --git a/pipelex/kit/configs/pipelex.toml b/pipelex/kit/configs/pipelex.toml index 70170c31b..9cbddb56e 100644 --- a/pipelex/kit/configs/pipelex.toml +++ b/pipelex/kit/configs/pipelex.toml @@ -1,3 +1,44 @@ +#################################################################################################### +# Pipelex Configuration File 
+#################################################################################################### +# +# This configuration file is copied to your project's .pipelex/ directory when you run: +# pipelex init config +# +# Purpose: +# - This file allows you to override Pipelex's default settings for your specific project +# - Feel free to modify any settings below to suit your needs +# - You can add any configuration sections that exist in the main pipelex.toml +# +# Finding Available Settings: +# - See the full default configuration in: pipelex/pipelex.toml (in the Pipelex package) +# - See the configuration structure classes in: pipelex/config.py and pipelex/cogt/config_cogt.py +# +# Common Customizations: +# - Logging levels and behavior +# - Excluded directories for scanning +# - LLM prompt dumping for debugging +# - Feature flags for tracking and reporting +# - Observer and reporting output directories +# +#################################################################################################### + +[pipelex.scan_config] +excluded_dirs = [ + ".venv", + "venv", + "env", + ".env", + "virtualenv", + ".virtualenv", + ".git", + "__pycache__", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + "node_modules", + "results", +] [pipelex.log_config] default_log_level = "INFO" diff --git a/pipelex/libraries/library_utils.py b/pipelex/libraries/library_utils.py index d406ed8dc..40c58a6c2 100644 --- a/pipelex/libraries/library_utils.py +++ b/pipelex/libraries/library_utils.py @@ -5,8 +5,8 @@ from pathlib import Path from pipelex import log +from pipelex.config import get_config from pipelex.core.interpreter import PipelexInterpreter -from pipelex.tools.misc.common_exclusions import EXCLUDED_SCAN_DIRS from pipelex.tools.misc.file_utils import find_files_in_dir @@ -24,6 +24,7 @@ def get_pipelex_plx_files_from_package() -> list[Path]: def _find_plx_in_traversable(traversable: Traversable, collected: list[Path]) -> None: """Recursively find .plx files in a 
Traversable.""" + excluded_dirs = get_config().pipelex.scan_config.excluded_dirs try: if not traversable.is_dir(): return @@ -37,7 +38,7 @@ def _find_plx_in_traversable(traversable: Traversable, collected: list[Path]) -> log.verbose(f"Found pipelex package PLX file: {plx_path_str}") elif child.is_dir(): # Skip excluded directories - if child.name not in EXCLUDED_SCAN_DIRS: + if child.name not in excluded_dirs: _find_plx_in_traversable(child, collected) except (PermissionError, OSError) as exc: log.warning(f"Could not access {traversable}: {exc}") @@ -80,9 +81,10 @@ def find_plx_files_in_dir(dir_path: str, pattern: str, is_recursive: bool) -> li # Filter out files in excluded directories filtered_files: list[Path] = [] + excluded_dirs = get_config().pipelex.scan_config.excluded_dirs for file_path in all_files: # Check if any parent directory is in the exclude list - should_exclude = any(part in EXCLUDED_SCAN_DIRS for part in file_path.parts) + should_exclude = any(part in excluded_dirs for part in file_path.parts) if not should_exclude: filtered_files.append(file_path) diff --git a/pipelex/pipelex.toml b/pipelex/pipelex.toml index e2add63a3..ac82f3bf8 100644 --- a/pipelex/pipelex.toml +++ b/pipelex/pipelex.toml @@ -4,6 +4,23 @@ [pipelex.observer_config] observer_dir = "results/observer" +[pipelex.scan_config] +excluded_dirs = [ + ".venv", + "venv", + "env", + ".env", + "virtualenv", + ".virtualenv", + ".git", + "__pycache__", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + "node_modules", + "results", +] + [pipelex.feature_config] # WIP/Experimental feature flags is_pipeline_tracking_enabled = false diff --git a/pipelex/tools/class_registry_utils.py b/pipelex/tools/class_registry_utils.py index 7add0b70a..c81a118a5 100644 --- a/pipelex/tools/class_registry_utils.py +++ b/pipelex/tools/class_registry_utils.py @@ -1,19 +1,14 @@ import inspect import sys -import types import warnings from pathlib import Path -from typing import TYPE_CHECKING, Annotated, Any, 
Union, get_args, get_origin +from typing import Any from kajson.kajson_manager import KajsonManager -from pipelex.tools.misc.common_exclusions import EXCLUDED_SCAN_DIRS -from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir - -if TYPE_CHECKING: - from pydantic.fields import FieldInfo - from pipelex import log +from pipelex.config import get_config +from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir from pipelex.tools.typing.module_inspector import ( ModuleFileError, find_classes_in_module, @@ -21,9 +16,6 @@ import_module_from_file_if_has_classes, ) -_NoneType = type(None) -_UnionType = getattr(types, "UnionType", None) # Py3.10+: types.UnionType - class ClassRegistryUtils: @classmethod @@ -95,84 +87,15 @@ def find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> l # Filter out files in excluded directories filtered_files: list[Path] = [] + excluded_dirs = get_config().pipelex.scan_config.excluded_dirs for file_path in all_files: # Check if any parent directory is in the exclude list - should_exclude = any(part in EXCLUDED_SCAN_DIRS for part in file_path.parts) + should_exclude = any(part in excluded_dirs for part in file_path.parts) if not should_exclude: filtered_files.append(file_path) return filtered_files - @staticmethod - def are_classes_equivalent(class_1: type[Any], class_2: type[Any]) -> bool: - """Check if two Pydantic classes are equivalent (same fields, types, descriptions).""" - if not (hasattr(class_1, "model_fields") and hasattr(class_2, "model_fields")): - return class_1 == class_2 - - # Compare model schemas using Pydantic's built-in capabilities - try: - schema_1: dict[str, Any] = class_1.model_json_schema() - schema_2: dict[str, Any] = class_2.model_json_schema() - return schema_1 == schema_2 - except Exception: - # Fallback to manual field comparison if schema comparison fails - fields_1: dict[str, FieldInfo] = class_1.model_fields - fields_2: dict[str, 
FieldInfo] = class_2.model_fields - - if set(fields_1.keys()) != set(fields_2.keys()): - return False - - for field_1_name, field_1_info in fields_1.items(): - field_1: FieldInfo = field_1_info - field_2: FieldInfo = fields_2[field_1_name] - - # Compare field types - if field_1.annotation != field_2.annotation: - return False - - # Compare field descriptions if they exist - if getattr(field_1, "description", None) != getattr(field_2, "description", None): - return False - - # Compare default values - if field_1.default != field_2.default: - return False - - return True - - @staticmethod - def has_compatible_field(class_1: type[Any], class_2: type[Any]) -> bool: - """Check if class_1 has a field whose (possibly wrapped) type matches/subclasses class_2.""" - if not hasattr(class_1, "model_fields"): - return False - - fields: dict[str, FieldInfo] = class_1.model_fields # type: ignore[attr-defined] - - def _is_compatible(t: Any) -> bool: - # Unwrap Annotated[T, ...] - if get_origin(t) is Annotated: - t = get_args(t)[0] - - origin = get_origin(t) - - # Handle unions, including PEP 604 (T | None) - if origin in (Union, _UnionType): - for arg in get_args(t): - if arg is _NoneType: - continue - if _is_compatible(arg): - return True - return False - - # Base case: direct match / subclass - try: - return t is class_2 or (isinstance(t, type) and issubclass(t, class_2)) - except TypeError: - # Not a class type (e.g., typing constructs you don't care about) - return False - - return any(_is_compatible(field.annotation) for field in fields.values()) - @classmethod def import_modules_in_folder( cls, diff --git a/pipelex/tools/func_registry_utils.py b/pipelex/tools/func_registry_utils.py index 4f4d16474..a2e387a6e 100644 --- a/pipelex/tools/func_registry_utils.py +++ b/pipelex/tools/func_registry_utils.py @@ -6,8 +6,8 @@ from typing import Any from pipelex import log +from pipelex.config import get_config from pipelex.tools.func_registry import func_registry, pipe_func -from 
pipelex.tools.misc.common_exclusions import EXCLUDED_SCAN_DIRS from pipelex.tools.misc.file_utils import find_files_in_dir as base_find_files_in_dir from pipelex.tools.typing.module_inspector import ( ModuleFileError, @@ -207,9 +207,10 @@ def _find_files_in_dir(cls, dir_path: str, pattern: str, is_recursive: bool) -> # Filter out files in excluded directories filtered_files: list[Path] = [] + excluded_dirs = get_config().pipelex.scan_config.excluded_dirs for file_path in all_files: # Check if any parent directory is in the exclude list - should_exclude = any(part in EXCLUDED_SCAN_DIRS for part in file_path.parts) + should_exclude = any(part in excluded_dirs for part in file_path.parts) if not should_exclude: filtered_files.append(file_path) diff --git a/pipelex/tools/misc/common_exclusions.py b/pipelex/tools/misc/common_exclusions.py deleted file mode 100644 index d8c20fc15..000000000 --- a/pipelex/tools/misc/common_exclusions.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Common exclusion patterns for directory scanning operations.""" - -# Directories to exclude when scanning for Python files or PLX files -# These directories are typically build artifacts, caches, or environment folders -EXCLUDED_SCAN_DIRS = frozenset( - { - ".venv", - ".git", - "__pycache__", - ".pytest_cache", - ".mypy_cache", - ".ruff_cache", - "node_modules", - ".env", - "results", - } -) diff --git a/pipelex/tools/typing/class_utils.py b/pipelex/tools/typing/class_utils.py new file mode 100644 index 000000000..c61024901 --- /dev/null +++ b/pipelex/tools/typing/class_utils.py @@ -0,0 +1,79 @@ +import types +from typing import TYPE_CHECKING, Annotated, Any, Union, get_args, get_origin + +if TYPE_CHECKING: + from pydantic.fields import FieldInfo + + +_NoneType = type(None) +_UnionType = getattr(types, "UnionType", None) # Py3.10+: types.UnionType + + +def are_classes_equivalent(class_1: type[Any], class_2: type[Any]) -> bool: + """Check if two Pydantic classes are equivalent (same fields, types, 
descriptions).""" + if not (hasattr(class_1, "model_fields") and hasattr(class_2, "model_fields")): + return class_1 == class_2 + + # Compare model schemas using Pydantic's built-in capabilities + try: + schema_1: dict[str, Any] = class_1.model_json_schema() + schema_2: dict[str, Any] = class_2.model_json_schema() + return schema_1 == schema_2 + except Exception: + # Fallback to manual field comparison if schema comparison fails + fields_1: dict[str, FieldInfo] = class_1.model_fields + fields_2: dict[str, FieldInfo] = class_2.model_fields + + if set(fields_1.keys()) != set(fields_2.keys()): + return False + + for field_1_name, field_1_info in fields_1.items(): + field_1: FieldInfo = field_1_info + field_2: FieldInfo = fields_2[field_1_name] + + # Compare field types + if field_1.annotation != field_2.annotation: + return False + + # Compare field descriptions if they exist + if getattr(field_1, "description", None) != getattr(field_2, "description", None): + return False + + # Compare default values + if field_1.default != field_2.default: + return False + + return True + + +def has_compatible_field(class_1: type[Any], class_2: type[Any]) -> bool: + """Check if class_1 has a field whose (possibly wrapped) type matches/subclasses class_2.""" + if not hasattr(class_1, "model_fields"): + return False + + fields: dict[str, FieldInfo] = class_1.model_fields # type: ignore[attr-defined] + + def _is_compatible(t: Any) -> bool: + # Unwrap Annotated[T, ...] 
+ if get_origin(t) is Annotated: + t = get_args(t)[0] + + origin = get_origin(t) + + # Handle unions, including PEP 604 (T | None) + if origin in (Union, _UnionType): + for arg in get_args(t): + if arg is _NoneType: + continue + if _is_compatible(arg): + return True + return False + + # Base case: direct match / subclass + try: + return t is class_2 or (isinstance(t, type) and issubclass(t, class_2)) + except TypeError: + # Not a class type (e.g., typing constructs you don't care about) + return False + + return any(_is_compatible(field.annotation) for field in fields.values()) From 487254bc959df45eb92054954fa9648b66fb45c1 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Tue, 14 Oct 2025 19:31:42 +0200 Subject: [PATCH 084/115] add codegen --- pipelex/cli/_cli.py | 3 +- pipelex/cli/commands/__init__.py | 3 +- pipelex/cli/commands/gen_cmd.py | 110 ++++++++++++++ pipelex/core/concepts/concept.py | 134 ++++++++++++++++++ pipelex/tools/codegen/__init__.py | 2 + pipelex/tools/codegen/runner_generator.py | 120 ++++++++++++++++ run_detail_pipe_condition.py | 45 ++++++ run_pipe_builder.py | 23 +++ run_spec_draft_to_concept_spec.py | 40 ++++++ .../concepts/test_concept_compact_memory.py | 116 +++++++++++++++ 10 files changed, 594 insertions(+), 2 deletions(-) create mode 100644 pipelex/cli/commands/gen_cmd.py create mode 100644 pipelex/tools/codegen/__init__.py create mode 100644 pipelex/tools/codegen/runner_generator.py create mode 100644 run_detail_pipe_condition.py create mode 100644 run_pipe_builder.py create mode 100644 run_spec_draft_to_concept_spec.py create mode 100644 tests/unit/core/concepts/test_concept_compact_memory.py diff --git a/pipelex/cli/_cli.py b/pipelex/cli/_cli.py index 6382ea9e4..4e62fae46 100644 --- a/pipelex/cli/_cli.py +++ b/pipelex/cli/_cli.py @@ -3,7 +3,7 @@ from typer.core import TyperGroup from typing_extensions import override -from pipelex.cli.commands import init_app, show_app, validate_app +from pipelex.cli.commands import gen_app, 
init_app, show_app, validate_app from pipelex.cli.commands.build_cmd import build_app from pipelex.cli.commands.kit_cmd import kit_app @@ -37,3 +37,4 @@ def main() -> None: app.add_typer(show_app, name="show", help="Show and list commands") app.add_typer(build_app, name="build", help="Build artifacts like pipeline blueprints") app.add_typer(kit_app, name="kit", help="Manage kit assets") +app.add_typer(gen_app, name="gen", help="Generate Python files from pipe definitions") diff --git a/pipelex/cli/commands/__init__.py b/pipelex/cli/commands/__init__.py index 2a74b504f..7394d835d 100644 --- a/pipelex/cli/commands/__init__.py +++ b/pipelex/cli/commands/__init__.py @@ -3,8 +3,9 @@ This package organizes CLI commands into logical modules. """ +from pipelex.cli.commands.gen_cmd import gen_app from pipelex.cli.commands.init_cmd import init_app from pipelex.cli.commands.show_cmd import show_app from pipelex.cli.commands.validate_cmd import validate_app -__all__ = ["init_app", "show_app", "validate_app"] +__all__ = ["gen_app", "init_app", "show_app", "validate_app"] diff --git a/pipelex/cli/commands/gen_cmd.py b/pipelex/cli/commands/gen_cmd.py new file mode 100644 index 000000000..7b0619231 --- /dev/null +++ b/pipelex/cli/commands/gen_cmd.py @@ -0,0 +1,110 @@ +"""Commands for generating Python runner files from pipe definitions.""" + +import subprocess +from typing import Annotated + +import typer + +from pipelex.hub import get_required_pipe +from pipelex.pipelex import Pipelex +from pipelex.tools.codegen.runner_generator import generate_runner_code +from pipelex.tools.misc.file_utils import ensure_directory_for_file_path, save_text_to_path + +gen_app = typer.Typer(help="Generate Python runner files from pipe definitions", no_args_is_help=True) + + +def do_generate_runner(pipe_code: str, output_path: str | None, execute: bool, lint: bool) -> None: + """Generate a Python runner file for the given pipe.""" + # Initialize Pipelex + Pipelex.make() + + # Get the pipe + try: + 
pipe = get_required_pipe(pipe_code=pipe_code) + except Exception as e: + typer.echo(typer.style(f"❌ Error: Could not find pipe '{pipe_code}': {e}", fg=typer.colors.RED)) + raise typer.Exit(1) from e + + # Generate the code + try: + runner_code = generate_runner_code(pipe) + except Exception as e: + typer.echo(typer.style(f"❌ Error generating runner code: {e}", fg=typer.colors.RED)) + raise typer.Exit(1) from e + + # Determine output path + if not output_path: + output_path = f"run_{pipe_code}.py" + + # Save the file + try: + ensure_directory_for_file_path(file_path=output_path) + save_text_to_path(text=runner_code, path=output_path) + typer.echo(typer.style(f"✅ Generated runner file: {output_path}", fg=typer.colors.GREEN)) + except Exception as e: + typer.echo(typer.style(f"❌ Error saving file: {e}", fg=typer.colors.RED)) + raise typer.Exit(1) from e + + # Lint the file if requested + if lint: + typer.echo("\n🔍 Running linter...") + result = subprocess.run( # noqa: S603 + ["ruff", "check", output_path], # noqa: S607 + check=False, + capture_output=True, + text=True, + ) + if result.returncode == 0: + typer.echo(typer.style("✅ Linting passed", fg=typer.colors.GREEN)) + else: + typer.echo(typer.style("⚠️ Linting found issues:", fg=typer.colors.YELLOW)) + typer.echo(result.stdout) + typer.echo(result.stderr) + + # Execute the file if requested (with warning) + if execute: + typer.echo("\n⚠️ Note: Execution may fail if input values need to be filled in") + typer.echo("🚀 Executing generated file...") + result = subprocess.run( # noqa: S603 + ["python", output_path], # noqa: S607 + check=False, + capture_output=True, + text=True, + ) + if result.returncode == 0: + typer.echo(typer.style("✅ Execution successful:", fg=typer.colors.GREEN)) + typer.echo(result.stdout) + else: + typer.echo(typer.style("❌ Execution failed:", fg=typer.colors.RED)) + typer.echo(result.stdout) + typer.echo(result.stderr) + + +@gen_app.command("runner") +def generate_runner_cmd( + pipe_code: 
Annotated[str, typer.Argument(help="The pipe code to generate a runner for")], + output: Annotated[ + str | None, + typer.Option("--output", "-o", help="Path to save the generated Python file"), + ] = None, + execute: Annotated[ + bool, + typer.Option("--execute", "-e", help="Execute the generated file after creation"), + ] = False, + lint: Annotated[ + bool, + typer.Option("--lint", "-l", help="Run linter on the generated file"), + ] = False, +) -> None: + """Generate a Python runner file for a pipe. + + The generated file will include: + - All necessary imports + - Example input values based on the pipe's input types + - A function to run the pipeline + - Code to execute the pipeline + + Native concept types (Text, Image, PDF, etc.) will be automatically handled. + Custom concept types will include TODO comments for filling in required fields. + """ + do_generate_runner(pipe_code=pipe_code, output_path=output, execute=execute, lint=lint) diff --git a/pipelex/core/concepts/concept.py b/pipelex/core/concepts/concept.py index 3b981555b..3feec842e 100644 --- a/pipelex/core/concepts/concept.py +++ b/pipelex/core/concepts/concept.py @@ -1,3 +1,6 @@ +import inspect +from typing import Any, cast, get_args, get_origin + from kajson.kajson_manager import KajsonManager from pydantic import BaseModel, ConfigDict, field_validator @@ -11,6 +14,7 @@ from pipelex.exceptions import PipelexUnexpectedError from pipelex.tools.misc.string_utils import pascal_case_to_sentence from pipelex.tools.typing.class_utils import are_classes_equivalent, has_compatible_field +from pipelex.types import StrEnum class Concept(BaseModel): @@ -103,3 +107,133 @@ def search_for_nested_image_fields_in_structure_class(self) -> list[str]: msg = f"Concept class '{self.structure_class_name}' is not a subclass of StuffContent" raise PipelexUnexpectedError(msg) return search_for_nested_image_fields(content_class=structure_class) + + def get_compact_memory_example(self, var_name: str) -> dict[str, Any] | str | 
int: + """Generate an example value for compact memory format based on this concept. + + Compact memory follows these conventions: + - For native concepts that can be represented as simple values (Text, Image, PDF): returns a simple string + - For structured concepts: returns {"concept_code": "...", "content": {...}} + + The content dict is recursively generated based on the StuffContent class structure. + """ + # Get the structure class + structure_class = KajsonManager.get_class_registry().get_class(name=self.structure_class_name) + + # If class not found, return placeholder + if structure_class is None: + return { + "concept_code": self.concept_string, + "content": {}, # Empty dict for unknown structures + } + + # Verify it's a subclass of StuffContent + if not issubclass(structure_class, StuffContent): + return { + "concept_code": self.concept_string, + "content": {}, # Empty dict for invalid structures + } + + # Generate the content based on structure + content_example = self._generate_content_example_for_class(structure_class, var_name) + + # For simple native concepts (Text, Image, PDF), return just the value + if self.structure_class_name == "TextContent": + return cast("str", content_example) # Returns just a string + elif self.structure_class_name in {"ImageContent", "PDFContent"}: + return cast("str", content_example) # Returns just a URL string + elif self.structure_class_name == "NumberContent": + return cast("int", content_example) # Returns just a number + + # For complex concepts, wrap with concept_code + return { + "concept_code": self.concept_string, + "content": content_example, + } + + @classmethod + def _generate_content_example_for_class(cls, content_class: type[StuffContent], var_name: str) -> Any: + """Recursively generate example content based on a StuffContent class structure. 
+ + Args: + content_class: The StuffContent class to generate an example for + var_name: Variable name for generating contextual example values + + Returns: + Example content dict or simple value + """ + class_name = content_class.__name__ + + # Handle simple native content types + if class_name == "TextContent": + return f"{var_name}_text" + elif class_name in {"ImageContent", "PDFContent"}: + return f"{var_name}_url" + elif class_name == "NumberContent": + return 0 + + # For structured content, inspect fields and recursively generate + # Note: model_fields includes inherited fields from parent classes + content_dict: dict[str, Any] = {} + for field_name, field_info in content_class.model_fields.items(): + field_type = field_info.annotation + + # Handle Optional types (e.g., TextContent | None) + origin = get_origin(field_type) + args = get_args(field_type) + + if origin is type(None) or (args and type(None) in args): + # Optional field - get the non-None type + actual_type = next((arg for arg in args if arg is not type(None)), field_type) if args else field_type + else: + actual_type = field_type + + # Re-check origin after unwrapping Optional + origin = get_origin(actual_type) + args = get_args(actual_type) + + # Handle list types + if origin is list: + list_item_type = args[0] if args else str + if hasattr(list_item_type, "__name__"): + if list_item_type.__name__ == "ImageContent": + content_dict[field_name] = [f"{field_name}_url_1", f"{field_name}_url_2"] + elif list_item_type.__name__ == "TextContent": + content_dict[field_name] = [f"{field_name}_text_1", f"{field_name}_text_2"] + elif inspect.isclass(list_item_type) and issubclass(list_item_type, StuffContent): + # List of StuffContent - generate examples + content_dict[field_name] = [cls._generate_content_example_for_class(list_item_type, f"{field_name}_item")] + else: + content_dict[field_name] = [f"{field_name}_item_1"] + else: + content_dict[field_name] = [] + # Handle dict types + elif origin is dict: + 
# Simple example with one key-value pair + content_dict[field_name] = {f"{field_name}_key": f"{field_name}_value"} + # Handle StrEnum types + elif inspect.isclass(actual_type) and issubclass(actual_type, StrEnum): + # Get first enum value + enum_values = list(actual_type) + content_dict[field_name] = enum_values[0].value if enum_values else f"{field_name}_enum_value" + # Handle nested StuffContent + elif inspect.isclass(actual_type) and issubclass(actual_type, StuffContent): + content_dict[field_name] = cls._generate_content_example_for_class(actual_type, field_name) + # Handle basic types + elif actual_type is str: + content_dict[field_name] = f"{field_name}_value" + elif actual_type is int: + content_dict[field_name] = 0 + elif actual_type is float: + content_dict[field_name] = 0.0 + elif actual_type is bool: + content_dict[field_name] = False + else: + # For unknown types, try to get a simple repr + try: + type_name = getattr(actual_type, "__name__", str(actual_type)) + content_dict[field_name] = f"{field_name}_value # TODO: Fill {type_name}" + except Exception: + content_dict[field_name] = f"{field_name}_value" + + return content_dict diff --git a/pipelex/tools/codegen/__init__.py b/pipelex/tools/codegen/__init__.py new file mode 100644 index 000000000..8b230f56b --- /dev/null +++ b/pipelex/tools/codegen/__init__.py @@ -0,0 +1,2 @@ +"""Code generation utilities for Pipelex.""" + diff --git a/pipelex/tools/codegen/runner_generator.py b/pipelex/tools/codegen/runner_generator.py new file mode 100644 index 000000000..0fdadb064 --- /dev/null +++ b/pipelex/tools/codegen/runner_generator.py @@ -0,0 +1,120 @@ +"""Generate Python runner code from pipe definitions and concepts.""" + +from typing import Any + +from pipelex.core.concepts.concept import Concept +from pipelex.core.pipes.pipe_abstract import PipeAbstract + + +def _value_to_python_code(value: Any, indent_level: int = 0) -> str: + """Convert a value to Python code representation recursively. 
+ + Args: + value: The value to convert (can be str, int, dict, list, etc.) + indent_level: Current indentation level for nested dicts + + Returns: + String representation of Python code + """ + indent = " " * indent_level + + if isinstance(value, str): + # String value - add quotes + return f'"{value}"' + elif isinstance(value, bool): + # Boolean - Python True/False + return str(value) + elif isinstance(value, (int, float)): + # Numeric value + return str(value) + elif isinstance(value, list): + # List - recursively convert items + if not value: + return "[]" + items: list[str] = [_value_to_python_code(item, indent_level + 1) for item in value] # pyright: ignore[reportUnknownVariableType] + return "[" + ", ".join(items) + "]" + elif isinstance(value, dict): + # Dict - recursively convert with proper formatting + if not value: + return "{}" + lines: list[str] = [] + for key, val in value.items(): # pyright: ignore[reportUnknownVariableType] + val_code = _value_to_python_code(val, indent_level + 1) + lines.append(f'{indent} "{key}": {val_code}') + return "{\n" + ",\n".join(lines) + f"\n{indent}}}" + else: + # Fallback - use repr + return repr(value) + + +def generate_compact_memory_entry(var_name: str, concept: Concept) -> str: + """Generate the compact_memory dictionary entry for a given input.""" + example_value = concept.get_compact_memory_example(var_name) + + # Convert the example value to a Python code string + value_str = _value_to_python_code(example_value, indent_level=3) + + return f' "{var_name}": {value_str},' + + +def generate_runner_code(pipe: PipeAbstract) -> str: + """Generate the complete Python runner code for a pipe.""" + pipe_code = pipe.code + inputs = pipe.inputs + + # Build import section - minimal imports since we use compact memory format + import_lines = [ + "import asyncio", + "", + "from pipelex.pipelex import Pipelex", + "from pipelex.pipeline.execute import execute_pipeline", + ] + + # Build input_memory entries + if inputs.nb_inputs > 
0: + input_memory_entries: list[str] = [] + for var_name, input_req in inputs.root.items(): + concept = input_req.concept + entry = generate_compact_memory_entry(var_name, concept) + input_memory_entries.append(entry) + input_memory_block = "\n".join(input_memory_entries) + else: + input_memory_block = " # No inputs required" + + # Build the main function + function_lines = [ + "", + "", + f"async def run_{pipe_code}():", + ' """Run the pipeline and return the result."""', + " return await execute_pipeline(", + f' pipe_code="{pipe_code}",', + ] + + if inputs.nb_inputs > 0: + function_lines.extend( + [ + " input_memory={", + input_memory_block, + " },", + ] + ) + + function_lines.extend( + [ + " )", + "", + "", + 'if __name__ == "__main__":', + " # Initialize Pipelex", + " Pipelex.make()", + "", + " # Run the pipeline", + f" result = asyncio.run(run_{pipe_code}())", + "", + ] + ) + + # Combine everything + code_lines = import_lines + function_lines + return "\n".join(code_lines) diff --git a/run_detail_pipe_condition.py b/run_detail_pipe_condition.py new file mode 100644 index 000000000..70aa5b506 --- /dev/null +++ b/run_detail_pipe_condition.py @@ -0,0 +1,45 @@ +import asyncio + +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def run_detail_pipe_condition(): + """Run the pipeline and return the result.""" + return await execute_pipeline( + pipe_code="detail_pipe_condition", + input_memory={ + "plan_draft": "plan_draft_text", + "pipe_signature": { + "concept_code": "pipe_design.PipeSignature", + "content": { + "code": "code_value", + "type": "PipeFunc", + "pipe_category": "pipe_category_value # TODO: Fill Literal", + "description": "description_value", + "inputs": {"inputs_key": "inputs_value"}, + "result": "result_value", + "output": "output_value", + "pipe_dependencies": ["pipe_dependencies_item_1"], + }, + }, + "concept_specs": { + "concept_code": "concept.ConceptSpec", + "content": { + "the_concept_code": 
"the_concept_code_value", + "description": "description_value", + "structure": {"structure_key": "structure_value"}, + "refines": "refines_value", + }, + }, + }, + ) + + +if __name__ == "__main__": + # Initialize Pipelex + Pipelex.make() + + # Run the pipeline + result = asyncio.run(run_detail_pipe_condition()) + print(result.main_stuff_as_str) diff --git a/run_pipe_builder.py b/run_pipe_builder.py new file mode 100644 index 000000000..2d82be579 --- /dev/null +++ b/run_pipe_builder.py @@ -0,0 +1,23 @@ +import asyncio + +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def run_pipe_builder(): + """Run the pipeline and return the result.""" + return await execute_pipeline( + pipe_code="pipe_builder", + input_memory={ + "brief": "brief_text", + }, + ) + + +if __name__ == "__main__": + # Initialize Pipelex + Pipelex.make() + + # Run the pipeline + result = asyncio.run(run_pipe_builder()) + print(result.main_stuff_as_str) diff --git a/run_spec_draft_to_concept_spec.py b/run_spec_draft_to_concept_spec.py new file mode 100644 index 000000000..1d1283777 --- /dev/null +++ b/run_spec_draft_to_concept_spec.py @@ -0,0 +1,40 @@ +import asyncio + +from pipelex.pipelex import Pipelex +from pipelex.pipeline.execute import execute_pipeline + + +async def run_spec_draft_to_concept_spec(): + """Run the pipeline and return the result.""" + return await execute_pipeline( + pipe_code="spec_draft_to_concept_spec", + input_memory={ + "concept_spec_draft": { + "concept_code": "concept.ConceptSpecDraft", + "content": { + "the_concept_code": "the_concept_code_value", + "description": "description_value", + "structure": "structure_value", + "refines": "refines_value", + }, + }, + "concept_spec_structures": { + "concept_code": "concept.ConceptStructureSpec", + "content": { + "the_field_name": "the_field_name_value", + "description": "description_value", + "type": "text", + "required": False, + "default_value": "default_value_value # TODO: 
Fill Any", + }, + }, + }, + ) + + +if __name__ == "__main__": + # Initialize Pipelex + Pipelex.make() + + # Run the pipeline + result = asyncio.run(run_spec_draft_to_concept_spec()) diff --git a/tests/unit/core/concepts/test_concept_compact_memory.py b/tests/unit/core/concepts/test_concept_compact_memory.py new file mode 100644 index 000000000..91303e419 --- /dev/null +++ b/tests/unit/core/concepts/test_concept_compact_memory.py @@ -0,0 +1,116 @@ +"""Unit tests for Concept compact memory generation methods.""" + +from __future__ import annotations + +import pytest + +from pipelex.core.concepts.concept_blueprint import ConceptBlueprint +from pipelex.core.concepts.concept_factory import ConceptFactory +from pipelex.core.concepts.concept_native import NativeConceptCode + + +class TestConceptCompactMemory: + """Test Concept methods for generating compact memory examples.""" + + @pytest.mark.parametrize( + ("concept_code", "domain", "structure_class_name", "var_name", "expected_value", "expected_type"), + [ + # Text concept - simple string + ("Text", "native", "TextContent", "message", "message_text", str), + # Custom text-based concept - simple string + ("CustomText", "test_domain", "TextContent", "raw_text", "raw_text_text", str), + # Image concept - URL string + ("Image", "native", "ImageContent", "photo", "photo_url", str), + # PDF concept - URL string + ("PDF", "native", "PDFContent", "document", "document_url", str), + # Number concept + ("Number", "native", "NumberContent", "count", 0, int), + ], + ) + def test_get_compact_memory_example_simple_types( + self, + concept_code: str, + domain: str, + structure_class_name: str, + var_name: str, + expected_value: str | int, + expected_type: type, + ) -> None: + """Test that get_compact_memory_example generates correct simple values.""" + # Create concept using ConceptFactory + concept = ConceptFactory.make( + concept_code=concept_code, + domain=domain, + description=f"Test {concept_code}", + 
structure_class_name=structure_class_name, + refines=None, + ) + + # Test + result = concept.get_compact_memory_example(var_name) + assert isinstance(result, expected_type) + assert result == expected_value + + def test_get_compact_memory_example_text_and_images(self) -> None: + """Test compact memory example for TextAndImages concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.TEXT_AND_IMAGES) + + result = concept.get_compact_memory_example("content") + + # Should return a dict with concept_code and content + assert isinstance(result, dict) + assert result["concept_code"] == "native.TextAndImages" + assert "content" in result + assert "text" in result["content"] + assert result["content"]["text"] == "text_text" # Generated from field name "text" + assert "images" in result["content"] + assert isinstance(result["content"]["images"], list) + + def test_get_compact_memory_example_page(self) -> None: + """Test compact memory example for Page concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.PAGE) + + result = concept.get_compact_memory_example("page") + + # Should return a dict with concept_code and content + assert isinstance(result, dict) + assert result["concept_code"] == "native.Page" + assert "content" in result + assert "text_and_images" in result["content"] + assert isinstance(result["content"]["text_and_images"], dict) + + def test_get_compact_memory_example_custom_structured(self) -> None: + """Test compact memory example for a custom structured concept.""" + concept = ConceptFactory.make( + concept_code="Invoice", + domain="accounting", + description="Invoice data", + structure_class_name="Invoice", + refines=None, + ) + + result = concept.get_compact_memory_example("invoice") + + # Should return a dict with concept_code and content + assert isinstance(result, dict) + assert result["concept_code"] == "accounting.Invoice" + assert "content" in result + + def 
test_get_compact_memory_example_for_refined_text_concept(self) -> None: + """Test compact memory example for a concept that refines Text.""" + # Create a concept that refines Text + blueprint = ConceptBlueprint( + description="A question", + refines="native.Text", + ) + + concept = ConceptFactory.make_from_blueprint( + domain="test_domain", + concept_code="Question", + blueprint=blueprint, + ) + + # Test - should return a simple string since it uses TextContent + result = concept.get_compact_memory_example("question") + assert isinstance(result, str) + assert result == "question_text" From d86654ae0161e07663750c2bbdb1ce3f0940a8e6 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Tue, 14 Oct 2025 19:46:20 +0200 Subject: [PATCH 085/115] merge commit --- pipelex/core/concepts/concept.py | 1 + pipelex/system/__init__.py | 0 pipelex/system/registries/__init__py | 0 pipelex/tools/codegen/__init__.py | 1 - .../tools/typing/test_find_classes_in_module.py | 11 ++++++++++- 5 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 pipelex/system/__init__.py create mode 100644 pipelex/system/registries/__init__py diff --git a/pipelex/core/concepts/concept.py b/pipelex/core/concepts/concept.py index 59fc46529..3feec842e 100644 --- a/pipelex/core/concepts/concept.py +++ b/pipelex/core/concepts/concept.py @@ -16,6 +16,7 @@ from pipelex.tools.typing.class_utils import are_classes_equivalent, has_compatible_field from pipelex.types import StrEnum + class Concept(BaseModel): model_config = ConfigDict(extra="ignore", strict=True) diff --git a/pipelex/system/__init__.py b/pipelex/system/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelex/system/registries/__init__py b/pipelex/system/registries/__init__py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelex/tools/codegen/__init__.py b/pipelex/tools/codegen/__init__.py index 8b230f56b..c7351a0c9 100644 --- a/pipelex/tools/codegen/__init__.py +++ b/pipelex/tools/codegen/__init__.py 
@@ -1,2 +1 @@ """Code generation utilities for Pipelex.""" - diff --git a/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py b/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py index 91fb03d62..af110efa2 100644 --- a/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py +++ b/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py @@ -1,6 +1,15 @@ +import sys import types +from pathlib import Path -from pipelex.tools.typing.module_inspector import find_classes_in_module +import pytest + +from pipelex.tools.typing.module_inspector import ( + ModuleFileError, + find_class_names_in_file, + find_classes_in_module, + import_module_from_file_if_has_classes, +) class TestFindClassesInModule: From af00c0f6a597d02448da0ef47124208bd4aafb4f Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 00:26:34 +0200 Subject: [PATCH 086/115] add runner --- pipelex/core/concepts/concept.py | 20 ++++-- pipelex/tools/codegen/runner_generator.py | 42 +++++++++--- .../concepts/test_concept_compact_memory.py | 67 ++++++++----------- 3 files changed, 76 insertions(+), 53 deletions(-) diff --git a/pipelex/core/concepts/concept.py b/pipelex/core/concepts/concept.py index 3feec842e..ae2d73ba0 100644 --- a/pipelex/core/concepts/concept.py +++ b/pipelex/core/concepts/concept.py @@ -137,13 +137,23 @@ def get_compact_memory_example(self, var_name: str) -> dict[str, Any] | str | in # Generate the content based on structure content_example = self._generate_content_example_for_class(structure_class, var_name) - # For simple native concepts (Text, Image, PDF), return just the value + # For simple native concepts - return compact format if self.structure_class_name == "TextContent": - return cast("str", content_example) # Returns just a string - elif self.structure_class_name in {"ImageContent", "PDFContent"}: - return cast("str", content_example) # Returns just a URL string + return cast("str", content_example) # Just a string + elif 
self.structure_class_name == "ImageContent": + # Return dict with class instantiation info + return { + "_class": "ImageContent", + "url": cast("str", content_example), + } + elif self.structure_class_name == "PDFContent": + # Return dict with class instantiation info + return { + "_class": "PDFContent", + "url": cast("str", content_example), + } elif self.structure_class_name == "NumberContent": - return cast("int", content_example) # Returns just a number + return cast("int", content_example) # Just a number # For complex concepts, wrap with concept_code return { diff --git a/pipelex/tools/codegen/runner_generator.py b/pipelex/tools/codegen/runner_generator.py index 0fdadb064..b4c07261a 100644 --- a/pipelex/tools/codegen/runner_generator.py +++ b/pipelex/tools/codegen/runner_generator.py @@ -18,7 +18,14 @@ def _value_to_python_code(value: Any, indent_level: int = 0) -> str: """ indent = " " * indent_level - if isinstance(value, str): + if isinstance(value, dict) and "_class" in value: + # Special handling for Content class instantiation (e.g., PDFContent, ImageContent) + class_name = value["_class"] # pyright: ignore[reportUnknownVariableType] + if class_name in {"PDFContent", "ImageContent"}: + url = value.get("url", "your_url") # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType, reportUnknownVariableType] + return f'{class_name}(url="{url}")' + return str(value) # pyright: ignore[reportUnknownArgumentType] + elif isinstance(value, str): # String value - add quotes return f'"{value}"' elif isinstance(value, bool): @@ -62,13 +69,31 @@ def generate_runner_code(pipe: PipeAbstract) -> str: pipe_code = pipe.code inputs = pipe.inputs - # Build import section - minimal imports since we use compact memory format - import_lines = [ - "import asyncio", - "", - "from pipelex.pipelex import Pipelex", - "from pipelex.pipeline.execute import execute_pipeline", - ] + # Determine which imports are needed based on input concepts + needs_pdf = False + 
needs_image = False + for input_req in inputs.root.values(): + concept = input_req.concept + if concept.structure_class_name == "PDFContent": + needs_pdf = True + elif concept.structure_class_name == "ImageContent": + needs_image = True + + # Build import section + import_lines = ["import asyncio", ""] + + # Add content class imports if needed + if needs_pdf: + import_lines.append("from pipelex.core.stuffs.pdf_content import PDFContent") + if needs_image: + import_lines.append("from pipelex.core.stuffs.image_content import ImageContent") + + import_lines.extend( + [ + "from pipelex.pipelex import Pipelex", + "from pipelex.pipeline.execute import execute_pipeline", + ] + ) # Build input_memory entries if inputs.nb_inputs > 0: @@ -86,7 +111,6 @@ def generate_runner_code(pipe: PipeAbstract) -> str: "", "", f"async def run_{pipe_code}():", - ' """Run the pipeline and return the result."""', " return await execute_pipeline(", f' pipe_code="{pipe_code}",', ] diff --git a/tests/unit/core/concepts/test_concept_compact_memory.py b/tests/unit/core/concepts/test_concept_compact_memory.py index 91303e419..a5edb15ad 100644 --- a/tests/unit/core/concepts/test_concept_compact_memory.py +++ b/tests/unit/core/concepts/test_concept_compact_memory.py @@ -2,8 +2,6 @@ from __future__ import annotations -import pytest - from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode @@ -12,44 +10,35 @@ class TestConceptCompactMemory: """Test Concept methods for generating compact memory examples.""" - @pytest.mark.parametrize( - ("concept_code", "domain", "structure_class_name", "var_name", "expected_value", "expected_type"), - [ - # Text concept - simple string - ("Text", "native", "TextContent", "message", "message_text", str), - # Custom text-based concept - simple string - ("CustomText", "test_domain", "TextContent", "raw_text", "raw_text_text", 
str), - # Image concept - URL string - ("Image", "native", "ImageContent", "photo", "photo_url", str), - # PDF concept - URL string - ("PDF", "native", "PDFContent", "document", "document_url", str), - # Number concept - ("Number", "native", "NumberContent", "count", 0, int), - ], - ) - def test_get_compact_memory_example_simple_types( - self, - concept_code: str, - domain: str, - structure_class_name: str, - var_name: str, - expected_value: str | int, - expected_type: type, - ) -> None: - """Test that get_compact_memory_example generates correct simple values.""" - # Create concept using ConceptFactory - concept = ConceptFactory.make( - concept_code=concept_code, - domain=domain, - description=f"Test {concept_code}", - structure_class_name=structure_class_name, - refines=None, - ) + def test_get_compact_memory_example_text(self) -> None: + """Test compact memory example for Text concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.TEXT) + result = concept.get_compact_memory_example("message") + assert isinstance(result, str) + assert result == "message_text" + + def test_get_compact_memory_example_number(self) -> None: + """Test compact memory example for Number concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.NUMBER) + result = concept.get_compact_memory_example("count") + assert isinstance(result, int) + assert result == 0 + + def test_get_compact_memory_example_image(self) -> None: + """Test compact memory example for Image concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.IMAGE) + result = concept.get_compact_memory_example("photo") + assert isinstance(result, dict) + assert result["_class"] == "ImageContent" + assert result["url"] == "photo_url" - # Test - result = concept.get_compact_memory_example(var_name) - assert isinstance(result, expected_type) - assert result == expected_value + def test_get_compact_memory_example_pdf(self) -> None: + """Test compact memory example for PDF 
concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.PDF) + result = concept.get_compact_memory_example("document") + assert isinstance(result, dict) + assert result["_class"] == "PDFContent" + assert result["url"] == "document_url" def test_get_compact_memory_example_text_and_images(self) -> None: """Test compact memory example for TextAndImages concept.""" From e1be50d6e6f12593f85d3b9cbfc92353193ae28c Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 11:30:50 +0200 Subject: [PATCH 087/115] fix cli --- pipelex/cli/_cli.py | 4 +- pipelex/cli/commands/__init__.py | 4 +- .../cli/commands/{gen_cmd.py => run_cmd.py} | 14 +- run_detail_pipe_condition.py | 45 ----- run_pipe_builder.py | 23 --- run_spec_draft_to_concept_spec.py | 40 ----- uv.lock | 169 ++++++++++-------- 7 files changed, 107 insertions(+), 192 deletions(-) rename pipelex/cli/commands/{gen_cmd.py => run_cmd.py} (90%) delete mode 100644 run_detail_pipe_condition.py delete mode 100644 run_pipe_builder.py delete mode 100644 run_spec_draft_to_concept_spec.py diff --git a/pipelex/cli/_cli.py b/pipelex/cli/_cli.py index 4e62fae46..f0ab7a041 100644 --- a/pipelex/cli/_cli.py +++ b/pipelex/cli/_cli.py @@ -3,7 +3,7 @@ from typer.core import TyperGroup from typing_extensions import override -from pipelex.cli.commands import gen_app, init_app, show_app, validate_app +from pipelex.cli.commands import init_app, run_app, show_app, validate_app from pipelex.cli.commands.build_cmd import build_app from pipelex.cli.commands.kit_cmd import kit_app @@ -37,4 +37,4 @@ def main() -> None: app.add_typer(show_app, name="show", help="Show and list commands") app.add_typer(build_app, name="build", help="Build artifacts like pipeline blueprints") app.add_typer(kit_app, name="kit", help="Manage kit assets") -app.add_typer(gen_app, name="gen", help="Generate Python files from pipe definitions") +app.add_typer(run_app, name="run", help="Run pipelines and prepare runner files") diff --git 
a/pipelex/cli/commands/__init__.py b/pipelex/cli/commands/__init__.py index 7394d835d..bf8a79d19 100644 --- a/pipelex/cli/commands/__init__.py +++ b/pipelex/cli/commands/__init__.py @@ -3,9 +3,9 @@ This package organizes CLI commands into logical modules. """ -from pipelex.cli.commands.gen_cmd import gen_app from pipelex.cli.commands.init_cmd import init_app +from pipelex.cli.commands.run_cmd import run_app from pipelex.cli.commands.show_cmd import show_app from pipelex.cli.commands.validate_cmd import validate_app -__all__ = ["gen_app", "init_app", "show_app", "validate_app"] +__all__ = ["init_app", "run_app", "show_app", "validate_app"] diff --git a/pipelex/cli/commands/gen_cmd.py b/pipelex/cli/commands/run_cmd.py similarity index 90% rename from pipelex/cli/commands/gen_cmd.py rename to pipelex/cli/commands/run_cmd.py index 7b0619231..561ad9c0a 100644 --- a/pipelex/cli/commands/gen_cmd.py +++ b/pipelex/cli/commands/run_cmd.py @@ -1,5 +1,3 @@ -"""Commands for generating Python runner files from pipe definitions.""" - import subprocess from typing import Annotated @@ -10,7 +8,7 @@ from pipelex.tools.codegen.runner_generator import generate_runner_code from pipelex.tools.misc.file_utils import ensure_directory_for_file_path, save_text_to_path -gen_app = typer.Typer(help="Generate Python runner files from pipe definitions", no_args_is_help=True) +run_app = typer.Typer(help="Run pipelines and generate runner files", no_args_is_help=True) def do_generate_runner(pipe_code: str, output_path: str | None, execute: bool, lint: bool) -> None: @@ -80,9 +78,9 @@ def do_generate_runner(pipe_code: str, output_path: str | None, execute: bool, l typer.echo(result.stderr) -@gen_app.command("runner") -def generate_runner_cmd( - pipe_code: Annotated[str, typer.Argument(help="The pipe code to generate a runner for")], +@run_app.command("prepare") +def prepare_runner_cmd( + pipe_code: Annotated[str, typer.Argument(help="The pipe code to prepare a runner for")], output: Annotated[ str 
| None, typer.Option("--output", "-o", help="Path to save the generated Python file"), @@ -96,7 +94,7 @@ def generate_runner_cmd( typer.Option("--lint", "-l", help="Run linter on the generated file"), ] = False, ) -> None: - """Generate a Python runner file for a pipe. + """Prepare a Python runner file for a pipe. The generated file will include: - All necessary imports @@ -105,6 +103,6 @@ def generate_runner_cmd( - Code to execute the pipeline Native concept types (Text, Image, PDF, etc.) will be automatically handled. - Custom concept types will include TODO comments for filling in required fields. + Custom concept types will have their structure recursively generated. """ do_generate_runner(pipe_code=pipe_code, output_path=output, execute=execute, lint=lint) diff --git a/run_detail_pipe_condition.py b/run_detail_pipe_condition.py deleted file mode 100644 index 70aa5b506..000000000 --- a/run_detail_pipe_condition.py +++ /dev/null @@ -1,45 +0,0 @@ -import asyncio - -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def run_detail_pipe_condition(): - """Run the pipeline and return the result.""" - return await execute_pipeline( - pipe_code="detail_pipe_condition", - input_memory={ - "plan_draft": "plan_draft_text", - "pipe_signature": { - "concept_code": "pipe_design.PipeSignature", - "content": { - "code": "code_value", - "type": "PipeFunc", - "pipe_category": "pipe_category_value # TODO: Fill Literal", - "description": "description_value", - "inputs": {"inputs_key": "inputs_value"}, - "result": "result_value", - "output": "output_value", - "pipe_dependencies": ["pipe_dependencies_item_1"], - }, - }, - "concept_specs": { - "concept_code": "concept.ConceptSpec", - "content": { - "the_concept_code": "the_concept_code_value", - "description": "description_value", - "structure": {"structure_key": "structure_value"}, - "refines": "refines_value", - }, - }, - }, - ) - - -if __name__ == "__main__": - # Initialize 
Pipelex - Pipelex.make() - - # Run the pipeline - result = asyncio.run(run_detail_pipe_condition()) - print(result.main_stuff_as_str) diff --git a/run_pipe_builder.py b/run_pipe_builder.py deleted file mode 100644 index 2d82be579..000000000 --- a/run_pipe_builder.py +++ /dev/null @@ -1,23 +0,0 @@ -import asyncio - -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def run_pipe_builder(): - """Run the pipeline and return the result.""" - return await execute_pipeline( - pipe_code="pipe_builder", - input_memory={ - "brief": "brief_text", - }, - ) - - -if __name__ == "__main__": - # Initialize Pipelex - Pipelex.make() - - # Run the pipeline - result = asyncio.run(run_pipe_builder()) - print(result.main_stuff_as_str) diff --git a/run_spec_draft_to_concept_spec.py b/run_spec_draft_to_concept_spec.py deleted file mode 100644 index 1d1283777..000000000 --- a/run_spec_draft_to_concept_spec.py +++ /dev/null @@ -1,40 +0,0 @@ -import asyncio - -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def run_spec_draft_to_concept_spec(): - """Run the pipeline and return the result.""" - return await execute_pipeline( - pipe_code="spec_draft_to_concept_spec", - input_memory={ - "concept_spec_draft": { - "concept_code": "concept.ConceptSpecDraft", - "content": { - "the_concept_code": "the_concept_code_value", - "description": "description_value", - "structure": "structure_value", - "refines": "refines_value", - }, - }, - "concept_spec_structures": { - "concept_code": "concept.ConceptStructureSpec", - "content": { - "the_field_name": "the_field_name_value", - "description": "description_value", - "type": "text", - "required": False, - "default_value": "default_value_value # TODO: Fill Any", - }, - }, - }, - ) - - -if __name__ == "__main__": - # Initialize Pipelex - Pipelex.make() - - # Run the pipeline - result = asyncio.run(run_spec_draft_to_concept_spec()) diff --git a/uv.lock 
b/uv.lock index 47ac2ecd8..587250bb8 100644 --- a/uv.lock +++ b/uv.lock @@ -335,16 +335,16 @@ wheels = [ [[package]] name = "boto3-stubs" -version = "1.40.51" +version = "1.40.52" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore-stubs" }, { name = "types-s3transfer" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/82/4d/b07f9ee0fe432fa8ec6dc368ee7a0409e2b6d9df2c5a2a88265c9b6fd878/boto3_stubs-1.40.51.tar.gz", hash = "sha256:0281e820813a310954e15fb7c1d470c24c34c1cccc7b1ddad977fa293a1080a9", size = 100890, upload-time = "2025-10-13T19:25:36.126Z" } +sdist = { url = "https://files.pythonhosted.org/packages/00/7b/92a266747a504c09c40a382364e5f041a58acc2959f2920aa9b4ccf5e9db/boto3_stubs-1.40.52.tar.gz", hash = "sha256:bd20a7bc9122bb1b939195431b9d3f540b1ef050103bc1720d786960907464fd", size = 100895, upload-time = "2025-10-14T20:45:42.254Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/2e/4476431f11fc3bf7a7e0f4f5c275f17607aa127da7c0d8685a4dc6bf6291/boto3_stubs-1.40.51-py3-none-any.whl", hash = "sha256:896d0ffaa298ce1749eea1a54946320a0f4e07c6912f8e1f8c0744a708ee25a4", size = 69709, upload-time = "2025-10-13T19:25:23.116Z" }, + { url = "https://files.pythonhosted.org/packages/43/cf/d08cd0df2639896db574490181c363fc491404607b867a1bf04a623c4a19/boto3_stubs-1.40.52-py3-none-any.whl", hash = "sha256:5e2b74b7b5ad71ca2b8c35a8d3bf1e4ef60317b1682b5e7dda9f16a1c0b43844", size = 69709, upload-time = "2025-10-14T20:45:33.777Z" }, ] [[package]] @@ -363,14 +363,14 @@ wheels = [ [[package]] name = "botocore-stubs" -version = "1.40.50" +version = "1.40.52" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "types-awscrt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/4b/86ad2d24ea36eed159c8e1f85a2645bfeedae34ccb8c77ea8c99abbd66d1/botocore_stubs-1.40.50.tar.gz", hash = 
"sha256:d772b2d3aea6b4e464963fe45b2d504eee7bc3842f047cebbae5492b3993e0fd", size = 42250, upload-time = "2025-10-11T23:08:59.925Z" } +sdist = { url = "https://files.pythonhosted.org/packages/70/e5/a3a7ad9b45e6612989bb52dd1b586a2e2b9539ce5b097c15d15a6b0d54cb/botocore_stubs-1.40.52.tar.gz", hash = "sha256:9e9809e563eb2c925051d849257bdc7605760bcbb62b3d5fe3117f9385345488", size = 42232, upload-time = "2025-10-14T21:21:00.995Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/c1/4a736155b2d5dd7fdd09af8fba9ed59693c565d6e2bc1b5adc769da36cb5/botocore_stubs-1.40.50-py3-none-any.whl", hash = "sha256:7cb8d636e061e600929cd03339c3bbc162c21435b4bfeb6413cf7b0b612e7de0", size = 66541, upload-time = "2025-10-11T23:08:57.678Z" }, + { url = "https://files.pythonhosted.org/packages/a1/be/f0be116f4d2db93400a4c0c4b92e79200df562c8a4b5c1fc97349b6460c2/botocore_stubs-1.40.52-py3-none-any.whl", hash = "sha256:4f3e244aad34997cb88aa81c64eb0df23c19d3cbc9337366dfb7f992807d2e53", size = 66541, upload-time = "2025-10-14T21:20:58.699Z" }, ] [[package]] @@ -475,66 +475,91 @@ wheels = [ [[package]] name = "charset-normalizer" -version = "3.4.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/98/f3b8013223728a99b908c9344da3aa04ee6e3fa235f19409033eda92fb78/charset_normalizer-3.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fb7f67a1bfa6e40b438170ebdc8158b78dc465a5a67b6dde178a46987b244a72", size = 207695, upload-time = "2025-08-09T07:55:36.452Z" }, - { url = 
"https://files.pythonhosted.org/packages/21/40/5188be1e3118c82dcb7c2a5ba101b783822cfb413a0268ed3be0468532de/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc9370a2da1ac13f0153780040f465839e6cccb4a1e44810124b4e22483c93fe", size = 147153, upload-time = "2025-08-09T07:55:38.467Z" }, - { url = "https://files.pythonhosted.org/packages/37/60/5d0d74bc1e1380f0b72c327948d9c2aca14b46a9efd87604e724260f384c/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:07a0eae9e2787b586e129fdcbe1af6997f8d0e5abaa0bc98c0e20e124d67e601", size = 160428, upload-time = "2025-08-09T07:55:40.072Z" }, - { url = "https://files.pythonhosted.org/packages/85/9a/d891f63722d9158688de58d050c59dc3da560ea7f04f4c53e769de5140f5/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:74d77e25adda8581ffc1c720f1c81ca082921329452eba58b16233ab1842141c", size = 157627, upload-time = "2025-08-09T07:55:41.706Z" }, - { url = "https://files.pythonhosted.org/packages/65/1a/7425c952944a6521a9cfa7e675343f83fd82085b8af2b1373a2409c683dc/charset_normalizer-3.4.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0e909868420b7049dafd3a31d45125b31143eec59235311fc4c57ea26a4acd2", size = 152388, upload-time = "2025-08-09T07:55:43.262Z" }, - { url = "https://files.pythonhosted.org/packages/f0/c9/a2c9c2a355a8594ce2446085e2ec97fd44d323c684ff32042e2a6b718e1d/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c6f162aabe9a91a309510d74eeb6507fab5fff92337a15acbe77753d88d9dcf0", size = 150077, upload-time = "2025-08-09T07:55:44.903Z" }, - { url = "https://files.pythonhosted.org/packages/3b/38/20a1f44e4851aa1c9105d6e7110c9d020e093dfa5836d712a5f074a12bf7/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:4ca4c094de7771a98d7fbd67d9e5dbf1eb73efa4f744a730437d8a3a5cf994f0", size = 161631, upload-time = "2025-08-09T07:55:46.346Z" }, - { url = "https://files.pythonhosted.org/packages/a4/fa/384d2c0f57edad03d7bec3ebefb462090d8905b4ff5a2d2525f3bb711fac/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:02425242e96bcf29a49711b0ca9f37e451da7c70562bc10e8ed992a5a7a25cc0", size = 159210, upload-time = "2025-08-09T07:55:47.539Z" }, - { url = "https://files.pythonhosted.org/packages/33/9e/eca49d35867ca2db336b6ca27617deed4653b97ebf45dfc21311ce473c37/charset_normalizer-3.4.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:78deba4d8f9590fe4dae384aeff04082510a709957e968753ff3c48399f6f92a", size = 153739, upload-time = "2025-08-09T07:55:48.744Z" }, - { url = "https://files.pythonhosted.org/packages/2a/91/26c3036e62dfe8de8061182d33be5025e2424002125c9500faff74a6735e/charset_normalizer-3.4.3-cp310-cp310-win32.whl", hash = "sha256:d79c198e27580c8e958906f803e63cddb77653731be08851c7df0b1a14a8fc0f", size = 99825, upload-time = "2025-08-09T07:55:50.305Z" }, - { url = "https://files.pythonhosted.org/packages/e2/c6/f05db471f81af1fa01839d44ae2a8bfeec8d2a8b4590f16c4e7393afd323/charset_normalizer-3.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:c6e490913a46fa054e03699c70019ab869e990270597018cef1d8562132c2669", size = 107452, upload-time = "2025-08-09T07:55:51.461Z" }, - { url = "https://files.pythonhosted.org/packages/7f/b5/991245018615474a60965a7c9cd2b4efbaabd16d582a5547c47ee1c7730b/charset_normalizer-3.4.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b256ee2e749283ef3ddcff51a675ff43798d92d746d1a6e4631bf8c707d22d0b", size = 204483, upload-time = "2025-08-09T07:55:53.12Z" }, - { url = "https://files.pythonhosted.org/packages/c7/2a/ae245c41c06299ec18262825c1569c5d3298fc920e4ddf56ab011b417efd/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:13faeacfe61784e2559e690fc53fa4c5ae97c6fcedb8eb6fb8d0a15b475d2c64", size = 145520, upload-time = "2025-08-09T07:55:54.712Z" }, - { url = "https://files.pythonhosted.org/packages/3a/a4/b3b6c76e7a635748c4421d2b92c7b8f90a432f98bda5082049af37ffc8e3/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00237675befef519d9af72169d8604a067d92755e84fe76492fef5441db05b91", size = 158876, upload-time = "2025-08-09T07:55:56.024Z" }, - { url = "https://files.pythonhosted.org/packages/e2/e6/63bb0e10f90a8243c5def74b5b105b3bbbfb3e7bb753915fe333fb0c11ea/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:585f3b2a80fbd26b048a0be90c5aae8f06605d3c92615911c3a2b03a8a3b796f", size = 156083, upload-time = "2025-08-09T07:55:57.582Z" }, - { url = "https://files.pythonhosted.org/packages/87/df/b7737ff046c974b183ea9aa111b74185ac8c3a326c6262d413bd5a1b8c69/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e78314bdc32fa80696f72fa16dc61168fda4d6a0c014e0380f9d02f0e5d8a07", size = 150295, upload-time = "2025-08-09T07:55:59.147Z" }, - { url = "https://files.pythonhosted.org/packages/61/f1/190d9977e0084d3f1dc169acd060d479bbbc71b90bf3e7bf7b9927dec3eb/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:96b2b3d1a83ad55310de8c7b4a2d04d9277d5591f40761274856635acc5fcb30", size = 148379, upload-time = "2025-08-09T07:56:00.364Z" }, - { url = "https://files.pythonhosted.org/packages/4c/92/27dbe365d34c68cfe0ca76f1edd70e8705d82b378cb54ebbaeabc2e3029d/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:939578d9d8fd4299220161fdd76e86c6a251987476f5243e8864a7844476ba14", size = 160018, upload-time = "2025-08-09T07:56:01.678Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/04/baae2a1ea1893a01635d475b9261c889a18fd48393634b6270827869fa34/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fd10de089bcdcd1be95a2f73dbe6254798ec1bda9f450d5828c96f93e2536b9c", size = 157430, upload-time = "2025-08-09T07:56:02.87Z" }, - { url = "https://files.pythonhosted.org/packages/2f/36/77da9c6a328c54d17b960c89eccacfab8271fdaaa228305330915b88afa9/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e8ac75d72fa3775e0b7cb7e4629cec13b7514d928d15ef8ea06bca03ef01cae", size = 151600, upload-time = "2025-08-09T07:56:04.089Z" }, - { url = "https://files.pythonhosted.org/packages/64/d4/9eb4ff2c167edbbf08cdd28e19078bf195762e9bd63371689cab5ecd3d0d/charset_normalizer-3.4.3-cp311-cp311-win32.whl", hash = "sha256:6cf8fd4c04756b6b60146d98cd8a77d0cdae0e1ca20329da2ac85eed779b6849", size = 99616, upload-time = "2025-08-09T07:56:05.658Z" }, - { url = "https://files.pythonhosted.org/packages/f4/9c/996a4a028222e7761a96634d1820de8a744ff4327a00ada9c8942033089b/charset_normalizer-3.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:31a9a6f775f9bcd865d88ee350f0ffb0e25936a7f930ca98995c05abf1faf21c", size = 107108, upload-time = "2025-08-09T07:56:07.176Z" }, - { url = "https://files.pythonhosted.org/packages/e9/5e/14c94999e418d9b87682734589404a25854d5f5d0408df68bc15b6ff54bb/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1", size = 205655, upload-time = "2025-08-09T07:56:08.475Z" }, - { url = "https://files.pythonhosted.org/packages/7d/a8/c6ec5d389672521f644505a257f50544c074cf5fc292d5390331cd6fc9c3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884", size = 146223, upload-time = "2025-08-09T07:56:09.708Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/eb/a2ffb08547f4e1e5415fb69eb7db25932c52a52bed371429648db4d84fb1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018", size = 159366, upload-time = "2025-08-09T07:56:11.326Z" }, - { url = "https://files.pythonhosted.org/packages/82/10/0fd19f20c624b278dddaf83b8464dcddc2456cb4b02bb902a6da126b87a1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392", size = 157104, upload-time = "2025-08-09T07:56:13.014Z" }, - { url = "https://files.pythonhosted.org/packages/16/ab/0233c3231af734f5dfcf0844aa9582d5a1466c985bbed6cedab85af9bfe3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f", size = 151830, upload-time = "2025-08-09T07:56:14.428Z" }, - { url = "https://files.pythonhosted.org/packages/ae/02/e29e22b4e02839a0e4a06557b1999d0a47db3567e82989b5bb21f3fbbd9f/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154", size = 148854, upload-time = "2025-08-09T07:56:16.051Z" }, - { url = "https://files.pythonhosted.org/packages/05/6b/e2539a0a4be302b481e8cafb5af8792da8093b486885a1ae4d15d452bcec/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491", size = 160670, upload-time = "2025-08-09T07:56:17.314Z" }, - { url = "https://files.pythonhosted.org/packages/31/e7/883ee5676a2ef217a40ce0bffcc3d0dfbf9e64cbcfbdf822c52981c3304b/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93", size = 158501, upload-time = "2025-08-09T07:56:18.641Z" }, - { url = "https://files.pythonhosted.org/packages/c1/35/6525b21aa0db614cf8b5792d232021dca3df7f90a1944db934efa5d20bb1/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f", size = 153173, upload-time = "2025-08-09T07:56:20.289Z" }, - { url = "https://files.pythonhosted.org/packages/50/ee/f4704bad8201de513fdc8aac1cabc87e38c5818c93857140e06e772b5892/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37", size = 99822, upload-time = "2025-08-09T07:56:21.551Z" }, - { url = "https://files.pythonhosted.org/packages/39/f5/3b3836ca6064d0992c58c7561c6b6eee1b3892e9665d650c803bd5614522/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc", size = 107543, upload-time = "2025-08-09T07:56:23.115Z" }, - { url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" }, - { url = "https://files.pythonhosted.org/packages/71/11/98a04c3c97dd34e49c7d247083af03645ca3730809a5509443f3c37f7c99/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8", size = 146008, upload-time = "2025-08-09T07:56:26.004Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/f5/4659a4cb3c4ec146bec80c32d8bb16033752574c20b1252ee842a95d1a1e/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9", size = 159196, upload-time = "2025-08-09T07:56:27.25Z" }, - { url = "https://files.pythonhosted.org/packages/86/9e/f552f7a00611f168b9a5865a1414179b2c6de8235a4fa40189f6f79a1753/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31", size = 156819, upload-time = "2025-08-09T07:56:28.515Z" }, - { url = "https://files.pythonhosted.org/packages/7e/95/42aa2156235cbc8fa61208aded06ef46111c4d3f0de233107b3f38631803/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f", size = 151350, upload-time = "2025-08-09T07:56:29.716Z" }, - { url = "https://files.pythonhosted.org/packages/c2/a9/3865b02c56f300a6f94fc631ef54f0a8a29da74fb45a773dfd3dcd380af7/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927", size = 148644, upload-time = "2025-08-09T07:56:30.984Z" }, - { url = "https://files.pythonhosted.org/packages/77/d9/cbcf1a2a5c7d7856f11e7ac2d782aec12bdfea60d104e60e0aa1c97849dc/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9", size = 160468, upload-time = "2025-08-09T07:56:32.252Z" }, - { url = "https://files.pythonhosted.org/packages/f6/42/6f45efee8697b89fda4d50580f292b8f7f9306cb2971d4b53f8914e4d890/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5", size = 158187, upload-time = "2025-08-09T07:56:33.481Z" }, - { url = "https://files.pythonhosted.org/packages/70/99/f1c3bdcfaa9c45b3ce96f70b14f070411366fa19549c1d4832c935d8e2c3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc", size = 152699, upload-time = "2025-08-09T07:56:34.739Z" }, - { url = "https://files.pythonhosted.org/packages/a3/ad/b0081f2f99a4b194bcbb1934ef3b12aa4d9702ced80a37026b7607c72e58/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce", size = 99580, upload-time = "2025-08-09T07:56:35.981Z" }, - { url = "https://files.pythonhosted.org/packages/9a/8f/ae790790c7b64f925e5c953b924aaa42a243fb778fed9e41f147b2a5715a/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef", size = 107366, upload-time = "2025-08-09T07:56:37.339Z" }, - { url = "https://files.pythonhosted.org/packages/8e/91/b5a06ad970ddc7a0e513112d40113e834638f4ca1120eb727a249fb2715e/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15", size = 204342, upload-time = "2025-08-09T07:56:38.687Z" }, - { url = "https://files.pythonhosted.org/packages/ce/ec/1edc30a377f0a02689342f214455c3f6c2fbedd896a1d2f856c002fc3062/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db", size = 145995, upload-time = "2025-08-09T07:56:40.048Z" }, - { url = 
"https://files.pythonhosted.org/packages/17/e5/5e67ab85e6d22b04641acb5399c8684f4d37caf7558a53859f0283a650e9/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d", size = 158640, upload-time = "2025-08-09T07:56:41.311Z" }, - { url = "https://files.pythonhosted.org/packages/f1/e5/38421987f6c697ee3722981289d554957c4be652f963d71c5e46a262e135/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096", size = 156636, upload-time = "2025-08-09T07:56:43.195Z" }, - { url = "https://files.pythonhosted.org/packages/a0/e4/5a075de8daa3ec0745a9a3b54467e0c2967daaaf2cec04c845f73493e9a1/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa", size = 150939, upload-time = "2025-08-09T07:56:44.819Z" }, - { url = "https://files.pythonhosted.org/packages/02/f7/3611b32318b30974131db62b4043f335861d4d9b49adc6d57c1149cc49d4/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049", size = 148580, upload-time = "2025-08-09T07:56:46.684Z" }, - { url = "https://files.pythonhosted.org/packages/7e/61/19b36f4bd67f2793ab6a99b979b4e4f3d8fc754cbdffb805335df4337126/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0", size = 159870, upload-time = "2025-08-09T07:56:47.941Z" }, - { url = "https://files.pythonhosted.org/packages/06/57/84722eefdd338c04cf3030ada66889298eaedf3e7a30a624201e0cbe424a/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92", size = 157797, upload-time = "2025-08-09T07:56:49.756Z" }, - { url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" }, - { url = "https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" }, - { url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" }, +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/b8/6d51fc1d52cbd52cd4ccedd5b5b2f0f6a11bbf6765c782298b0f3e808541/charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", size = 209709, upload-time = "2025-10-14T04:40:11.385Z" }, + { url = "https://files.pythonhosted.org/packages/5c/af/1f9d7f7faafe2ddfb6f72a2e07a548a629c61ad510fe60f9630309908fef/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", size = 148814, upload-time = "2025-10-14T04:40:13.135Z" }, + { url = "https://files.pythonhosted.org/packages/79/3d/f2e3ac2bbc056ca0c204298ea4e3d9db9b4afe437812638759db2c976b5f/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", size = 144467, upload-time = "2025-10-14T04:40:14.728Z" }, + { url = "https://files.pythonhosted.org/packages/ec/85/1bf997003815e60d57de7bd972c57dc6950446a3e4ccac43bc3070721856/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", size = 162280, upload-time = "2025-10-14T04:40:16.14Z" }, + { url = "https://files.pythonhosted.org/packages/3e/8e/6aa1952f56b192f54921c436b87f2aaf7c7a7c3d0d1a765547d64fd83c13/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", size = 159454, upload-time = "2025-10-14T04:40:17.567Z" }, + { url = "https://files.pythonhosted.org/packages/36/3b/60cbd1f8e93aa25d1c669c649b7a655b0b5fb4c571858910ea9332678558/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", size = 153609, upload-time = "2025-10-14T04:40:19.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/91/6a13396948b8fd3c4b4fd5bc74d045f5637d78c9675585e8e9fbe5636554/charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", size = 151849, upload-time = "2025-10-14T04:40:20.607Z" }, + { url = "https://files.pythonhosted.org/packages/b7/7a/59482e28b9981d105691e968c544cc0df3b7d6133152fb3dcdc8f135da7a/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", size = 151586, upload-time = "2025-10-14T04:40:21.719Z" }, + { url = "https://files.pythonhosted.org/packages/92/59/f64ef6a1c4bdd2baf892b04cd78792ed8684fbc48d4c2afe467d96b4df57/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", size = 145290, upload-time = "2025-10-14T04:40:23.069Z" }, + { url = "https://files.pythonhosted.org/packages/6b/63/3bf9f279ddfa641ffa1962b0db6a57a9c294361cc2f5fcac997049a00e9c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", size = 163663, upload-time = "2025-10-14T04:40:24.17Z" }, + { url = "https://files.pythonhosted.org/packages/ed/09/c9e38fc8fa9e0849b172b581fd9803bdf6e694041127933934184e19f8c3/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", size = 151964, upload-time = "2025-10-14T04:40:25.368Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d1/d28b747e512d0da79d8b6a1ac18b7ab2ecfd81b2944c4c710e166d8dd09c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", size = 161064, upload-time = "2025-10-14T04:40:26.806Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/9a/31d62b611d901c3b9e5500c36aab0ff5eb442043fb3a1c254200d3d397d9/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", size = 155015, upload-time = "2025-10-14T04:40:28.284Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/107e008fa2bff0c8b9319584174418e5e5285fef32f79d8ee6a430d0039c/charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", size = 99792, upload-time = "2025-10-14T04:40:29.613Z" }, + { url = "https://files.pythonhosted.org/packages/eb/66/e396e8a408843337d7315bab30dbf106c38966f1819f123257f5520f8a96/charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", size = 107198, upload-time = "2025-10-14T04:40:30.644Z" }, + { url = "https://files.pythonhosted.org/packages/b5/58/01b4f815bf0312704c267f2ccb6e5d42bcc7752340cd487bc9f8c3710597/charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", size = 100262, upload-time = "2025-10-14T04:40:32.108Z" }, + { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" }, + { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" }, + { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" }, + { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" }, + { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" }, + { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" }, + { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" }, + { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" }, + { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" }, + { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" }, + { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" }, + { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" }, + { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", 
size = 99456, upload-time = "2025-10-14T04:40:49.376Z" }, + { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" }, + { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" }, + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, 
+ { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, ] [[package]] @@ -1023,7 +1048,7 @@ wheels = [ [[package]] name = "google-genai" -version = "1.43.0" +version = "1.44.0" 
source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1035,9 +1060,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c1/75/992ca4462682949750709678b8efbc865222c9a16cf34504b69c5459606c/google_genai-1.43.0.tar.gz", hash = "sha256:84eb219d320759c5882bc2cdb4e2ac84544d00f5d12c7892c79fb03d71bfc9a4", size = 236132, upload-time = "2025-10-10T23:16:40.131Z" } +sdist = { url = "https://files.pythonhosted.org/packages/62/c4/7edde80ee4c0622f740008f927a11f8c8b0c7d6457f219fc698c27a3a377/google_genai-1.44.0.tar.gz", hash = "sha256:7df8c42505900714fea98ed0d03c06ed18323368ef9dceff74645631fd7a7650", size = 236659, upload-time = "2025-10-15T03:32:49.653Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/85/e90dda488d5044e6e4cd1b49e7e7f0cc7f4a2a1c8004e88a5122d42ea024/google_genai-1.43.0-py3-none-any.whl", hash = "sha256:be1d4b1acab268125d536fd81b73c38694a70cb08266759089154718924434fd", size = 236733, upload-time = "2025-10-10T23:16:38.809Z" }, + { url = "https://files.pythonhosted.org/packages/dc/4d/1f64941ec95c004c290db6287c1e8a0906ed00a0656075ad705cecb12e7d/google_genai-1.44.0-py3-none-any.whl", hash = "sha256:4732ffd56e5f7c89f2440e0baf9a78b91b01a2a22db1b451a60b4417b3bcfd74", size = 237302, upload-time = "2025-10-15T03:32:47.839Z" }, ] [[package]] @@ -2588,7 +2613,7 @@ crypto = [ [[package]] name = "pylint" -version = "4.0.0" +version = "4.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astroid" }, @@ -2600,9 +2625,9 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "tomlkit" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b6/2f/e80cc4301c81c41a8836d726377daeebf5901a33c06ba8c2d5afb94f7612/pylint-4.0.0.tar.gz", hash = "sha256:62da212808c0681e49ffb125f0a994c685d912cf19ae373075649ebb5870ec28", size = 1567676, upload-time = "2025-10-12T15:21:15.165Z" } 
+sdist = { url = "https://files.pythonhosted.org/packages/8c/3e/fa6b9d708486502b96ec2cd87d9266168dac8d7391a14a89738b88ae6379/pylint-4.0.1.tar.gz", hash = "sha256:06db6a1fda3cedbd7aee58f09d241e40e5f14b382fd035ed97be320f11728a84", size = 1568430, upload-time = "2025-10-15T05:40:55.071Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/af/068a0b92c49927ada0e177561244157dc9d122eeea5987e34c423172a296/pylint-4.0.0-py3-none-any.whl", hash = "sha256:196b92a85204bb0c0a416a6bb324f6185e59ff1d687ee1d614bf0abf34a348e8", size = 535836, upload-time = "2025-10-12T15:21:13.041Z" }, + { url = "https://files.pythonhosted.org/packages/69/ee/59269b7559a1d500acdba8722b995df2aa2946a71cbeeee07648256e9dae/pylint-4.0.1-py3-none-any.whl", hash = "sha256:6077ac21d01b7361eae6ed0f38d9024c02732fdc635d9e154d4fe6063af8ac56", size = 535937, upload-time = "2025-10-15T05:40:53.052Z" }, ] [[package]] From 1162cc42031a6f17a3955f2fee289bf9f9959602 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 11:39:41 +0200 Subject: [PATCH 088/115] fix tests --- .../stuffs/test_stuff_factory_make_stuff_from_content.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py b/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py index deb9125c2..fd18cc96e 100644 --- a/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py +++ b/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py @@ -32,7 +32,7 @@ class TestData: EMPTY_LIST_CONTENT: ClassVar[ListContent[TextContent]] = ListContent(items=[]) # Dictionary test data - native concept - NATIVE_TEXT_DICT: ClassVar[dict[str, Any]] = {"concept": NativeConceptCode.TEXT, "content": {"text": "Native text content"}} + NATIVE_TEXT_DICT: ClassVar[dict[str, Any]] = {"concept": NativeConceptCode.TEXT.concept_string, "content": {"text": "Native text content"}} # Dictionary test data - 
custom concept with concept field CUSTOM_CONCEPT_DICT: ClassVar[dict[str, Any]] = { @@ -203,10 +203,16 @@ def test_string_input(self, mocker: MockerFixture): def test_dict_with_native_concept(self, mocker: MockerFixture): """Test dictionary with native concept.""" + # Mock ConceptFactory to handle native concept creation mock_concept = mocker.Mock() mock_concept_factory = mocker.patch("pipelex.core.stuffs.stuff_factory.ConceptFactory") mock_concept_factory.make_native_concept.return_value = mock_concept + # Mock the make_domain_and_concept_code method + mock_domain_and_code = mocker.Mock() + mock_domain_and_code.concept_code = "Text" + mock_concept_factory.make_domain_and_concept_code_from_concept_string_or_code.return_value = mock_domain_and_code + mock_content = mocker.Mock(spec=StuffContent) mock_content_factory = mocker.patch("pipelex.core.stuffs.stuff_factory.StuffContentFactory") mock_content_factory.make_stuff_content_from_concept_with_fallback.return_value = mock_content From ea59e31ebcc17a194bcc29fde99329dff1deb525 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 13:24:20 +0200 Subject: [PATCH 089/115] ok --- pipelex/core/pipes/pipe_abstract.py | 4 +- pipelex/core/pipes/pipe_library.py | 4 +- pipelex/core/pipes/pipe_library_abstract.py | 2 +- pipelex/hub.py | 52 +++-- pipelex/libraries/library_manager.py | 196 +++++++++++++++--- pipelex/libraries/library_manager_abstract.py | 36 +++- pipelex/libraries/library_manager_factory.py | 31 +-- .../pipe_operators/img_gen/pipe_img_gen.py | 6 +- pipelex/pipe_operators/llm/pipe_llm.py | 16 +- pipelex/pipelex.py | 20 +- pipelex/pipeline/execute.py | 2 + tests/integration/pipelex/test_libraries.py | 14 +- 12 files changed, 260 insertions(+), 123 deletions(-) diff --git a/pipelex/core/pipes/pipe_abstract.py b/pipelex/core/pipes/pipe_abstract.py index f93a60642..3e42b5319 100644 --- a/pipelex/core/pipes/pipe_abstract.py +++ b/pipelex/core/pipes/pipe_abstract.py @@ -31,10 +31,10 @@ def 
validate_pipe_code_syntax(cls, code: str) -> str: return code @abstractmethod - def validate_output(self): + def validate_output(self, pipeline_run_id: str | None = None): """Validate the output for the pipe.""" - def validate_with_libraries(self): + def validate_with_libraries(self, pipeline_run_id: str | None = None): """Validate the pipe with the libraries, after the static validation""" @abstractmethod diff --git a/pipelex/core/pipes/pipe_library.py b/pipelex/core/pipes/pipe_library.py index 47128d78d..7ff587a0e 100644 --- a/pipelex/core/pipes/pipe_library.py +++ b/pipelex/core/pipes/pipe_library.py @@ -17,10 +17,10 @@ class PipeLibrary(RootModel[PipeLibraryRoot], PipeLibraryAbstract): @override - def validate_with_libraries(self): + def validate_with_libraries(self, pipeline_run_id: str | None = None): concept_library = get_concept_library() for pipe in self.root.values(): - pipe.validate_output() + pipe.validate_output(pipeline_run_id=pipeline_run_id) try: for concept in pipe.concept_dependencies(): try: diff --git a/pipelex/core/pipes/pipe_library_abstract.py b/pipelex/core/pipes/pipe_library_abstract.py index 434fae582..3f135d96f 100644 --- a/pipelex/core/pipes/pipe_library_abstract.py +++ b/pipelex/core/pipes/pipe_library_abstract.py @@ -5,7 +5,7 @@ class PipeLibraryAbstract(ABC): @abstractmethod - def validate_with_libraries(self) -> None: + def validate_with_libraries(self, pipeline_run_id: str | None = None) -> None: pass @abstractmethod diff --git a/pipelex/hub.py b/pipelex/hub.py index 53321d316..c4457873e 100644 --- a/pipelex/hub.py +++ b/pipelex/hub.py @@ -234,21 +234,27 @@ def get_required_content_generator(self) -> ContentGeneratorProtocol: # pipelex - def get_required_domain_library(self) -> DomainLibraryAbstract: + def get_required_domain_library(self, pipeline_run_id: str | None = None) -> DomainLibraryAbstract: + if self._library_manager is not None: + return self._library_manager.get_domain_library(pipeline_run_id=pipeline_run_id) if 
self._domain_library is None: - msg = "DomainLibrary is not initialized" + msg = "DomainLibrary with pipeline_run_id '{pipeline_run_id}' is not initialized" raise RuntimeError(msg) return self._domain_library - def get_required_concept_library(self) -> ConceptLibraryAbstract: + def get_required_concept_library(self, pipeline_run_id: str | None = None) -> ConceptLibraryAbstract: + if self._library_manager is not None: + return self._library_manager.get_concept_library(pipeline_run_id=pipeline_run_id) if self._concept_library is None: - msg = "ConceptLibrary is not initialized" + msg = "ConceptLibrary with pipeline_run_id '{pipeline_run_id}' is not initialized" raise RuntimeError(msg) return self._concept_library - def get_required_pipe_library(self) -> PipeLibraryAbstract: + def get_required_pipe_library(self, pipeline_run_id: str | None = None) -> PipeLibraryAbstract: + if self._library_manager is not None: + return self._library_manager.get_pipe_library(pipeline_run_id=pipeline_run_id) if self._pipe_library is None: - msg = "PipeLibrary is not initialized" + msg = "PipeLibrary with pipeline_run_id '{pipeline_run_id}' is not initialized" raise RuntimeError(msg) return self._pipe_library @@ -373,36 +379,36 @@ def get_secret(secret_id: str) -> str: return get_secrets_provider().get_secret(secret_id=secret_id) -def get_required_domain(domain: str) -> Domain: - return get_pipelex_hub().get_required_domain_library().get_required_domain(domain=domain) +def get_required_domain(domain: str, pipeline_run_id: str | None = None) -> Domain: + return get_pipelex_hub().get_required_domain_library(pipeline_run_id=pipeline_run_id).get_required_domain(domain=domain) -def get_optional_domain(domain: str) -> Domain | None: - return get_pipelex_hub().get_required_domain_library().get_domain(domain=domain) +def get_optional_domain(domain: str, pipeline_run_id: str | None = None) -> Domain | None: + return 
get_pipelex_hub().get_required_domain_library(pipeline_run_id=pipeline_run_id).get_domain(domain=domain) -def get_pipe_library() -> PipeLibraryAbstract: - return get_pipelex_hub().get_required_pipe_library() +def get_pipe_library(pipeline_run_id: str | None = None) -> PipeLibraryAbstract: + return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id) -def get_pipes() -> list[PipeAbstract]: - return get_pipelex_hub().get_required_pipe_library().get_pipes() +def get_pipes(pipeline_run_id: str | None = None) -> list[PipeAbstract]: + return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id).get_pipes() -def get_required_pipe(pipe_code: str) -> PipeAbstract: - return get_pipelex_hub().get_required_pipe_library().get_required_pipe(pipe_code=pipe_code) +def get_required_pipe(pipe_code: str, pipeline_run_id: str | None = None) -> PipeAbstract: + return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id).get_required_pipe(pipe_code=pipe_code) -def get_optional_pipe(pipe_code: str) -> PipeAbstract | None: - return get_pipelex_hub().get_required_pipe_library().get_optional_pipe(pipe_code=pipe_code) +def get_optional_pipe(pipe_code: str, pipeline_run_id: str | None = None) -> PipeAbstract | None: + return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id).get_optional_pipe(pipe_code=pipe_code) -def get_concept_library() -> ConceptLibraryAbstract: - return get_pipelex_hub().get_required_concept_library() +def get_concept_library(pipeline_run_id: str | None = None) -> ConceptLibraryAbstract: + return get_pipelex_hub().get_required_concept_library(pipeline_run_id=pipeline_run_id) -def get_required_concept(concept_string: str) -> Concept: - return get_pipelex_hub().get_required_concept_library().get_required_concept(concept_string=concept_string) +def get_required_concept(concept_string: str, pipeline_run_id: str | None = None) -> Concept: + return 
get_pipelex_hub().get_required_concept_library(pipeline_run_id=pipeline_run_id).get_required_concept(concept_string=concept_string) def get_pipe_router() -> PipeRouterProtocol: @@ -434,4 +440,4 @@ def get_observer_provider() -> ObserverProtocol: def get_native_concept(native_concept: NativeConceptCode) -> Concept: - return get_pipelex_hub().get_required_concept_library().get_native_concept(native_concept=native_concept) + return get_pipelex_hub().get_required_concept_library(pipeline_run_id=pipeline_run_id).get_native_concept(native_concept=native_concept) diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 8a55f93f1..b9809b687 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -37,6 +37,7 @@ get_pipelex_package_dir_for_imports, get_pipelex_plx_files_from_package, ) +from pipelex.pipeline.pipeline_models import SpecialPipelineId from pipelex.system.configuration.config_loader import config_manager from pipelex.system.registries.class_registry_utils import ClassRegistryUtils from pipelex.system.registries.func_registry_utils import FuncRegistryUtils @@ -65,39 +66,142 @@ class LibraryManager(LibraryManagerAbstract): "prompt_template_to_structure", ] - def __init__( - self, - domain_library: DomainLibrary, - concept_library: ConceptLibrary, - pipe_library: PipeLibrary, - ): - self.domain_library = domain_library - self.concept_library = concept_library - self.pipe_library = pipe_library - - @override - def validate_libraries(self): - log.debug("LibraryManager validating libraries") + def __init__(self): + self._domain_libraries: dict[str, DomainLibrary] = {SpecialPipelineId.UNTITLED: DomainLibrary.make_empty()} + self._concept_libraries: dict[str, ConceptLibrary] = {SpecialPipelineId.UNTITLED: ConceptLibrary.make_empty()} + self._pipe_libraries: dict[str, PipeLibrary] = {SpecialPipelineId.UNTITLED: PipeLibrary.make_empty()} - self.concept_library.validate_with_libraries() - 
self.pipe_library.validate_with_libraries() - self.domain_library.validate_with_libraries() + ############################################################ + # Manager lifecycle + ############################################################ @override def setup(self) -> None: - self.concept_library.setup() + self._domain_libraries.clear() + self._concept_libraries.clear() + self._pipe_libraries.clear() + concept_library = ConceptLibrary.make_empty() + concept_library.setup() + self._domain_libraries[SpecialPipelineId.UNTITLED] = DomainLibrary.make_empty() + self._concept_libraries[SpecialPipelineId.UNTITLED] = concept_library + self._pipe_libraries[SpecialPipelineId.UNTITLED] = PipeLibrary.make_empty() @override def teardown(self) -> None: - self.pipe_library.teardown() - self.concept_library.teardown() - self.domain_library.teardown() + for pipe_library in self._pipe_libraries.values(): + pipe_library.teardown() + for concept_library in self._concept_libraries.values(): + concept_library.teardown() + for domain_library in self._domain_libraries.values(): + domain_library.teardown() + self._pipe_libraries.clear() + self._concept_libraries.clear() + self._domain_libraries.clear() @override def reset(self) -> None: self.teardown() self.setup() + @override + def open_library(self, pipeline_run_id: str) -> None: + if pipeline_run_id in self._domain_libraries: + msg = f"Library for pipeline '{pipeline_run_id}' already exists" + raise LibraryError(msg) + + concept_library = ConceptLibrary.make_empty() + concept_library.setup() + self._domain_libraries[pipeline_run_id] = DomainLibrary.make_empty() + self._concept_libraries[pipeline_run_id] = concept_library + self._pipe_libraries[pipeline_run_id] = PipeLibrary.make_empty() + + @override + def close_library(self, pipeline_run_id: str) -> None: + if pipeline_run_id in self._pipe_libraries: + self._pipe_libraries[pipeline_run_id].teardown() + self._pipe_libraries.pop(pipeline_run_id) + if pipeline_run_id in 
self._concept_libraries: + self._concept_libraries[pipeline_run_id].teardown() + self._concept_libraries.pop(pipeline_run_id) + if pipeline_run_id in self._domain_libraries: + self._domain_libraries[pipeline_run_id].teardown() + self._domain_libraries.pop(pipeline_run_id) + + ############################################################ + # Public library accessors + ############################################################ + + @override + def get_domain_library(self, pipeline_run_id: str | None = None) -> DomainLibrary: + if pipeline_run_id is None: + pipeline_run_id = SpecialPipelineId.UNTITLED + if pipeline_run_id not in self._domain_libraries: + msg = f"Domain library for pipeline '{pipeline_run_id}' does not exist" + raise LibraryError(msg) + return self._domain_libraries[pipeline_run_id] + + @override + def get_concept_library(self, pipeline_run_id: str | None = None) -> ConceptLibrary: + if pipeline_run_id is None: + pipeline_run_id = SpecialPipelineId.UNTITLED + if pipeline_run_id not in self._concept_libraries: + msg = f"Concept library for pipeline '{pipeline_run_id}' does not exist" + raise LibraryError(msg) + return self._concept_libraries[pipeline_run_id] + + @override + def get_pipe_library(self, pipeline_run_id: str | None = None) -> PipeLibrary: + if pipeline_run_id is None: + pipeline_run_id = SpecialPipelineId.UNTITLED + if pipeline_run_id not in self._pipe_libraries: + msg = f"Pipe library for pipeline '{pipeline_run_id}' does not exist" + raise LibraryError(msg) + return self._pipe_libraries[pipeline_run_id] + + ############################################################ + # Private methods + ############################################################ + + def _get_domain_library(self, pipeline_run_id: str) -> DomainLibrary: + """Internal helper that requires explicit pipeline_run_id.""" + if pipeline_run_id not in self._domain_libraries: + msg = f"Domain library for pipeline '{pipeline_run_id}' does not exist" + raise LibraryError(msg) + 
return self._domain_libraries[pipeline_run_id] + + def _get_concept_library(self, pipeline_run_id: str) -> ConceptLibrary: + """Internal helper that requires explicit pipeline_run_id.""" + if pipeline_run_id not in self._concept_libraries: + msg = f"Concept library for pipeline '{pipeline_run_id}' does not exist" + raise LibraryError(msg) + return self._concept_libraries[pipeline_run_id] + + def _get_pipe_library(self, pipeline_run_id: str) -> PipeLibrary: + """Internal helper that requires explicit pipeline_run_id.""" + if pipeline_run_id not in self._pipe_libraries: + msg = f"Pipe library for pipeline '{pipeline_run_id}' does not exist" + raise LibraryError(msg) + return self._pipe_libraries[pipeline_run_id] + + ############################################################ + # LibraryManagerAbstract + ############################################################ + + @override + def validate_libraries(self, pipeline_run_id: str | None = None): + log.debug("LibraryManager validating libraries") + + if pipeline_run_id is None: + pipeline_run_id = SpecialPipelineId.UNTITLED + + concept_library = self._get_concept_library(pipeline_run_id) + pipe_library = self._get_pipe_library(pipeline_run_id) + domain_library = self._get_domain_library(pipeline_run_id) + + concept_library.validate_with_libraries() + pipe_library.validate_with_libraries() + domain_library.validate_with_libraries() + def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: """Get all valid Pipelex PLX files from the given directories.""" all_plx_paths: list[Path] = [] @@ -133,15 +237,22 @@ def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: return all_plx_paths @override - def load_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[PipeAbstract]: + def load_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> list[PipeAbstract]: """Load a blueprint.""" + if pipeline_run_id is None: + pipeline_run_id = 
SpecialPipelineId.UNTITLED + + domain_library = self._get_domain_library(pipeline_run_id) + concept_library = self._get_concept_library(pipeline_run_id) + pipe_library = self._get_pipe_library(pipeline_run_id) + # Create and load domain try: domain = self._load_domain_from_blueprint(blueprint) except DomainDefinitionError as exc: msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {exc}" raise DomainLoadingError(message=msg, domain_code=exc.domain_code, description=exc.description, source=exc.source) from exc - self.domain_library.add_domain(domain=domain) + domain_library.add_domain(domain=domain) # Create and load concepts try: @@ -151,7 +262,7 @@ def load_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[PipeAbs raise ConceptLoadingError( message=msg, concept_definition_error=exc, concept_code=exc.concept_code, description=exc.description, source=exc.source ) from exc - self.concept_library.add_concepts(concepts=concepts) + concept_library.add_concepts(concepts=concepts) # Create and load pipes try: @@ -161,14 +272,21 @@ def load_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[PipeAbs raise PipeLoadingError( message=msg, pipe_definition_error=exc, pipe_code=exc.pipe_code or "", description=exc.description or "", source=exc.source ) from exc - self.pipe_library.add_pipes(pipes=pipes) + pipe_library.add_pipes(pipes=pipes) return pipes @override - def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> None: + def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> None: + if pipeline_run_id is None: + pipeline_run_id = SpecialPipelineId.UNTITLED + + domain_library = self._get_domain_library(pipeline_run_id) + concept_library = self._get_concept_library(pipeline_run_id) + pipe_library = self._get_pipe_library(pipeline_run_id) + if blueprint.pipe is not None: - 
self.pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) + pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) # Remove concepts (they may depend on domain) if blueprint.concept is not None: @@ -176,9 +294,9 @@ def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> None: ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) for concept_code in blueprint.concept ] - self.concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) + concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) - self.domain_library.remove_domain_by_code(domain_code=blueprint.domain) + domain_library.remove_domain_by_code(domain_code=blueprint.domain) def _load_domain_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> Domain: return DomainFactory.make_from_blueprint( @@ -235,9 +353,25 @@ def _import_pipelex_modules_directly(self) -> None: @override def load_libraries( self, + pipeline_run_id: str | None = None, library_dirs: list[Path] | None = None, library_file_paths: list[Path] | None = None, ) -> None: + if pipeline_run_id is None: + pipeline_run_id = SpecialPipelineId.UNTITLED + + # Ensure libraries exist for this pipeline_run_id + if pipeline_run_id not in self._domain_libraries: + if pipeline_run_id == SpecialPipelineId.UNTITLED: + # Auto-setup for UNTITLED if not already done + self.setup() + else: + msg = f"Libraries for pipeline '{pipeline_run_id}' do not exist. Call open_library() first." 
+ raise LibraryError(msg) + + domain_library = self._get_domain_library(pipeline_run_id) + concept_library = self._get_concept_library(pipeline_run_id) + pipe_library = self._get_pipe_library(pipeline_run_id) # Collect directories to scan (user project directories) user_dirs: set[Path] = set() if library_dirs: @@ -346,7 +480,7 @@ def load_libraries( msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_domains.append(domain) - self.domain_library.add_domains(domains=all_domains) + domain_library.add_domains(domains=all_domains) # Load all concepts second all_concepts: list[Concept] = [] @@ -361,7 +495,7 @@ def load_libraries( msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_concepts.extend(concepts) - self.concept_library.add_concepts(concepts=all_concepts) + concept_library.add_concepts(concepts=all_concepts) # Load all pipes third all_pipes: list[PipeAbstract] = [] @@ -376,4 +510,4 @@ def load_libraries( msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" raise LibraryLoadingError(msg) from validation_error all_pipes.extend(pipes) - self.pipe_library.add_pipes(pipes=all_pipes) + pipe_library.add_pipes(pipes=all_pipes) diff --git a/pipelex/libraries/library_manager_abstract.py b/pipelex/libraries/library_manager_abstract.py index 9c23a9f09..5d4b23609 100644 --- a/pipelex/libraries/library_manager_abstract.py +++ b/pipelex/libraries/library_manager_abstract.py @@ -2,7 +2,10 @@ from pathlib import Path from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint +from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract +from pipelex.core.domains.domain_library_abstract 
import DomainLibraryAbstract from pipelex.core.pipes.pipe_abstract import PipeAbstract +from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract class LibraryManagerAbstract(ABC): @@ -19,17 +22,42 @@ def reset(self) -> None: pass @abstractmethod - def validate_libraries(self) -> None: + def open_library(self, pipeline_run_id: str) -> None: pass @abstractmethod - def load_libraries(self, library_dirs: list[Path] | None = None, library_file_paths: list[Path] | None = None) -> None: + def close_library(self, pipeline_run_id: str) -> None: pass @abstractmethod - def load_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[PipeAbstract]: + def get_domain_library(self, pipeline_run_id: str | None = None) -> DomainLibraryAbstract: pass @abstractmethod - def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> None: + def get_concept_library(self, pipeline_run_id: str | None = None) -> ConceptLibraryAbstract: + pass + + @abstractmethod + def get_pipe_library(self, pipeline_run_id: str | None = None) -> PipeLibraryAbstract: + pass + + @abstractmethod + def validate_libraries(self, pipeline_run_id: str | None = None) -> None: + pass + + @abstractmethod + def load_libraries( + self, + pipeline_run_id: str | None = None, + library_dirs: list[Path] | None = None, + library_file_paths: list[Path] | None = None, + ) -> None: + pass + + @abstractmethod + def load_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> list[PipeAbstract]: + pass + + @abstractmethod + def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> None: pass diff --git a/pipelex/libraries/library_manager_factory.py b/pipelex/libraries/library_manager_factory.py index 47631d1df..d1430821f 100644 --- a/pipelex/libraries/library_manager_factory.py +++ b/pipelex/libraries/library_manager_factory.py @@ -1,34 +1,7 @@ -from pipelex.core.concepts.concept_library import ConceptLibrary 
-from pipelex.core.domains.domain_library import DomainLibrary -from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.libraries.library_manager import LibraryManager class LibraryManagerFactory: - """Factory for creating LibraryManager instances.""" - - @classmethod - def make_empty(cls) -> "LibraryManager": - domain_library = DomainLibrary.make_empty() - concept_library = ConceptLibrary.make_empty() - pipe_library = PipeLibrary.make_empty() - - return LibraryManager( - domain_library=domain_library, - concept_library=concept_library, - pipe_library=pipe_library, - ) - @classmethod - def make( - cls, - domain_library: DomainLibrary, - concept_library: ConceptLibrary, - pipe_library: PipeLibrary, - ) -> "LibraryManager": - """Create a LibraryManager with provided libraries.""" - return LibraryManager( - domain_library=domain_library, - concept_library=concept_library, - pipe_library=pipe_library, - ) + def make_empty(cls) -> LibraryManager: + return LibraryManager() diff --git a/pipelex/pipe_operators/img_gen/pipe_img_gen.py b/pipelex/pipe_operators/img_gen/pipe_img_gen.py index 0cd6b8d43..f9be6c8d8 100644 --- a/pipelex/pipe_operators/img_gen/pipe_img_gen.py +++ b/pipelex/pipe_operators/img_gen/pipe_img_gen.py @@ -97,14 +97,14 @@ def validate_inputs(self) -> Self: return self @override - def validate_with_libraries(self): + def validate_with_libraries(self, pipeline_run_id: str | None = None): self._validate_inputs() if self.img_gen: check_img_gen_choice_with_deck(img_gen_choice=self.img_gen) @override - def validate_output(self): - if not get_concept_library().is_compatible( + def validate_output(self, pipeline_run_id: str | None = None): + if not get_concept_library(pipeline_run_id=pipeline_run_id).is_compatible( tested_concept=self.output, wanted_concept=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True, diff --git a/pipelex/pipe_operators/llm/pipe_llm.py b/pipelex/pipe_operators/llm/pipe_llm.py index 2a5280f5f..a4a3642c9 
100644 --- a/pipelex/pipe_operators/llm/pipe_llm.py +++ b/pipelex/pipe_operators/llm/pipe_llm.py @@ -83,7 +83,7 @@ def validate_output_concept_consistency(self) -> Self: return self @override - def validate_with_libraries(self): + def validate_with_libraries(self, pipeline_run_id: str | None = None): llm_config = get_config().cogt.llm_config self.validate_inputs() self.llm_prompt_spec.validate_with_libraries() @@ -96,7 +96,7 @@ def validate_with_libraries(self): check_llm_choice_with_deck(llm_choice=llm_choice) @override - def validate_output(self): + def validate_output(self, pipeline_run_id: str | None = None): if get_concept_library().is_compatible( tested_concept=self.output, wanted_concept=get_native_concept(native_concept=NativeConceptCode.IMAGE), @@ -195,6 +195,7 @@ async def _run_operator_pipe( else: output_concept = get_required_concept( concept_string=ConceptFactory.make_concept_string_with_domain(domain=self.domain, concept_code=output_concept_code), + pipeline_run_id=job_metadata.pipeline_run_id, ) multiplicity_resolution = output_multiplicity_to_apply( @@ -289,9 +290,9 @@ async def _run_operator_pipe( llm_prompt_2_factory = None case StructuringMethod.PRELIMINARY_TEXT: log.verbose(f"Creating llm_prompt_2_factory for pipe {self.code} with structuring_method {structuring_method}") - pipe = get_required_pipe(pipe_code=self.code) + pipe = get_required_pipe(pipe_code=self.code, pipeline_run_id=job_metadata.pipeline_run_id) # TODO: run_pipe() could get the domain at the same time as the pip_code - domain = get_required_domain(domain=pipe.domain) + domain = get_required_domain(domain=pipe.domain, pipeline_run_id=job_metadata.pipeline_run_id) prompt_template_to_structure = ( self.prompt_template_to_structure or domain.prompt_template_to_structure @@ -309,7 +310,7 @@ async def _run_operator_pipe( log.debug(f"PipeLLM pipe_code is '{self.code}' and is_default_text_then_structure") # TODO: run_pipe() should get the domain along with the pip_code if the_pipe := 
get_optional_pipe(pipe_code=self.code): - domain = get_required_domain(domain=the_pipe.domain) + domain = get_required_domain(domain=the_pipe.domain, pipeline_run_id=job_metadata.pipeline_run_id) else: domain = Domain.make_default() prompt_template_to_structure = ( @@ -334,6 +335,7 @@ async def _run_operator_pipe( output_structure_prompt = await PipeLLM.get_output_structure_prompt( concept_string=pipe_run_params.dynamic_output_concept_code or output_concept.concept_string, is_with_preliminary_text=is_with_preliminary_text, + pipeline_run_id=job_metadata.pipeline_run_id, ) llm_prompt_1_for_object = await self.llm_prompt_spec.make_llm_prompt( output_concept_string=output_concept.concept_string, @@ -470,8 +472,8 @@ async def _dry_run_operator_pipe( ) @staticmethod - async def get_output_structure_prompt(concept_string: str, is_with_preliminary_text: bool) -> str | None: - concept = get_required_concept(concept_string=concept_string) + async def get_output_structure_prompt(concept_string: str, is_with_preliminary_text: bool, pipeline_run_id: str | None = None) -> str | None: + concept = get_required_concept(concept_string=concept_string, pipeline_run_id=pipeline_run_id) output_class = get_class_registry().get_class(concept.structure_class_name) log.debug(f"get_output_structure_prompt for {concept_string} with {is_with_preliminary_text=}") log.debug(f"output_class: {output_class}") diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index a38b9f575..4e9a54e26 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -25,9 +25,6 @@ from pipelex.cogt.models.model_manager import ModelManager from pipelex.cogt.models.model_manager_abstract import ModelManagerAbstract from pipelex.config import PipelexConfig, get_config -from pipelex.core.concepts.concept_library import ConceptLibrary -from pipelex.core.domains.domain_library import DomainLibrary -from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.core.registry_models import CoreRegistryModels from 
pipelex.core.validation import report_validation_error from pipelex.exceptions import PipelexConfigError, PipelexSetupError @@ -121,18 +118,7 @@ def __init__( self.pipelex_hub.set_report_delegate(self.reporting_delegate) # pipelex libraries - domain_library = DomainLibrary.make_empty() - concept_library = ConceptLibrary.make_empty() - pipe_library = PipeLibrary.make_empty() - self.pipelex_hub.set_domain_library(domain_library=domain_library) - self.pipelex_hub.set_concept_library(concept_library=concept_library) - self.pipelex_hub.set_pipe_library(pipe_library=pipe_library) - - self.library_manager = LibraryManagerFactory.make( - domain_library=domain_library, - concept_library=concept_library, - pipe_library=pipe_library, - ) + self.library_manager = LibraryManagerFactory.make_empty() self.pipelex_hub.set_library_manager(library_manager=self.library_manager) # pipelex pipeline @@ -262,6 +248,10 @@ def setup( def setup_libraries(self): self.library_manager.setup() self.library_manager.load_libraries() + # Set the UNTITLED libraries in the hub for backward compatibility + self.pipelex_hub.set_domain_library(domain_library=self.library_manager.get_domain_library()) + self.pipelex_hub.set_concept_library(concept_library=self.library_manager.get_concept_library()) + self.pipelex_hub.set_pipe_library(pipe_library=self.library_manager.get_pipe_library()) log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for {get_config().project_name}") def validate_libraries(self): diff --git a/pipelex/pipeline/execute.py b/pipelex/pipeline/execute.py index ed0d789a7..42a29e2ba 100644 --- a/pipelex/pipeline/execute.py +++ b/pipelex/pipeline/execute.py @@ -4,6 +4,7 @@ from pipelex.core.pipes.pipe_output import PipeOutput from pipelex.exceptions import PipelineInputError from pipelex.hub import ( + get_library_manager, get_pipe_router, get_pipeline_manager, get_report_delegate, @@ -88,6 +89,7 @@ async def execute_pipeline( pipeline = 
get_pipeline_manager().add_new_pipeline() pipeline_run_id = pipeline.pipeline_run_id get_report_delegate().open_registry(pipeline_run_id=pipeline_run_id) + get_library_manager().open_library(pipeline_run_id=pipeline_run_id) job_metadata = JobMetadata( pipeline_run_id=pipeline_run_id, diff --git a/tests/integration/pipelex/test_libraries.py b/tests/integration/pipelex/test_libraries.py index 42fca143f..560a49a36 100644 --- a/tests/integration/pipelex/test_libraries.py +++ b/tests/integration/pipelex/test_libraries.py @@ -89,13 +89,15 @@ def test_load_combo_libraries( test_pipelines_dir = [Path(LibraryTestCases.TEST_PIPELINES_DIR_PATH)] library_manager.load_libraries(library_dirs=test_pipelines_dir) # Verify that libraries were loaded - assert len(library_manager.concept_library.root) > 0, "No concepts were loaded" - assert len(library_manager.pipe_library.root) > 0, "No pipes were loaded" + concept_library = library_manager.get_concept_library() + pipe_library = library_manager.get_pipe_library() + assert len(concept_library.root) > 0, "No concepts were loaded" + assert len(pipe_library.root) > 0, "No pipes were loaded" # Test individual concepts and pipes - assert library_manager.concept_library.get_required_concept(concept_string=known_concept) is not None + assert concept_library.get_required_concept(concept_string=known_concept) is not None pretty_print( - f"Concept: {known_concept} is correctly loaded as {library_manager.concept_library.get_required_concept(concept_string=known_concept)}", + f"Concept: {known_concept} is correctly loaded as {concept_library.get_required_concept(concept_string=known_concept)}", ) - assert library_manager.pipe_library.get_optional_pipe(known_pipe) is not None - pretty_print(f"Pipe: {known_pipe} is correctly loaded as {library_manager.pipe_library.get_optional_pipe(known_pipe)}") + assert pipe_library.get_optional_pipe(known_pipe) is not None + pretty_print(f"Pipe: {known_pipe} is correctly loaded as 
{pipe_library.get_optional_pipe(known_pipe)}") From ba8214fd752048987cfb0938ac51e373f91c95e2 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 13:32:48 +0200 Subject: [PATCH 090/115] fix test --- .../stuffs/test_stuff_factory_make_stuff_from_content.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py b/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py index deb9125c2..fd18cc96e 100644 --- a/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py +++ b/tests/unit/pipelex/core/stuffs/test_stuff_factory_make_stuff_from_content.py @@ -32,7 +32,7 @@ class TestData: EMPTY_LIST_CONTENT: ClassVar[ListContent[TextContent]] = ListContent(items=[]) # Dictionary test data - native concept - NATIVE_TEXT_DICT: ClassVar[dict[str, Any]] = {"concept": NativeConceptCode.TEXT, "content": {"text": "Native text content"}} + NATIVE_TEXT_DICT: ClassVar[dict[str, Any]] = {"concept": NativeConceptCode.TEXT.concept_string, "content": {"text": "Native text content"}} # Dictionary test data - custom concept with concept field CUSTOM_CONCEPT_DICT: ClassVar[dict[str, Any]] = { @@ -203,10 +203,16 @@ def test_string_input(self, mocker: MockerFixture): def test_dict_with_native_concept(self, mocker: MockerFixture): """Test dictionary with native concept.""" + # Mock ConceptFactory to handle native concept creation mock_concept = mocker.Mock() mock_concept_factory = mocker.patch("pipelex.core.stuffs.stuff_factory.ConceptFactory") mock_concept_factory.make_native_concept.return_value = mock_concept + # Mock the make_domain_and_concept_code method + mock_domain_and_code = mocker.Mock() + mock_domain_and_code.concept_code = "Text" + mock_concept_factory.make_domain_and_concept_code_from_concept_string_or_code.return_value = mock_domain_and_code + mock_content = mocker.Mock(spec=StuffContent) mock_content_factory = 
mocker.patch("pipelex.core.stuffs.stuff_factory.StuffContentFactory") mock_content_factory.make_stuff_content_from_concept_with_fallback.return_value = mock_content From c541f5c74f2661485d11c5694b4b81bc653e0b49 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 13:40:33 +0200 Subject: [PATCH 091/115] remove --- pipelex/cogt/image/prompt_image.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pipelex/cogt/image/prompt_image.py b/pipelex/cogt/image/prompt_image.py index 319b2eca4..269479de0 100644 --- a/pipelex/cogt/image/prompt_image.py +++ b/pipelex/cogt/image/prompt_image.py @@ -44,14 +44,6 @@ def __str__(self) -> str: class PromptImageUrl(PromptImage): url: str - # @field_validator("url") - # @classmethod - # def validate_url(cls, value: str) -> str: - # if not (value.startswith(("http", "file", "data:image/"))): - # msg = f"Image url must start with http, file, or data:image/, here it starts with '{value[:100]}'" - # raise PromptImageDefinitionError(msg) - # return value - @override def __str__(self) -> str: truncated_url = AttributePolisher.get_truncated_value(name="url", value=self.url) From 87502accfbe8dc9a1ff2b4f7c8982e1a183725a0 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 13:41:57 +0200 Subject: [PATCH 092/115] rollback test file --- .../typing/test_find_classes_in_module.py | 179 +----------------- 1 file changed, 1 insertion(+), 178 deletions(-) diff --git a/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py b/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py index af110efa2..ba9d9548d 100644 --- a/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py +++ b/tests/unit/pipelex/tools/typing/test_find_classes_in_module.py @@ -1,15 +1,6 @@ -import sys import types -from pathlib import Path -import pytest - -from pipelex.tools.typing.module_inspector import ( - ModuleFileError, - find_class_names_in_file, - find_classes_in_module, - import_module_from_file_if_has_classes, -) 
+from pipelex.tools.typing.module_inspector import find_classes_in_module class TestFindClassesInModule: @@ -195,171 +186,3 @@ class ImportedSubClass(BaseClass): assert BaseClass in classes assert LocalSubClass in classes assert ImportedSubClass in classes - - -class TestFindClassNamesInFile: - def test_find_all_class_names(self, tmp_path: Path): - """Test finding all class names without filtering.""" - test_file_path = tmp_path / "test_classes.py" - test_file_path.write_text(""" -class ClassA: - pass - -class ClassB: - pass - -def some_function(): - pass -""") - class_names = find_class_names_in_file(str(test_file_path)) - assert len(class_names) == 2 - assert "ClassA" in class_names - assert "ClassB" in class_names - - def test_find_class_names_with_base_class_filter(self, tmp_path: Path): - """Test finding classes that inherit from specific base classes.""" - test_file_path = tmp_path / "test_inheritance.py" - test_file_path.write_text(""" -class BaseContent: - pass - -class StructuredContent: - pass - -class MyContent(StructuredContent): - pass - -class OtherContent(BaseContent): - pass - -class UnrelatedClass: - pass -""") - class_names = find_class_names_in_file( - str(test_file_path), - base_class_names=["StructuredContent"], - ) - assert len(class_names) == 1 - assert "MyContent" in class_names - assert "OtherContent" not in class_names - assert "UnrelatedClass" not in class_names - - def test_find_class_names_with_qualified_base_class(self, tmp_path: Path): - """Test finding classes with qualified base class names.""" - test_file_path = tmp_path / "test_qualified.py" - test_file_path.write_text(""" -from pipelex.core.stuffs.structured_content import StructuredContent - -class MyContent(StructuredContent): - pass - -class UnrelatedClass: - pass -""") - class_names = find_class_names_in_file( - str(test_file_path), - base_class_names=["StructuredContent"], - ) - assert len(class_names) == 1 - assert "MyContent" in class_names - - def 
test_find_class_names_empty_file(self, tmp_path: Path): - """Test with file containing no classes.""" - test_file_path = tmp_path / "test_empty.py" - test_file_path.write_text(""" -def some_function(): - pass - -variable = 42 -""") - class_names = find_class_names_in_file(str(test_file_path)) - assert len(class_names) == 0 - - def test_find_class_names_non_python_file_raises_error(self, tmp_path: Path): - """Test that non-Python file raises error.""" - test_file_path = tmp_path / "test.txt" - test_file_path.write_text("Not Python") - with pytest.raises(ModuleFileError) as excinfo: - find_class_names_in_file(str(test_file_path)) - assert "is not a Python file" in str(excinfo.value) - - def test_find_class_names_nonexistent_file_raises_error(self, tmp_path: Path): - """Test that nonexistent file raises error.""" - nonexistent_file_path = tmp_path / "nonexistent.py" - with pytest.raises(ModuleFileError) as excinfo: - find_class_names_in_file(str(nonexistent_file_path)) - assert "does not exist" in str(excinfo.value) - - -class TestImportModuleFromFileIfHasClasses: - @pytest.fixture(autouse=True) - def cleanup_sys_modules(self): - """Clean up sys.modules entries after each test.""" - yield - # Clean up sys.modules entries for test modules - modules_to_remove = [name for name in sys.modules if "test_module_" in name or name == "test_module"] - for module_name in modules_to_remove: - del sys.modules[module_name] - - def test_import_file_with_matching_classes(self, tmp_path: Path): - """Test that file with matching classes is imported.""" - test_file_path = tmp_path / "test_module_with_class.py" - test_file_path.write_text(""" -class StructuredContent: - pass - -class MyContent(StructuredContent): - value = "imported" -""") - module = import_module_from_file_if_has_classes( - str(test_file_path), - base_class_names=["StructuredContent"], - ) - assert module is not None - assert hasattr(module, "MyContent") - assert module.MyContent.value == "imported" - - def 
test_skip_file_without_matching_classes(self, tmp_path: Path): - """Test that file without matching classes is not imported.""" - test_file_path = tmp_path / "test_module_no_match.py" - # Add code that would execute and cause side effects - test_file_path.write_text(""" -print("This should not execute!") - -class UnrelatedClass: - pass - -def some_function(): - pass -""") - module = import_module_from_file_if_has_classes( - str(test_file_path), - base_class_names=["StructuredContent"], - ) - assert module is None - # Verify the module was NOT loaded into sys.modules - assert not any("test_module_no_match" in name for name in sys.modules) - - def test_import_all_files_with_classes_when_no_filter(self, tmp_path: Path): - """Test that any file with classes is imported when no filter is provided.""" - test_file_path = tmp_path / "test_module_any_class.py" - test_file_path.write_text(""" -class AnyClass: - value = "any_class" -""") - module = import_module_from_file_if_has_classes(str(test_file_path)) - assert module is not None - assert hasattr(module, "AnyClass") - assert module.AnyClass.value == "any_class" - - def test_skip_file_with_no_classes_when_no_filter(self, tmp_path: Path): - """Test that file with no classes is skipped even without filter.""" - test_file_path = tmp_path / "test_module_no_classes.py" - test_file_path.write_text(""" -def some_function(): - pass - -variable = 42 -""") - module = import_module_from_file_if_has_classes(str(test_file_path)) - assert module is None From 0642ddb8f900a6e92dfdf2ae6b776c10e1e99e69 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 14:31:00 +0200 Subject: [PATCH 093/115] ok --- pyproject.toml | 7 +++++-- uv.lock | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 771783435..f98346b80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pipelex" -version = "0.11.0" +version = 
"0.11.1" description = "Pipelex is an open-source dev tool based on a simple declarative language that lets you define replicable, structured, composable LLM pipelines." authors = [{ name = "Evotis S.A.S.", email = "evotis@pipelex.com" }] maintainers = [{ name = "Pipelex staff", email = "oss@pipelex.com" }] @@ -66,7 +66,7 @@ docs = [ dev = [ "boto3-stubs>=1.35.24", - "cocode==0.1.2", + "cocode @ git+https://github.com/Pipelex/cocode.git@release/v0.3.0", "mypy>=1.11.2", "pyright>=1.1.405", "pylint>=3.3.8", @@ -84,6 +84,9 @@ dev = [ "types-PyYAML>=6.0.12.20250326", ] +[tool.uv.sources] +cocode = { path = "../cocode", editable = true } + [project.scripts] pipelex = "pipelex.cli._cli:app" diff --git a/uv.lock b/uv.lock index 54efef1c5..fc35d7f09 100644 --- a/uv.lock +++ b/uv.lock @@ -576,16 +576,38 @@ wheels = [ [[package]] name = "cocode" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } +version = "0.2.3" +source = { editable = "../cocode" } dependencies = [ - { name = "pipelex", extra = ["anthropic", "bedrock", "google"] }, + { name = "pipelex", extra = ["anthropic", "bedrock", "google", "google-genai"] }, { name = "pygithub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/a8/890526fa2f3cab311cb7a6d56fedd56c6829254fe13ca58435883f14c21f/cocode-0.1.2.tar.gz", hash = "sha256:63b7f9c51bd18c55f485051d714577a5b755498a20e6a9c69fa9ed734fb47801", size = 43905, upload-time = "2025-09-03T09:14:33.233Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/56/e37fd29be59dca658a22aef6328f475850ff4b8cc988e60e04ccdc9fb2b4/cocode-0.1.2-py3-none-any.whl", hash = "sha256:32aba620170da4e1855394fdf37274bf443a98fba98f0d5da1d89a06f06aa8c6", size = 60516, upload-time = "2025-09-03T09:14:32.072Z" }, + +[package.metadata] +requires-dist = [ + { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, + { name = "mkdocs", marker = "extra == 'docs'", specifier = "==1.6.1" }, + { name = "mkdocs-glightbox", marker = "extra 
== 'docs'", specifier = "==0.4.0" }, + { name = "mkdocs-material", marker = "extra == 'docs'", specifier = "==9.6.14" }, + { name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = "==1.1.0" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, + { name = "pipelex", extras = ["anthropic", "google", "google-genai", "bedrock"], editable = "." }, + { name = "pygithub", specifier = "==2.4.0" }, + { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.405" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.3" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.1" }, + { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, + { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.6.8" }, + { name = "types-aioboto3", extras = ["bedrock", "bedrock-runtime"], marker = "extra == 'dev'", specifier = ">=13.4.0" }, + { name = "types-aiofiles", marker = "extra == 'dev'", specifier = ">=24.1.0.20240626" }, + { name = "types-markdown", marker = "extra == 'dev'", specifier = ">=3.6.0.20240316" }, + { name = "types-networkx", marker = "extra == 'dev'", specifier = ">=3.3.0.20241020" }, + { name = "types-openpyxl", marker = "extra == 'dev'", specifier = ">=3.1.5.20250306" }, + { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12.20250326" }, ] +provides-extras = ["docs", "dev"] [[package]] name = "colorama" @@ -2067,7 +2089,7 @@ wheels = [ [[package]] name = "pipelex" -version = "0.11.0" +version = "0.11.1" source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, @@ -2152,7 +2174,7 @@ requires-dist = [ { name = "backports-strenum", marker = "python_full_version < '3.11'", specifier = ">=1.3.0" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34.131" }, { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, - { name = "cocode", marker = "extra == 'dev'", specifier = "==0.1.2" }, + { name = "cocode", marker = "extra == 'dev'", editable = "../cocode" }, { name = "fal-client", marker = "extra == 'fal'", specifier = ">=0.4.1" }, { name = "filetype", specifier = ">=1.2.0" }, { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.2.1" }, From 991739c3c9a5a01a4985e7d72ad5fc979286f4cb Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 14:31:16 +0200 Subject: [PATCH 094/115] 11.2 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f98346b80..af6dfc54f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pipelex" -version = "0.11.1" +version = "0.11.2" description = "Pipelex is an open-source dev tool based on a simple declarative language that lets you define replicable, structured, composable LLM pipelines." authors = [{ name = "Evotis S.A.S.", email = "evotis@pipelex.com" }] maintainers = [{ name = "Pipelex staff", email = "oss@pipelex.com" }] diff --git a/uv.lock b/uv.lock index fc35d7f09..f36f0b2a0 100644 --- a/uv.lock +++ b/uv.lock @@ -2089,7 +2089,7 @@ wheels = [ [[package]] name = "pipelex" -version = "0.11.1" +version = "0.11.2" source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, From b9f3dc3f7dca288cde2bf528d40526cb91e07b1f Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 14:31:28 +0200 Subject: [PATCH 095/115] bump --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index af6dfc54f..f98346b80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pipelex" -version = "0.11.2" +version = "0.11.1" description = "Pipelex is an open-source dev tool based on a simple declarative language that lets you define replicable, structured, composable LLM pipelines." authors = [{ name = "Evotis S.A.S.", email = "evotis@pipelex.com" }] maintainers = [{ name = "Pipelex staff", email = "oss@pipelex.com" }] diff --git a/uv.lock b/uv.lock index f36f0b2a0..fc35d7f09 100644 --- a/uv.lock +++ b/uv.lock @@ -2089,7 +2089,7 @@ wheels = [ [[package]] name = "pipelex" -version = "0.11.2" +version = "0.11.1" source = { editable = "." } dependencies = [ { name = "aiofiles" }, From cd7cdc0a9f454fd4f2d29941c99548259c3bde6d Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Wed, 15 Oct 2025 14:31:43 +0200 Subject: [PATCH 096/115] bump --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 771783435..f5a22a39d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pipelex" -version = "0.11.0" +version = "0.11.2" description = "Pipelex is an open-source dev tool based on a simple declarative language that lets you define replicable, structured, composable LLM pipelines." 
authors = [{ name = "Evotis S.A.S.", email = "evotis@pipelex.com" }] maintainers = [{ name = "Pipelex staff", email = "oss@pipelex.com" }] diff --git a/uv.lock b/uv.lock index 54efef1c5..f39792205 100644 --- a/uv.lock +++ b/uv.lock @@ -2067,7 +2067,7 @@ wheels = [ [[package]] name = "pipelex" -version = "0.11.0" +version = "0.11.2" source = { editable = "." } dependencies = [ { name = "aiofiles" }, From b872a0d71ce6c1798ef537f971200d7b127cfb2d Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 12:37:45 +0200 Subject: [PATCH 097/115] ok --- pipelex/core/concepts/concept_library.py | 15 +- pipelex/core/domains/domain_library.py | 21 +- .../core/domains/domain_library_abstract.py | 2 + pipelex/core/pipes/pipe_library.py | 11 +- pipelex/core/pipes/pipe_library_abstract.py | 17 +- pipelex/hub.py | 57 ++- pipelex/libraries/__init__.py | 9 + pipelex/libraries/library.py | 148 ++++++++ pipelex/libraries/library_manager.py | 336 ++++++------------ pipelex/libraries/library_manager_abstract.py | 49 ++- pipelex/pipeline/execute.py | 2 +- 11 files changed, 375 insertions(+), 292 deletions(-) create mode 100644 pipelex/libraries/library.py diff --git a/pipelex/core/concepts/concept_library.py b/pipelex/core/concepts/concept_library.py index 807184ead..1db9518d8 100644 --- a/pipelex/core/concepts/concept_library.py +++ b/pipelex/core/concepts/concept_library.py @@ -29,21 +29,22 @@ def validate_with_libraries(self): @override def setup(self): - all_native_concepts = ConceptFactory.make_all_native_concepts() - self.add_concepts(concepts=all_native_concepts) + pass @override - def reset(self): + def teardown(self): self.root = {} - self.setup() @override - def teardown(self): - self.root = {} + def reset(self): + self.teardown() + self.setup() @classmethod def make_empty(cls) -> Self: - return cls(root={}) + library = cls(root={}) + library.setup() + return library @override def list_concepts(self) -> list[Concept]: diff --git 
a/pipelex/core/domains/domain_library.py b/pipelex/core/domains/domain_library.py index 7a757160b..5996acc2b 100644 --- a/pipelex/core/domains/domain_library.py +++ b/pipelex/core/domains/domain_library.py @@ -13,9 +13,17 @@ class DomainLibrary(RootModel[DomainLibraryRoot], DomainLibraryAbstract): def validate_with_libraries(self): pass - def reset(self): + def setup(self): + pass + + @override + def teardown(self): self.root = {} + def reset(self): + self.teardown() + self.setup() + @classmethod def make_empty(cls) -> Self: return cls(root={}) @@ -36,18 +44,11 @@ def remove_domain_by_code(self, domain_code: str) -> None: if domain_code in self.root: del self.root[domain_code] - @override - def get_domain(self, domain: str) -> Domain | None: - return self.root.get(domain) - @override def get_required_domain(self, domain: str) -> Domain: + """Get a domain by code from this library, raising an error if not found.""" the_domain = self.get_domain(domain=domain) if not the_domain: msg = f"Domain '{domain}' not found. Check for typos and make sure it is declared in a pipeline library." 
raise DomainLibraryError(msg) - return the_domain - - @override - def teardown(self) -> None: - self.root = {} + return the_domain \ No newline at end of file diff --git a/pipelex/core/domains/domain_library_abstract.py b/pipelex/core/domains/domain_library_abstract.py index 5de1b5726..271d7ae79 100644 --- a/pipelex/core/domains/domain_library_abstract.py +++ b/pipelex/core/domains/domain_library_abstract.py @@ -6,10 +6,12 @@ class DomainLibraryAbstract(ABC): @abstractmethod def get_domain(self, domain: str) -> Domain | None: + """Get a domain by code from this library.""" pass @abstractmethod def get_required_domain(self, domain: str) -> Domain: + """Get a domain by code from this library, raising an error if not found.""" pass @abstractmethod diff --git a/pipelex/core/pipes/pipe_library.py b/pipelex/core/pipes/pipe_library.py index 7ff587a0e..f970b9490 100644 --- a/pipelex/core/pipes/pipe_library.py +++ b/pipelex/core/pipes/pipe_library.py @@ -35,9 +35,15 @@ def validate_with_libraries(self, pipeline_run_id: str | None = None): msg = f"Missing dependency for pipe '{pipe.code}': {not_found_error}" raise PipeLibraryError(msg) from not_found_error + @override + def teardown(self): + self.root = {} + @classmethod def make_empty(cls) -> Self: - return cls(root={}) + library = cls(root={}) + library.setup() + return library @override def add_new_pipe(self, pipe: PipeAbstract): @@ -80,9 +86,6 @@ def remove_pipes_by_codes(self, pipe_codes: list[str]) -> None: if pipe_code in self.root: del self.root[pipe_code] - @override - def teardown(self) -> None: - self.root = {} @override def pretty_list_pipes(self) -> None: diff --git a/pipelex/core/pipes/pipe_library_abstract.py b/pipelex/core/pipes/pipe_library_abstract.py index 3f135d96f..4ea9aa2e8 100644 --- a/pipelex/core/pipes/pipe_library_abstract.py +++ b/pipelex/core/pipes/pipe_library_abstract.py @@ -4,6 +4,19 @@ class PipeLibraryAbstract(ABC): + @abstractmethod + def setup(self) -> None: + pass + + @abstractmethod + def 
teardown(self) -> None: + pass + + @abstractmethod + def reset(self) -> None: + self.teardown() + self.setup() + @abstractmethod def validate_with_libraries(self, pipeline_run_id: str | None = None) -> None: pass @@ -27,10 +40,6 @@ def get_pipes_dict(self) -> dict[str, PipeAbstract]: def remove_pipes_by_codes(self, pipe_codes: list[str]) -> None: pass - @abstractmethod - def teardown(self) -> None: - pass - @abstractmethod def pretty_list_pipes(self) -> None: pass diff --git a/pipelex/hub.py b/pipelex/hub.py index c4457873e..807a7c21f 100644 --- a/pipelex/hub.py +++ b/pipelex/hub.py @@ -19,6 +19,7 @@ from pipelex.core.domains.domain_library_abstract import DomainLibraryAbstract from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract +from pipelex.libraries.library_manager import SpecialLibraryId from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from pipelex.observer.observer_protocol import ObserverProtocol from pipelex.pipe_run.pipe_router_protocol import PipeRouterProtocol @@ -234,27 +235,27 @@ def get_required_content_generator(self) -> ContentGeneratorProtocol: # pipelex - def get_required_domain_library(self, pipeline_run_id: str | None = None) -> DomainLibraryAbstract: + def get_required_domain_library(self, library_id: str | None = None) -> DomainLibraryAbstract: if self._library_manager is not None: - return self._library_manager.get_domain_library(pipeline_run_id=pipeline_run_id) + return self._library_manager.get_library(library_id=library_id).domain_library if self._domain_library is None: - msg = "DomainLibrary with pipeline_run_id '{pipeline_run_id}' is not initialized" + msg = "DomainLibrary with library_id '{library_id}' is not initialized" raise RuntimeError(msg) return self._domain_library - def get_required_concept_library(self, pipeline_run_id: str | None = None) -> ConceptLibraryAbstract: + def get_required_concept_library(self, library_id: str 
| None = None) -> ConceptLibraryAbstract: if self._library_manager is not None: - return self._library_manager.get_concept_library(pipeline_run_id=pipeline_run_id) + return self._library_manager.get_library(library_id=library_id).concept_library if self._concept_library is None: - msg = "ConceptLibrary with pipeline_run_id '{pipeline_run_id}' is not initialized" + msg = "ConceptLibrary with library_id '{library_id}' is not initialized" raise RuntimeError(msg) return self._concept_library - def get_required_pipe_library(self, pipeline_run_id: str | None = None) -> PipeLibraryAbstract: + def get_required_pipe_library(self, library_id: str | None = None) -> PipeLibraryAbstract: if self._library_manager is not None: - return self._library_manager.get_pipe_library(pipeline_run_id=pipeline_run_id) + return self._library_manager.get_library(library_id=library_id).pipe_library if self._pipe_library is None: - msg = "PipeLibrary with pipeline_run_id '{pipeline_run_id}' is not initialized" + msg = "PipeLibrary with library_id '{library_id}' is not initialized" raise RuntimeError(msg) return self._pipe_library @@ -379,36 +380,36 @@ def get_secret(secret_id: str) -> str: return get_secrets_provider().get_secret(secret_id=secret_id) -def get_required_domain(domain: str, pipeline_run_id: str | None = None) -> Domain: - return get_pipelex_hub().get_required_domain_library(pipeline_run_id=pipeline_run_id).get_required_domain(domain=domain) +def get_required_domain(domain: str, library_id: str | None = None) -> Domain: + return get_pipelex_hub().get_required_domain_library(library_id=library_id).get_required_domain(domain=domain) -def get_optional_domain(domain: str, pipeline_run_id: str | None = None) -> Domain | None: - return get_pipelex_hub().get_required_domain_library(pipeline_run_id=pipeline_run_id).get_domain(domain=domain) +def get_optional_domain(domain: str, library_id: str | None = None) -> Domain | None: + return 
get_pipelex_hub().get_required_domain_library(library_id=library_id).get_domain(domain=domain) -def get_pipe_library(pipeline_run_id: str | None = None) -> PipeLibraryAbstract: - return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id) +def get_pipe_library(library_id: str | None = None) -> PipeLibraryAbstract: + return get_pipelex_hub().get_required_pipe_library(library_id=library_id) -def get_pipes(pipeline_run_id: str | None = None) -> list[PipeAbstract]: - return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id).get_pipes() +def get_pipes(library_id: str | None = None) -> list[PipeAbstract]: + return get_pipelex_hub().get_required_pipe_library(library_id=library_id).get_pipes() -def get_required_pipe(pipe_code: str, pipeline_run_id: str | None = None) -> PipeAbstract: - return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id).get_required_pipe(pipe_code=pipe_code) +def get_required_pipe(pipe_code: str, library_id: str | None = None) -> PipeAbstract: + return get_pipelex_hub().get_required_pipe_library(library_id=library_id).get_required_pipe(pipe_code=pipe_code) -def get_optional_pipe(pipe_code: str, pipeline_run_id: str | None = None) -> PipeAbstract | None: - return get_pipelex_hub().get_required_pipe_library(pipeline_run_id=pipeline_run_id).get_optional_pipe(pipe_code=pipe_code) +def get_optional_pipe(pipe_code: str, library_id: str | None = None) -> PipeAbstract | None: + return get_pipelex_hub().get_required_pipe_library(library_id=library_id).get_optional_pipe(pipe_code=pipe_code) -def get_concept_library(pipeline_run_id: str | None = None) -> ConceptLibraryAbstract: - return get_pipelex_hub().get_required_concept_library(pipeline_run_id=pipeline_run_id) +def get_concept_library(library_id: str | None = None) -> ConceptLibraryAbstract: + return get_pipelex_hub().get_required_concept_library(library_id=library_id) -def get_required_concept(concept_string: str, pipeline_run_id: 
str | None = None) -> Concept: - return get_pipelex_hub().get_required_concept_library(pipeline_run_id=pipeline_run_id).get_required_concept(concept_string=concept_string) +def get_required_concept(concept_string: str, library_id: str | None = None) -> Concept: + return get_pipelex_hub().get_required_concept_library(library_id=library_id).get_required_concept(concept_string=concept_string) def get_pipe_router() -> PipeRouterProtocol: @@ -427,10 +428,6 @@ def get_activity_manager() -> ActivityManagerProtocol: return get_pipelex_hub().get_activity_manager() -def get_pipeline(pipeline_run_id: str) -> Pipeline: - return get_pipeline_manager().get_pipeline(pipeline_run_id=pipeline_run_id) - - def get_library_manager() -> LibraryManagerAbstract: return get_pipelex_hub().get_required_library_manager() @@ -440,4 +437,4 @@ def get_observer_provider() -> ObserverProtocol: def get_native_concept(native_concept: NativeConceptCode) -> Concept: - return get_pipelex_hub().get_required_concept_library(pipeline_run_id=pipeline_run_id).get_native_concept(native_concept=native_concept) + return get_pipelex_hub().get_required_concept_library(library_id=SpecialLibraryId.UNTITLED).get_native_concept(native_concept=native_concept) diff --git a/pipelex/libraries/__init__.py b/pipelex/libraries/__init__.py index 8b1378917..3484bf75f 100644 --- a/pipelex/libraries/__init__.py +++ b/pipelex/libraries/__init__.py @@ -1 +1,10 @@ +from pipelex.libraries.library import Library +from pipelex.libraries.library_manager import LibraryManager, SpecialLibraryId +from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract +__all__ = [ + "Library", + "LibraryManager", + "LibraryManagerAbstract", + "SpecialLibraryId", +] diff --git a/pipelex/libraries/library.py b/pipelex/libraries/library.py new file mode 100644 index 000000000..51327e25e --- /dev/null +++ b/pipelex/libraries/library.py @@ -0,0 +1,148 @@ +from pydantic import BaseModel, Field + +from 
pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint +from pipelex.core.concepts.concept import Concept +from pipelex.core.concepts.concept_factory import ConceptFactory +from pipelex.core.concepts.concept_library import ConceptLibrary +from pipelex.core.domains.domain import Domain +from pipelex.core.domains.domain_blueprint import DomainBlueprint +from pipelex.core.domains.domain_factory import DomainFactory +from pipelex.core.domains.domain_library import DomainLibrary +from pipelex.core.pipes.pipe_abstract import PipeAbstract +from pipelex.core.pipes.pipe_factory import PipeFactory +from pipelex.core.pipes.pipe_library import PipeLibrary + + +class Library(BaseModel): + """A Library bundles together domain, concept, and pipe libraries for a specific context. + + This represents a complete set of Pipelex definitions (domains, concepts, pipes) + that can be loaded and used together, typically for a single pipeline run. + + Each Library (except BASE) inherits native concepts and base pipes from the BASE library. + """ + + domain_library: DomainLibrary = Field(default_factory=DomainLibrary.make_empty) + concept_library: ConceptLibrary = Field(default_factory=ConceptLibrary.make_empty) + pipe_library: PipeLibrary = Field(default_factory=PipeLibrary.make_empty) + + @classmethod + def make_empty(cls) -> "Library": + """Create an empty library with initialized concept library (includes native concepts). + + This should only be used for the BASE library. 
+ """ + return cls( + domain_library=DomainLibrary.make_empty(), + concept_library=ConceptLibrary.make_empty(), + pipe_library=PipeLibrary.make_empty(), + ) + + @classmethod + def make_base(cls) -> "Library": + """Create the BASE library that contains native concepts and builder pipes.""" + # 1 - Concept library, add the native concepts + concept_library = ConceptLibrary.make_empty() + all_native_concepts = ConceptFactory.make_all_native_concepts() + concept_library.add_concepts(concepts=all_native_concepts) + + # 2 - Pipe library, add the builder pipes + pipe_library = PipeLibrary.make_empty() + + # 3 - Domain library, add the domains + domain_library = DomainLibrary.make_empty() + + return cls( + domain_library=domain_library, + concept_library=concept_library, + pipe_library=pipe_library, + ) + + def teardown(self) -> None: + """Teardown all libraries in this bundle.""" + self.pipe_library.teardown() + self.concept_library.teardown() + self.domain_library.teardown() + + def validate_with_libraries(self) -> None: + """Validate all libraries in this bundle.""" + self.concept_library.validate_with_libraries() + self.pipe_library.validate_with_libraries() + self.domain_library.validate_with_libraries() + + def load_from_blueprints(self, blueprints: list[PipelexBundleBlueprint]) -> list[PipeAbstract]: + """Load domains, concepts, and pipes from a list of blueprints. 
+ + Args: + blueprints: List of parsed PLX blueprints to load + + Returns: + List of all pipes that were loaded + """ + all_pipes: list[PipeAbstract] = [] + + # Load all domains first + all_domains: list[Domain] = [] + for blueprint in blueprints: + domain = self._load_domain_from_blueprint(blueprint) + all_domains.append(domain) + self.domain_library.add_domains(domains=all_domains) + + # Load all concepts second + all_concepts: list[Concept] = [] + for blueprint in blueprints: + concepts = self._load_concepts_from_blueprint(blueprint) + all_concepts.extend(concepts) + self.concept_library.add_concepts(concepts=all_concepts) + + # Load all pipes third + for blueprint in blueprints: + pipes = self._load_pipes_from_blueprint(blueprint) + all_pipes.extend(pipes) + self.pipe_library.add_pipes(pipes=all_pipes) + + return all_pipes + + def _load_domain_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> Domain: + """Load a domain from a blueprint.""" + return DomainFactory.make_from_blueprint( + blueprint=DomainBlueprint( + source=blueprint.source, + code=blueprint.domain, + description=blueprint.description or "", + system_prompt=blueprint.system_prompt, + system_prompt_to_structure=blueprint.system_prompt_to_structure, + prompt_template_to_structure=blueprint.prompt_template_to_structure, + ), + ) + + def _load_concepts_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[Concept]: + """Load concepts from a blueprint.""" + if blueprint.concept is None: + return [] + + concepts: list[Concept] = [] + for concept_code, concept_blueprint_or_description in blueprint.concept.items(): + concept = ConceptFactory.make_from_blueprint_or_description( + domain=blueprint.domain, + concept_code=concept_code, + concept_codes_from_the_same_domain=list(blueprint.concept.keys()), + concept_blueprint_or_description=concept_blueprint_or_description, + ) + concepts.append(concept) + return concepts + + def _load_pipes_from_blueprint(self, blueprint: 
PipelexBundleBlueprint) -> list[PipeAbstract]: + """Load pipes from a blueprint.""" + pipes: list[PipeAbstract] = [] + if blueprint.pipe is not None: + for pipe_name, pipe_blueprint in blueprint.pipe.items(): + pipe = PipeFactory.make_from_blueprint( + domain=blueprint.domain, + pipe_code=pipe_name, + blueprint=pipe_blueprint, + concept_codes_from_the_same_domain=list(blueprint.concept.keys()) if blueprint.concept else None, + ) + pipes.append(pipe) + return pipes + diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index b9809b687..3a600ac18 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -7,15 +7,11 @@ from pipelex import log from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint from pipelex.core.concepts.concept import Concept -from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_library import ConceptLibrary from pipelex.core.domains.domain import Domain -from pipelex.core.domains.domain_blueprint import DomainBlueprint -from pipelex.core.domains.domain_factory import DomainFactory from pipelex.core.domains.domain_library import DomainLibrary from pipelex.core.interpreter import PipelexInterpreter from pipelex.core.pipes.pipe_abstract import PipeAbstract -from pipelex.core.pipes.pipe_factory import PipeFactory from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.core.stuffs.structured_content import StructuredContent from pipelex.core.validation import report_validation_error @@ -31,13 +27,13 @@ PipeLibraryError, PipeLoadingError, ) +from pipelex.libraries.library import Library from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from pipelex.libraries.library_utils import ( find_plx_files_in_dir, get_pipelex_package_dir_for_imports, get_pipelex_plx_files_from_package, ) -from pipelex.pipeline.pipeline_models import SpecialPipelineId from 
pipelex.system.configuration.config_loader import config_manager from pipelex.system.registries.class_registry_utils import ClassRegistryUtils from pipelex.system.registries.func_registry_utils import FuncRegistryUtils @@ -57,6 +53,14 @@ def error_class(self) -> type[LibraryError]: return PipeLibraryError +class SpecialLibraryId(StrEnum): + """Special library identifiers. + + BASE: The base/default library containing native concepts and builder pipes. + All new libraries inherit (copy) the contents of the BASE library when created. + """ + BASE = "base" + class LibraryManager(LibraryManagerAbstract): allowed_root_attributes: ClassVar[list[str]] = [ "domain", @@ -67,9 +71,8 @@ class LibraryManager(LibraryManagerAbstract): ] def __init__(self): - self._domain_libraries: dict[str, DomainLibrary] = {SpecialPipelineId.UNTITLED: DomainLibrary.make_empty()} - self._concept_libraries: dict[str, ConceptLibrary] = {SpecialPipelineId.UNTITLED: ConceptLibrary.make_empty()} - self._pipe_libraries: dict[str, PipeLibrary] = {SpecialPipelineId.UNTITLED: PipeLibrary.make_empty()} + # BASE library is the fallback library for all others + self._libraries: dict[str, Library] = {SpecialLibraryId.BASE: Library.make_empty()} ############################################################ # Manager lifecycle @@ -77,26 +80,14 @@ def __init__(self): @override def setup(self) -> None: - self._domain_libraries.clear() - self._concept_libraries.clear() - self._pipe_libraries.clear() - concept_library = ConceptLibrary.make_empty() - concept_library.setup() - self._domain_libraries[SpecialPipelineId.UNTITLED] = DomainLibrary.make_empty() - self._concept_libraries[SpecialPipelineId.UNTITLED] = concept_library - self._pipe_libraries[SpecialPipelineId.UNTITLED] = PipeLibrary.make_empty() + self._libraries.clear() + self._libraries[SpecialLibraryId.BASE] = Library.make_empty() @override def teardown(self) -> None: - for pipe_library in self._pipe_libraries.values(): - pipe_library.teardown() - for 
concept_library in self._concept_libraries.values(): - concept_library.teardown() - for domain_library in self._domain_libraries.values(): - domain_library.teardown() - self._pipe_libraries.clear() - self._concept_libraries.clear() - self._domain_libraries.clear() + for library in self._libraries.values(): + library.teardown() + self._libraries.clear() @override def reset(self) -> None: @@ -104,103 +95,85 @@ def reset(self) -> None: self.setup() @override - def open_library(self, pipeline_run_id: str) -> None: - if pipeline_run_id in self._domain_libraries: - msg = f"Library for pipeline '{pipeline_run_id}' already exists" + def open_library(self, library_id: str) -> None: + """Open a new library with the given library_id. + + The new library will inherit native concepts and base pipes from the BASE library. + """ + if library_id in self._libraries: + msg = f"Library '{library_id}' already exists" raise LibraryError(msg) - - concept_library = ConceptLibrary.make_empty() - concept_library.setup() - self._domain_libraries[pipeline_run_id] = DomainLibrary.make_empty() - self._concept_libraries[pipeline_run_id] = concept_library - self._pipe_libraries[pipeline_run_id] = PipeLibrary.make_empty() + + # Create a new library that inherits from BASE + base_library = Library.make_base_library() + self._libraries[library_id] = base_library @override - def close_library(self, pipeline_run_id: str) -> None: - if pipeline_run_id in self._pipe_libraries: - self._pipe_libraries[pipeline_run_id].teardown() - self._pipe_libraries.pop(pipeline_run_id) - if pipeline_run_id in self._concept_libraries: - self._concept_libraries[pipeline_run_id].teardown() - self._concept_libraries.pop(pipeline_run_id) - if pipeline_run_id in self._domain_libraries: - self._domain_libraries[pipeline_run_id].teardown() - self._domain_libraries.pop(pipeline_run_id) + def close_library(self, library_id: str) -> None: + """Close and cleanup a library with the given library_id.""" + if library_id not in 
self._libraries: + msg = f"Trying to close a library that does not exist: '{library_id}'" + raise LibraryError(msg) + self._libraries[library_id].teardown() + self._libraries.pop(library_id) ############################################################ # Public library accessors ############################################################ @override - def get_domain_library(self, pipeline_run_id: str | None = None) -> DomainLibrary: - if pipeline_run_id is None: - pipeline_run_id = SpecialPipelineId.UNTITLED - if pipeline_run_id not in self._domain_libraries: - msg = f"Domain library for pipeline '{pipeline_run_id}' does not exist" + def get_library(self, library_id: str | None = None) -> Library: + """Get the Library object for a specific library_id.""" + if library_id is None: + library_id = SpecialLibraryId.BASE + if library_id not in self._libraries: + msg = f"Trying to get a library that does not exist: '{library_id}'" raise LibraryError(msg) - return self._domain_libraries[pipeline_run_id] + return self._libraries[library_id] @override - def get_concept_library(self, pipeline_run_id: str | None = None) -> ConceptLibrary: - if pipeline_run_id is None: - pipeline_run_id = SpecialPipelineId.UNTITLED - if pipeline_run_id not in self._concept_libraries: - msg = f"Concept library for pipeline '{pipeline_run_id}' does not exist" - raise LibraryError(msg) - return self._concept_libraries[pipeline_run_id] + def get_domain_library(self, library_id: str | None = None) -> DomainLibrary: + """Get the domain library for a specific library_id.""" + return self.get_library(library_id).domain_library @override - def get_pipe_library(self, pipeline_run_id: str | None = None) -> PipeLibrary: - if pipeline_run_id is None: - pipeline_run_id = SpecialPipelineId.UNTITLED - if pipeline_run_id not in self._pipe_libraries: - msg = f"Pipe library for pipeline '{pipeline_run_id}' does not exist" - raise LibraryError(msg) - return self._pipe_libraries[pipeline_run_id] + def 
get_concept_library(self, library_id: str | None = None) -> ConceptLibrary: + """Get the concept library for a specific library_id.""" + return self.get_library(library_id).concept_library - ############################################################ - # Private methods - ############################################################ + @override + def get_pipe_library(self, library_id: str | None = None) -> PipeLibrary: + """Get the pipe library for a specific library_id.""" + return self.get_library(library_id).pipe_library - def _get_domain_library(self, pipeline_run_id: str) -> DomainLibrary: - """Internal helper that requires explicit pipeline_run_id.""" - if pipeline_run_id not in self._domain_libraries: - msg = f"Domain library for pipeline '{pipeline_run_id}' does not exist" - raise LibraryError(msg) - return self._domain_libraries[pipeline_run_id] + @override + def get_required_domain(self, domain: str, library_id: str | None = None) -> Domain: + """Get a required domain from the specified library.""" + return self.get_library(library_id).domain_library.get_required_domain(domain=domain) - def _get_concept_library(self, pipeline_run_id: str) -> ConceptLibrary: - """Internal helper that requires explicit pipeline_run_id.""" - if pipeline_run_id not in self._concept_libraries: - msg = f"Concept library for pipeline '{pipeline_run_id}' does not exist" - raise LibraryError(msg) - return self._concept_libraries[pipeline_run_id] + @override + def get_required_concept(self, concept_string: str, library_id: str | None = None) -> Concept: + """Get a required concept from the specified library.""" + return self.get_library(library_id).concept_library.get_required_concept(concept_string=concept_string) - def _get_pipe_library(self, pipeline_run_id: str) -> PipeLibrary: - """Internal helper that requires explicit pipeline_run_id.""" - if pipeline_run_id not in self._pipe_libraries: - msg = f"Pipe library for pipeline '{pipeline_run_id}' does not exist" - raise 
LibraryError(msg) - return self._pipe_libraries[pipeline_run_id] + @override + def get_required_pipe(self, pipe_code: str, library_id: str | None = None) -> PipeAbstract: + """Get a required pipe from the specified library.""" + return self.get_library(library_id).pipe_library.get_required_pipe(pipe_code=pipe_code) + + ############################################################ + # Private methods + ############################################################ ############################################################ # LibraryManagerAbstract ############################################################ @override - def validate_libraries(self, pipeline_run_id: str | None = None): + def validate_libraries(self, library_id: str | None = None): log.debug("LibraryManager validating libraries") - - if pipeline_run_id is None: - pipeline_run_id = SpecialPipelineId.UNTITLED - - concept_library = self._get_concept_library(pipeline_run_id) - pipe_library = self._get_pipe_library(pipeline_run_id) - domain_library = self._get_domain_library(pipeline_run_id) - - concept_library.validate_with_libraries() - pipe_library.validate_with_libraries() - domain_library.validate_with_libraries() + library = self.get_library(library_id) + library.validate_with_libraries() def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: """Get all valid Pipelex PLX files from the given directories.""" @@ -237,106 +210,44 @@ def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: return all_plx_paths @override - def load_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> list[PipeAbstract]: - """Load a blueprint.""" - if pipeline_run_id is None: - pipeline_run_id = SpecialPipelineId.UNTITLED - - domain_library = self._get_domain_library(pipeline_run_id) - concept_library = self._get_concept_library(pipeline_run_id) - pipe_library = self._get_pipe_library(pipeline_run_id) - - # Create and load domain + def 
load_from_blueprint(self, blueprint: PipelexBundleBlueprint, library_id: str | None = None) -> list[PipeAbstract]: + """Load a single blueprint into the specified library.""" + library = self.get_library(library_id) + try: - domain = self._load_domain_from_blueprint(blueprint) + return library.load_from_blueprints([blueprint]) except DomainDefinitionError as exc: msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {exc}" raise DomainLoadingError(message=msg, domain_code=exc.domain_code, description=exc.description, source=exc.source) from exc - domain_library.add_domain(domain=domain) - - # Create and load concepts - try: - concepts = self._load_concepts_from_blueprint(blueprint) except ConceptDefinitionError as exc: msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {exc}" raise ConceptLoadingError( message=msg, concept_definition_error=exc, concept_code=exc.concept_code, description=exc.description, source=exc.source ) from exc - concept_library.add_concepts(concepts=concepts) - - # Create and load pipes - try: - pipes = self._load_pipes_from_blueprint(blueprint) except PipeDefinitionError as exc: msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {exc}" raise PipeLoadingError( message=msg, pipe_definition_error=exc, pipe_code=exc.pipe_code or "", description=exc.description or "", source=exc.source ) from exc - pipe_library.add_pipes(pipes=pipes) - - return pipes @override - def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> None: - if pipeline_run_id is None: - pipeline_run_id = SpecialPipelineId.UNTITLED - - domain_library = self._get_domain_library(pipeline_run_id) - concept_library = self._get_concept_library(pipeline_run_id) - pipe_library = self._get_pipe_library(pipeline_run_id) + def remove_from_blueprint(self, blueprint: 
PipelexBundleBlueprint, library_id: str | None = None) -> None: + library = self.get_library(library_id) if blueprint.pipe is not None: - pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) + library.pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) # Remove concepts (they may depend on domain) if blueprint.concept is not None: + from pipelex.core.concepts.concept_factory import ConceptFactory concept_codes_to_remove = [ ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) for concept_code in blueprint.concept ] - concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) - - domain_library.remove_domain_by_code(domain_code=blueprint.domain) - - def _load_domain_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> Domain: - return DomainFactory.make_from_blueprint( - blueprint=DomainBlueprint( - source=blueprint.source, - code=blueprint.domain, - description=blueprint.description or "", - system_prompt=blueprint.system_prompt, - system_prompt_to_structure=blueprint.system_prompt_to_structure, - prompt_template_to_structure=blueprint.prompt_template_to_structure, - ), - ) - - def _load_concepts_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[Concept]: - if blueprint.concept is None: - return [] - - concepts: list[Concept] = [] - for concept_code, concept_blueprint_or_description in blueprint.concept.items(): - concept = ConceptFactory.make_from_blueprint_or_description( - domain=blueprint.domain, - concept_code=concept_code, - concept_codes_from_the_same_domain=list(blueprint.concept.keys()), - concept_blueprint_or_description=concept_blueprint_or_description, - ) - concepts.append(concept) - return concepts + library.concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) + + library.domain_library.remove_domain_by_code(domain_code=blueprint.domain) - def _load_pipes_from_blueprint(self, blueprint: 
PipelexBundleBlueprint) -> list[PipeAbstract]: - pipes: list[PipeAbstract] = [] - if blueprint.pipe is not None: - for pipe_name, pipe_blueprint in blueprint.pipe.items(): - pipe = PipeFactory.make_from_blueprint( - domain=blueprint.domain, - pipe_code=pipe_name, - blueprint=pipe_blueprint, - concept_codes_from_the_same_domain=list(blueprint.concept.keys()) if blueprint.concept else None, - ) - pipes.append(pipe) - return pipes def _import_pipelex_modules_directly(self) -> None: """Import pipelex modules to register @pipe_func decorated functions. @@ -353,25 +264,24 @@ def _import_pipelex_modules_directly(self) -> None: @override def load_libraries( self, - pipeline_run_id: str | None = None, + library_id: str | None = None, library_dirs: list[Path] | None = None, library_file_paths: list[Path] | None = None, ) -> None: - if pipeline_run_id is None: - pipeline_run_id = SpecialPipelineId.UNTITLED + if library_id is None: + library_id = SpecialLibraryId.BASE - # Ensure libraries exist for this pipeline_run_id - if pipeline_run_id not in self._domain_libraries: - if pipeline_run_id == SpecialPipelineId.UNTITLED: - # Auto-setup for UNTITLED if not already done + # Ensure libraries exist for this library_id + if library_id not in self._libraries: + if library_id == SpecialLibraryId.BASE: + # Auto-setup for BASE if not already done self.setup() else: - msg = f"Libraries for pipeline '{pipeline_run_id}' do not exist. Call open_library() first." + msg = f"Library '{library_id}' does not exist. Call open_library() first." 
raise LibraryError(msg) - domain_library = self._get_domain_library(pipeline_run_id) - concept_library = self._get_concept_library(pipeline_run_id) - pipe_library = self._get_pipe_library(pipeline_run_id) + library = self.get_library(library_id) + # Collect directories to scan (user project directories) user_dirs: set[Path] = set() if library_dirs: @@ -449,7 +359,7 @@ def load_libraries( num_registered = ClassRegistryUtils.auto_register_all_subclasses(base_class=StructuredContent) log.debug(f"Auto-registered {num_registered} StructuredContent classes from loaded modules") - # Parse all blueprints first + # Parse all blueprints blueprints: list[PipelexBundleBlueprint] = [] for plx_file_path in valid_plx_paths: try: @@ -467,47 +377,19 @@ def load_libraries( blueprint.source = str(plx_file_path) blueprints.append(blueprint) - # Load all domains first - all_domains: list[Domain] = [] - for blueprint in blueprints: - try: - domain = self._load_domain_from_blueprint(blueprint) - except DomainDefinitionError as domain_def_error: - msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {domain_def_error}" - raise LibraryLoadingError(msg) from domain_def_error - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" - raise LibraryLoadingError(msg) from validation_error - all_domains.append(domain) - domain_library.add_domains(domains=all_domains) - - # Load all concepts second - all_concepts: list[Concept] = [] - for blueprint in blueprints: - try: - concepts = self._load_concepts_from_blueprint(blueprint) - except ConceptDefinitionError as concept_def_error: - msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {concept_def_error}" - raise 
LibraryLoadingError(msg) from concept_def_error - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" - raise LibraryLoadingError(msg) from validation_error - all_concepts.extend(concepts) - concept_library.add_concepts(concepts=all_concepts) - - # Load all pipes third - all_pipes: list[PipeAbstract] = [] - for blueprint in blueprints: - try: - pipes = self._load_pipes_from_blueprint(blueprint) - except PipeDefinitionError as pipe_def_error: - msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {pipe_def_error}" - raise LibraryLoadingError(msg) from pipe_def_error - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {validation_error_msg}" - raise LibraryLoadingError(msg) from validation_error - all_pipes.extend(pipes) - pipe_library.add_pipes(pipes=all_pipes) + # Load all blueprints into the library + try: + library.load_from_blueprints(blueprints) + except DomainDefinitionError as domain_def_error: + msg = f"Could not load domains from blueprints: {domain_def_error}" + raise LibraryLoadingError(msg) from domain_def_error + except ConceptDefinitionError as concept_def_error: + msg = f"Could not load concepts from blueprints: {concept_def_error}" + raise LibraryLoadingError(msg) from concept_def_error + except PipeDefinitionError as pipe_def_error: + msg = f"Could not load pipes from blueprints: {pipe_def_error}" + raise LibraryLoadingError(msg) from pipe_def_error + except ValidationError as validation_error: + validation_error_msg = report_validation_error(category="plx", 
validation_error=validation_error) + msg = f"Could not load blueprints because of: {validation_error_msg}" + raise LibraryLoadingError(msg) from validation_error diff --git a/pipelex/libraries/library_manager_abstract.py b/pipelex/libraries/library_manager_abstract.py index 5d4b23609..2cdfa8307 100644 --- a/pipelex/libraries/library_manager_abstract.py +++ b/pipelex/libraries/library_manager_abstract.py @@ -1,12 +1,18 @@ from abc import ABC, abstractmethod from pathlib import Path +from typing import TYPE_CHECKING from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint +from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract +from pipelex.core.domains.domain import Domain from pipelex.core.domains.domain_library_abstract import DomainLibraryAbstract from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract +if TYPE_CHECKING: + from pipelex.libraries.library import Library + class LibraryManagerAbstract(ABC): @abstractmethod @@ -22,42 +28,67 @@ def reset(self) -> None: pass @abstractmethod - def open_library(self, pipeline_run_id: str) -> None: + def open_library(self, library_id: str) -> None: + """Open a new library with the given library_id.""" + pass + + @abstractmethod + def close_library(self, library_id: str) -> None: + """Close and cleanup a library with the given library_id.""" + pass + + @abstractmethod + def get_library(self, library_id: str | None = None) -> "Library": + """Get the Library object for a specific library_id.""" + pass + + @abstractmethod + def get_domain_library(self, library_id: str | None = None) -> DomainLibraryAbstract: + """Get the domain library for a specific library_id.""" + pass + + @abstractmethod + def get_concept_library(self, library_id: str | None = None) -> ConceptLibraryAbstract: + """Get the concept library for a specific library_id.""" pass 
@abstractmethod - def close_library(self, pipeline_run_id: str) -> None: + def get_pipe_library(self, library_id: str | None = None) -> PipeLibraryAbstract: + """Get the pipe library for a specific library_id.""" pass @abstractmethod - def get_domain_library(self, pipeline_run_id: str | None = None) -> DomainLibraryAbstract: + def get_required_domain(self, domain: str, library_id: str | None = None) -> Domain: + """Get a required domain from the specified library.""" pass @abstractmethod - def get_concept_library(self, pipeline_run_id: str | None = None) -> ConceptLibraryAbstract: + def get_required_concept(self, concept_string: str, library_id: str | None = None) -> Concept: + """Get a required concept from the specified library.""" pass @abstractmethod - def get_pipe_library(self, pipeline_run_id: str | None = None) -> PipeLibraryAbstract: + def get_required_pipe(self, pipe_code: str, library_id: str | None = None) -> PipeAbstract: + """Get a required pipe from the specified library.""" pass @abstractmethod - def validate_libraries(self, pipeline_run_id: str | None = None) -> None: + def validate_libraries(self, library_id: str | None = None) -> None: pass @abstractmethod def load_libraries( self, - pipeline_run_id: str | None = None, + library_id: str | None = None, library_dirs: list[Path] | None = None, library_file_paths: list[Path] | None = None, ) -> None: pass @abstractmethod - def load_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> list[PipeAbstract]: + def load_from_blueprint(self, blueprint: PipelexBundleBlueprint, library_id: str | None = None) -> list[PipeAbstract]: pass @abstractmethod - def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint, pipeline_run_id: str | None = None) -> None: + def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint, library_id: str | None = None) -> None: pass diff --git a/pipelex/pipeline/execute.py b/pipelex/pipeline/execute.py index 
42a29e2ba..7e7bb45f0 100644 --- a/pipelex/pipeline/execute.py +++ b/pipelex/pipeline/execute.py @@ -89,7 +89,7 @@ async def execute_pipeline( pipeline = get_pipeline_manager().add_new_pipeline() pipeline_run_id = pipeline.pipeline_run_id get_report_delegate().open_registry(pipeline_run_id=pipeline_run_id) - get_library_manager().open_library(pipeline_run_id=pipeline_run_id) + get_library_manager().open_library(library_id=pipeline_run_id) job_metadata = JobMetadata( pipeline_run_id=pipeline_run_id, From 2f0e2f239d44a524a46175c877235929224afe81 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 12:38:24 +0200 Subject: [PATCH 098/115] 0.12.0 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 446721cf0..f48a4d807 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pipelex" -version = "0.12.1" +version = "0.12.0" description = "Pipelex is an open-source dev tool based on a simple declarative language that lets you define replicable, structured, composable LLM pipelines." authors = [{ name = "Evotis S.A.S.", email = "evotis@pipelex.com" }] maintainers = [{ name = "Pipelex staff", email = "oss@pipelex.com" }] diff --git a/uv.lock b/uv.lock index 52fc92184..176a88254 100644 --- a/uv.lock +++ b/uv.lock @@ -2089,7 +2089,7 @@ wheels = [ [[package]] name = "pipelex" -version = "0.12.1" +version = "0.12.0" source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, From 3551342d789d628e01bdd19f78297f5d509f728f Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 13:59:53 +0200 Subject: [PATCH 099/115] fix cocode --- pyproject.toml | 5 +- uv.lock | 712 ++++++++++++++++++++++++------------------------- 2 files changed, 344 insertions(+), 373 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f48a4d807..8efb6c6d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ docs = [ dev = [ "boto3-stubs>=1.35.24", - "cocode @ git+https://github.com/Pipelex/cocode.git@release/v0.3.0", + "cocode==0.3.0", "mypy>=1.11.2", "pyright>=1.1.405", "pylint>=3.3.8", @@ -84,9 +84,6 @@ dev = [ "types-PyYAML>=6.0.12.20250326", ] -[tool.uv.sources] -cocode = { path = "../cocode", editable = true } - [project.scripts] pipelex = "pipelex.cli._cli:app" diff --git a/uv.lock b/uv.lock index 176a88254..ae70caaf5 100644 --- a/uv.lock +++ b/uv.lock @@ -216,7 +216,7 @@ wheels = [ [[package]] name = "anthropic" -version = "0.69.0" +version = "0.70.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -228,9 +228,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c8/9d/9ad1778b95f15c5b04e7d328c1b5f558f1e893857b7c33cd288c19c0057a/anthropic-0.69.0.tar.gz", hash = "sha256:c604d287f4d73640f40bd2c0f3265a2eb6ce034217ead0608f6b07a8bc5ae5f2", size = 480622, upload-time = "2025-09-29T16:53:45.282Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/be/a80a8678d39d77b2325b1a32a55d62ca9dc376984a3d66d351229d37da9c/anthropic-0.70.0.tar.gz", hash = "sha256:24078275246636d9fd38c94bb8cf64799ce7fc6bbad379422b36fa86b3e4deee", size = 480930, upload-time = "2025-10-15T16:54:33.577Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/38/75129688de5637eb5b383e5f2b1570a5cc3aecafa4de422da8eea4b90a6c/anthropic-0.69.0-py3-none-any.whl", hash = 
"sha256:1f73193040f33f11e27c2cd6ec25f24fe7c3f193dc1c5cde6b7a08b18a16bcc5", size = 337265, upload-time = "2025-09-29T16:53:43.686Z" }, + { url = "https://files.pythonhosted.org/packages/a3/81/da287ba25b9f8a16d27e822b3f2dad6ddf005fba3e3696f5dce818383850/anthropic-0.70.0-py3-none-any.whl", hash = "sha256:fa7d0dee6f2b871faa7cd0b77f6047e8006d5863618804204cf34b1b95819971", size = 337327, upload-time = "2025-10-15T16:54:32.087Z" }, ] [[package]] @@ -335,16 +335,16 @@ wheels = [ [[package]] name = "boto3-stubs" -version = "1.40.52" +version = "1.40.53" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore-stubs" }, { name = "types-s3transfer" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/00/7b/92a266747a504c09c40a382364e5f041a58acc2959f2920aa9b4ccf5e9db/boto3_stubs-1.40.52.tar.gz", hash = "sha256:bd20a7bc9122bb1b939195431b9d3f540b1ef050103bc1720d786960907464fd", size = 100895, upload-time = "2025-10-14T20:45:42.254Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/12/f8233045332ecdbaa483fb70e0990836903b281f31034b4a216254c13bbf/boto3_stubs-1.40.53.tar.gz", hash = "sha256:2dbb49642124a0815d271dde96b9d24cd4a908c96bb7be3cdc631b53169a2d92", size = 100898, upload-time = "2025-10-15T19:47:48.502Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/43/cf/d08cd0df2639896db574490181c363fc491404607b867a1bf04a623c4a19/boto3_stubs-1.40.52-py3-none-any.whl", hash = "sha256:5e2b74b7b5ad71ca2b8c35a8d3bf1e4ef60317b1682b5e7dda9f16a1c0b43844", size = 69709, upload-time = "2025-10-14T20:45:33.777Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1c/5d4e0f66d3b743b7c42ae0db4db586e02f13eb55271e3ea588ffdf973390/boto3_stubs-1.40.53-py3-none-any.whl", hash = "sha256:2239af9c47940f05f6e03f6d392f5044ae77c75d05895bae770737d0ad4e2afe", size = 69708, upload-time = "2025-10-15T19:47:36.66Z" }, ] [[package]] @@ -363,14 +363,14 @@ wheels = [ 
[[package]] name = "botocore-stubs" -version = "1.40.52" +version = "1.40.53" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "types-awscrt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/70/e5/a3a7ad9b45e6612989bb52dd1b586a2e2b9539ce5b097c15d15a6b0d54cb/botocore_stubs-1.40.52.tar.gz", hash = "sha256:9e9809e563eb2c925051d849257bdc7605760bcbb62b3d5fe3117f9385345488", size = 42232, upload-time = "2025-10-14T21:21:00.995Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/e9/eb177f3e2fd2f133a98db76709ff79884446f7a6fa89fe924d686eca5449/botocore_stubs-1.40.53.tar.gz", hash = "sha256:c5cba2e1f1fb4b41945a73cdf999a5444481e4ad009ca54d2c0d96b0f473522f", size = 42223, upload-time = "2025-10-15T20:27:56.546Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/be/f0be116f4d2db93400a4c0c4b92e79200df562c8a4b5c1fc97349b6460c2/botocore_stubs-1.40.52-py3-none-any.whl", hash = "sha256:4f3e244aad34997cb88aa81c64eb0df23c19d3cbc9337366dfb7f992807d2e53", size = 66541, upload-time = "2025-10-14T21:20:58.699Z" }, + { url = "https://files.pythonhosted.org/packages/59/d8/6c0ee2b997cd8dcffb89871b086516ae79471b8f0ae68b46216114415de0/botocore_stubs-1.40.53-py3-none-any.whl", hash = "sha256:f076c4e34877a445be05a68de77298750292564dae8e19706576394921772f5f", size = 66541, upload-time = "2025-10-15T20:27:54.175Z" }, ] [[package]] @@ -576,38 +576,16 @@ wheels = [ [[package]] name = "cocode" -version = "0.2.3" -source = { editable = "../cocode" } +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pipelex", extra = ["anthropic", "bedrock", "google", "google-genai"] }, { name = "pygithub" }, ] - -[package.metadata] -requires-dist = [ - { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, - { name = "mkdocs", marker = "extra == 'docs'", specifier = "==1.6.1" }, - { name = "mkdocs-glightbox", marker = "extra == 'docs'", specifier = "==0.4.0" }, - { name = 
"mkdocs-material", marker = "extra == 'docs'", specifier = "==9.6.14" }, - { name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = "==1.1.0" }, - { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, - { name = "pipelex", extras = ["anthropic", "google", "google-genai", "bedrock"], editable = "." }, - { name = "pygithub", specifier = "==2.4.0" }, - { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.405" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.3" }, - { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, - { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.1" }, - { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, - { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.6.8" }, - { name = "types-aioboto3", extras = ["bedrock", "bedrock-runtime"], marker = "extra == 'dev'", specifier = ">=13.4.0" }, - { name = "types-aiofiles", marker = "extra == 'dev'", specifier = ">=24.1.0.20240626" }, - { name = "types-markdown", marker = "extra == 'dev'", specifier = ">=3.6.0.20240316" }, - { name = "types-networkx", marker = "extra == 'dev'", specifier = ">=3.3.0.20241020" }, - { name = "types-openpyxl", marker = "extra == 'dev'", specifier = ">=3.1.5.20250306" }, - { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12.20250326" }, +sdist = { url = "https://files.pythonhosted.org/packages/c4/73/f22c655b473308a5346d0efdfccc9b0eaadd0cfe87f327689b255aa0f367/cocode-0.3.0.tar.gz", hash = "sha256:29816536cb93673c2c02616f7458c5a7a51321929e8a08790f2b7818c31b559d", size = 45715, upload-time = "2025-10-16T11:57:50.79Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/4b/0585da9c534159a3e18d75b303b8fe0b425efd7c4f82a9664283231f1f6c/cocode-0.3.0-py3-none-any.whl", hash = 
"sha256:2d485de7dcd38b090b9c1d8582f896afecc9a324d614e16f910f119e81fa2914", size = 64109, upload-time = "2025-10-16T11:57:49.569Z" }, ] -provides-extras = ["docs", "dev"] [[package]] name = "colorama" @@ -620,101 +598,101 @@ wheels = [ [[package]] name = "coverage" -version = "7.10.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/6c/3a3f7a46888e69d18abe3ccc6fe4cb16cccb1e6a2f99698931dafca489e6/coverage-7.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc04cc7a3db33664e0c2d10eb8990ff6b3536f6842c9590ae8da4c614b9ed05a", size = 217987, upload-time = "2025-09-21T20:00:57.218Z" }, - { url = "https://files.pythonhosted.org/packages/03/94/952d30f180b1a916c11a56f5c22d3535e943aa22430e9e3322447e520e1c/coverage-7.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e201e015644e207139f7e2351980feb7040e6f4b2c2978892f3e3789d1c125e5", size = 218388, upload-time = "2025-09-21T20:01:00.081Z" }, - { url = "https://files.pythonhosted.org/packages/50/2b/9e0cf8ded1e114bcd8b2fd42792b57f1c4e9e4ea1824cde2af93a67305be/coverage-7.10.7-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:240af60539987ced2c399809bd34f7c78e8abe0736af91c3d7d0e795df633d17", size = 245148, upload-time = "2025-09-21T20:01:01.768Z" }, - { url = "https://files.pythonhosted.org/packages/19/20/d0384ac06a6f908783d9b6aa6135e41b093971499ec488e47279f5b846e6/coverage-7.10.7-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8421e088bc051361b01c4b3a50fd39a4b9133079a2229978d9d30511fd05231b", size = 246958, upload-time = "2025-09-21T20:01:03.355Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/83/5c283cff3d41285f8eab897651585db908a909c572bdc014bcfaf8a8b6ae/coverage-7.10.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6be8ed3039ae7f7ac5ce058c308484787c86e8437e72b30bf5e88b8ea10f3c87", size = 248819, upload-time = "2025-09-21T20:01:04.968Z" }, - { url = "https://files.pythonhosted.org/packages/60/22/02eb98fdc5ff79f423e990d877693e5310ae1eab6cb20ae0b0b9ac45b23b/coverage-7.10.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e28299d9f2e889e6d51b1f043f58d5f997c373cc12e6403b90df95b8b047c13e", size = 245754, upload-time = "2025-09-21T20:01:06.321Z" }, - { url = "https://files.pythonhosted.org/packages/b4/bc/25c83bcf3ad141b32cd7dc45485ef3c01a776ca3aa8ef0a93e77e8b5bc43/coverage-7.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e16bd7761c5e454f4efd36f345286d6f7c5fa111623c355691e2755cae3b9e", size = 246860, upload-time = "2025-09-21T20:01:07.605Z" }, - { url = "https://files.pythonhosted.org/packages/3c/b7/95574702888b58c0928a6e982038c596f9c34d52c5e5107f1eef729399b5/coverage-7.10.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b1c81d0e5e160651879755c9c675b974276f135558cf4ba79fee7b8413a515df", size = 244877, upload-time = "2025-09-21T20:01:08.829Z" }, - { url = "https://files.pythonhosted.org/packages/47/b6/40095c185f235e085df0e0b158f6bd68cc6e1d80ba6c7721dc81d97ec318/coverage-7.10.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:606cc265adc9aaedcc84f1f064f0e8736bc45814f15a357e30fca7ecc01504e0", size = 245108, upload-time = "2025-09-21T20:01:10.527Z" }, - { url = "https://files.pythonhosted.org/packages/c8/50/4aea0556da7a4b93ec9168420d170b55e2eb50ae21b25062513d020c6861/coverage-7.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:10b24412692df990dbc34f8fb1b6b13d236ace9dfdd68df5b28c2e39cafbba13", size = 245752, upload-time = "2025-09-21T20:01:11.857Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/28/ea1a84a60828177ae3b100cb6723838523369a44ec5742313ed7db3da160/coverage-7.10.7-cp310-cp310-win32.whl", hash = "sha256:b51dcd060f18c19290d9b8a9dd1e0181538df2ce0717f562fff6cf74d9fc0b5b", size = 220497, upload-time = "2025-09-21T20:01:13.459Z" }, - { url = "https://files.pythonhosted.org/packages/fc/1a/a81d46bbeb3c3fd97b9602ebaa411e076219a150489bcc2c025f151bd52d/coverage-7.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:3a622ac801b17198020f09af3eaf45666b344a0d69fc2a6ffe2ea83aeef1d807", size = 221392, upload-time = "2025-09-21T20:01:14.722Z" }, - { url = "https://files.pythonhosted.org/packages/d2/5d/c1a17867b0456f2e9ce2d8d4708a4c3a089947d0bec9c66cdf60c9e7739f/coverage-7.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a609f9c93113be646f44c2a0256d6ea375ad047005d7f57a5c15f614dc1b2f59", size = 218102, upload-time = "2025-09-21T20:01:16.089Z" }, - { url = "https://files.pythonhosted.org/packages/54/f0/514dcf4b4e3698b9a9077f084429681bf3aad2b4a72578f89d7f643eb506/coverage-7.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:65646bb0359386e07639c367a22cf9b5bf6304e8630b565d0626e2bdf329227a", size = 218505, upload-time = "2025-09-21T20:01:17.788Z" }, - { url = "https://files.pythonhosted.org/packages/20/f6/9626b81d17e2a4b25c63ac1b425ff307ecdeef03d67c9a147673ae40dc36/coverage-7.10.7-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5f33166f0dfcce728191f520bd2692914ec70fac2713f6bf3ce59c3deacb4699", size = 248898, upload-time = "2025-09-21T20:01:19.488Z" }, - { url = "https://files.pythonhosted.org/packages/b0/ef/bd8e719c2f7417ba03239052e099b76ea1130ac0cbb183ee1fcaa58aaff3/coverage-7.10.7-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35f5e3f9e455bb17831876048355dca0f758b6df22f49258cb5a91da23ef437d", size = 250831, upload-time = "2025-09-21T20:01:20.817Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/b6/bf054de41ec948b151ae2b79a55c107f5760979538f5fb80c195f2517718/coverage-7.10.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da86b6d62a496e908ac2898243920c7992499c1712ff7c2b6d837cc69d9467e", size = 252937, upload-time = "2025-09-21T20:01:22.171Z" }, - { url = "https://files.pythonhosted.org/packages/0f/e5/3860756aa6f9318227443c6ce4ed7bf9e70bb7f1447a0353f45ac5c7974b/coverage-7.10.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6b8b09c1fad947c84bbbc95eca841350fad9cbfa5a2d7ca88ac9f8d836c92e23", size = 249021, upload-time = "2025-09-21T20:01:23.907Z" }, - { url = "https://files.pythonhosted.org/packages/26/0f/bd08bd042854f7fd07b45808927ebcce99a7ed0f2f412d11629883517ac2/coverage-7.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4376538f36b533b46f8971d3a3e63464f2c7905c9800db97361c43a2b14792ab", size = 250626, upload-time = "2025-09-21T20:01:25.721Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a7/4777b14de4abcc2e80c6b1d430f5d51eb18ed1d75fca56cbce5f2db9b36e/coverage-7.10.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:121da30abb574f6ce6ae09840dae322bef734480ceafe410117627aa54f76d82", size = 248682, upload-time = "2025-09-21T20:01:27.105Z" }, - { url = "https://files.pythonhosted.org/packages/34/72/17d082b00b53cd45679bad682fac058b87f011fd8b9fe31d77f5f8d3a4e4/coverage-7.10.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:88127d40df529336a9836870436fc2751c339fbaed3a836d42c93f3e4bd1d0a2", size = 248402, upload-time = "2025-09-21T20:01:28.629Z" }, - { url = "https://files.pythonhosted.org/packages/81/7a/92367572eb5bdd6a84bfa278cc7e97db192f9f45b28c94a9ca1a921c3577/coverage-7.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba58bbcd1b72f136080c0bccc2400d66cc6115f3f906c499013d065ac33a4b61", size = 249320, upload-time = "2025-09-21T20:01:30.004Z" }, - { url = 
"https://files.pythonhosted.org/packages/2f/88/a23cc185f6a805dfc4fdf14a94016835eeb85e22ac3a0e66d5e89acd6462/coverage-7.10.7-cp311-cp311-win32.whl", hash = "sha256:972b9e3a4094b053a4e46832b4bc829fc8a8d347160eb39d03f1690316a99c14", size = 220536, upload-time = "2025-09-21T20:01:32.184Z" }, - { url = "https://files.pythonhosted.org/packages/fe/ef/0b510a399dfca17cec7bc2f05ad8bd78cf55f15c8bc9a73ab20c5c913c2e/coverage-7.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:a7b55a944a7f43892e28ad4bc0561dfd5f0d73e605d1aa5c3c976b52aea121d2", size = 221425, upload-time = "2025-09-21T20:01:33.557Z" }, - { url = "https://files.pythonhosted.org/packages/51/7f/023657f301a276e4ba1850f82749bc136f5a7e8768060c2e5d9744a22951/coverage-7.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:736f227fb490f03c6488f9b6d45855f8e0fd749c007f9303ad30efab0e73c05a", size = 220103, upload-time = "2025-09-21T20:01:34.929Z" }, - { url = "https://files.pythonhosted.org/packages/13/e4/eb12450f71b542a53972d19117ea5a5cea1cab3ac9e31b0b5d498df1bd5a/coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417", size = 218290, upload-time = "2025-09-21T20:01:36.455Z" }, - { url = "https://files.pythonhosted.org/packages/37/66/593f9be12fc19fb36711f19a5371af79a718537204d16ea1d36f16bd78d2/coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973", size = 218515, upload-time = "2025-09-21T20:01:37.982Z" }, - { url = "https://files.pythonhosted.org/packages/66/80/4c49f7ae09cafdacc73fbc30949ffe77359635c168f4e9ff33c9ebb07838/coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c", size = 250020, upload-time = "2025-09-21T20:01:39.617Z" }, - { url = 
"https://files.pythonhosted.org/packages/a6/90/a64aaacab3b37a17aaedd83e8000142561a29eb262cede42d94a67f7556b/coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7", size = 252769, upload-time = "2025-09-21T20:01:41.341Z" }, - { url = "https://files.pythonhosted.org/packages/98/2e/2dda59afd6103b342e096f246ebc5f87a3363b5412609946c120f4e7750d/coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6", size = 253901, upload-time = "2025-09-21T20:01:43.042Z" }, - { url = "https://files.pythonhosted.org/packages/53/dc/8d8119c9051d50f3119bb4a75f29f1e4a6ab9415cd1fa8bf22fcc3fb3b5f/coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59", size = 250413, upload-time = "2025-09-21T20:01:44.469Z" }, - { url = "https://files.pythonhosted.org/packages/98/b3/edaff9c5d79ee4d4b6d3fe046f2b1d799850425695b789d491a64225d493/coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b", size = 251820, upload-time = "2025-09-21T20:01:45.915Z" }, - { url = "https://files.pythonhosted.org/packages/11/25/9a0728564bb05863f7e513e5a594fe5ffef091b325437f5430e8cfb0d530/coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a", size = 249941, upload-time = "2025-09-21T20:01:47.296Z" }, - { url = "https://files.pythonhosted.org/packages/e0/fd/ca2650443bfbef5b0e74373aac4df67b08180d2f184b482c41499668e258/coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb", size = 249519, upload-time = "2025-09-21T20:01:48.73Z" }, - { url = 
"https://files.pythonhosted.org/packages/24/79/f692f125fb4299b6f963b0745124998ebb8e73ecdfce4ceceb06a8c6bec5/coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1", size = 251375, upload-time = "2025-09-21T20:01:50.529Z" }, - { url = "https://files.pythonhosted.org/packages/5e/75/61b9bbd6c7d24d896bfeec57acba78e0f8deac68e6baf2d4804f7aae1f88/coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256", size = 220699, upload-time = "2025-09-21T20:01:51.941Z" }, - { url = "https://files.pythonhosted.org/packages/ca/f3/3bf7905288b45b075918d372498f1cf845b5b579b723c8fd17168018d5f5/coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba", size = 221512, upload-time = "2025-09-21T20:01:53.481Z" }, - { url = "https://files.pythonhosted.org/packages/5c/44/3e32dbe933979d05cf2dac5e697c8599cfe038aaf51223ab901e208d5a62/coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf", size = 220147, upload-time = "2025-09-21T20:01:55.2Z" }, - { url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" }, - { url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" }, - { url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" }, - { url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" }, - { url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" }, - { url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" }, - { url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = 
"2025-09-21T20:02:07.619Z" }, - { url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" }, - { url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" }, - { url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" }, - { url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" }, - { url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" }, - { url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" }, - { url = 
"https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" }, - { url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" }, - { url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" }, - { url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" }, - { url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" }, - { url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = 
"2025-09-21T20:02:29.216Z" }, - { url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" }, - { url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" }, - { url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" }, - { url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" }, - { url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" }, - { url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" }, - { url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" }, - { url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" }, - { url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" }, - { url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" }, - { url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = 
"2025-09-21T20:02:51.971Z" }, - { url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" }, - { url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" }, - { url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" }, - { url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" }, - { url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" }, - { url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" }, - { url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" }, - { url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" }, - { url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" }, - { url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" }, - { url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" }, - { url = 
"https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" }, - { url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" }, - { url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" }, - { url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" }, - { url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" }, - { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" }, - { url = 
"https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" }, - { url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" }, - { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" }, +version = "7.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/38/ee22495420457259d2f3390309505ea98f98a5eed40901cf62196abad006/coverage-7.11.0.tar.gz", hash = "sha256:167bd504ac1ca2af7ff3b81d245dfea0292c5032ebef9d66cc08a7d28c1b8050", size = 811905, upload-time = "2025-10-15T15:15:08.542Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/95/c49df0aceb5507a80b9fe5172d3d39bf23f05be40c23c8d77d556df96cec/coverage-7.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb53f1e8adeeb2e78962bade0c08bfdc461853c7969706ed901821e009b35e31", size = 215800, upload-time = "2025-10-15T15:12:19.824Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c6/7bb46ce01ed634fff1d7bb53a54049f539971862cc388b304ff3c51b4f66/coverage-7.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9a03ec6cb9f40a5c360f138b88266fd8f58408d71e89f536b4f91d85721d075", size = 216198, upload-time = "2025-10-15T15:12:22.549Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/b2/75d9d8fbf2900268aca5de29cd0a0fe671b0f69ef88be16767cc3c828b85/coverage-7.11.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d7f0616c557cbc3d1c2090334eddcbb70e1ae3a40b07222d62b3aa47f608fab", size = 242953, upload-time = "2025-10-15T15:12:24.139Z" }, + { url = "https://files.pythonhosted.org/packages/65/ac/acaa984c18f440170525a8743eb4b6c960ace2dbad80dc22056a437fc3c6/coverage-7.11.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e44a86a47bbdf83b0a3ea4d7df5410d6b1a0de984fbd805fa5101f3624b9abe0", size = 244766, upload-time = "2025-10-15T15:12:25.974Z" }, + { url = "https://files.pythonhosted.org/packages/d8/0d/938d0bff76dfa4a6b228c3fc4b3e1c0e2ad4aa6200c141fcda2bd1170227/coverage-7.11.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:596763d2f9a0ee7eec6e643e29660def2eef297e1de0d334c78c08706f1cb785", size = 246625, upload-time = "2025-10-15T15:12:27.387Z" }, + { url = "https://files.pythonhosted.org/packages/38/54/8f5f5e84bfa268df98f46b2cb396b1009734cfb1e5d6adb663d284893b32/coverage-7.11.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ef55537ff511b5e0a43edb4c50a7bf7ba1c3eea20b4f49b1490f1e8e0e42c591", size = 243568, upload-time = "2025-10-15T15:12:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/68/30/8ba337c2877fe3f2e1af0ed7ff4be0c0c4aca44d6f4007040f3ca2255e99/coverage-7.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cbabd8f4d0d3dc571d77ae5bdbfa6afe5061e679a9d74b6797c48d143307088", size = 244665, upload-time = "2025-10-15T15:12:30.297Z" }, + { url = "https://files.pythonhosted.org/packages/cc/fb/c6f1d6d9a665536b7dde2333346f0cc41dc6a60bd1ffc10cd5c33e7eb000/coverage-7.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e24045453384e0ae2a587d562df2a04d852672eb63051d16096d3f08aa4c7c2f", size = 242681, upload-time = 
"2025-10-15T15:12:32.326Z" }, + { url = "https://files.pythonhosted.org/packages/be/38/1b532319af5f991fa153c20373291dc65c2bf532af7dbcffdeef745c8f79/coverage-7.11.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:7161edd3426c8d19bdccde7d49e6f27f748f3c31cc350c5de7c633fea445d866", size = 242912, upload-time = "2025-10-15T15:12:34.079Z" }, + { url = "https://files.pythonhosted.org/packages/67/3d/f39331c60ef6050d2a861dc1b514fa78f85f792820b68e8c04196ad733d6/coverage-7.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d4ed4de17e692ba6415b0587bc7f12bc80915031fc9db46a23ce70fc88c9841", size = 243559, upload-time = "2025-10-15T15:12:35.809Z" }, + { url = "https://files.pythonhosted.org/packages/4b/55/cb7c9df9d0495036ce582a8a2958d50c23cd73f84a23284bc23bd4711a6f/coverage-7.11.0-cp310-cp310-win32.whl", hash = "sha256:765c0bc8fe46f48e341ef737c91c715bd2a53a12792592296a095f0c237e09cf", size = 218266, upload-time = "2025-10-15T15:12:37.429Z" }, + { url = "https://files.pythonhosted.org/packages/68/a8/b79cb275fa7bd0208767f89d57a1b5f6ba830813875738599741b97c2e04/coverage-7.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:24d6f3128f1b2d20d84b24f4074475457faedc3d4613a7e66b5e769939c7d969", size = 219169, upload-time = "2025-10-15T15:12:39.25Z" }, + { url = "https://files.pythonhosted.org/packages/49/3a/ee1074c15c408ddddddb1db7dd904f6b81bc524e01f5a1c5920e13dbde23/coverage-7.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d58ecaa865c5b9fa56e35efc51d1014d4c0d22838815b9fce57a27dd9576847", size = 215912, upload-time = "2025-10-15T15:12:40.665Z" }, + { url = "https://files.pythonhosted.org/packages/70/c4/9f44bebe5cb15f31608597b037d78799cc5f450044465bcd1ae8cb222fe1/coverage-7.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b679e171f1c104a5668550ada700e3c4937110dbdd153b7ef9055c4f1a1ee3cc", size = 216310, upload-time = "2025-10-15T15:12:42.461Z" }, + { url = 
"https://files.pythonhosted.org/packages/42/01/5e06077cfef92d8af926bdd86b84fb28bf9bc6ad27343d68be9b501d89f2/coverage-7.11.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ca61691ba8c5b6797deb221a0d09d7470364733ea9c69425a640f1f01b7c5bf0", size = 246706, upload-time = "2025-10-15T15:12:44.001Z" }, + { url = "https://files.pythonhosted.org/packages/40/b8/7a3f1f33b35cc4a6c37e759137533119560d06c0cc14753d1a803be0cd4a/coverage-7.11.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:aef1747ede4bd8ca9cfc04cc3011516500c6891f1b33a94add3253f6f876b7b7", size = 248634, upload-time = "2025-10-15T15:12:45.768Z" }, + { url = "https://files.pythonhosted.org/packages/7a/41/7f987eb33de386bc4c665ab0bf98d15fcf203369d6aacae74f5dd8ec489a/coverage-7.11.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1839d08406e4cba2953dcc0ffb312252f14d7c4c96919f70167611f4dee2623", size = 250741, upload-time = "2025-10-15T15:12:47.222Z" }, + { url = "https://files.pythonhosted.org/packages/23/c1/a4e0ca6a4e83069fb8216b49b30a7352061ca0cb38654bd2dc96b7b3b7da/coverage-7.11.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e0eb0a2dcc62478eb5b4cbb80b97bdee852d7e280b90e81f11b407d0b81c4287", size = 246837, upload-time = "2025-10-15T15:12:48.904Z" }, + { url = "https://files.pythonhosted.org/packages/5d/03/ced062a17f7c38b4728ff76c3acb40d8465634b20b4833cdb3cc3a74e115/coverage-7.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bc1fbea96343b53f65d5351d8fd3b34fd415a2670d7c300b06d3e14a5af4f552", size = 248429, upload-time = "2025-10-15T15:12:50.73Z" }, + { url = "https://files.pythonhosted.org/packages/97/af/a7c6f194bb8c5a2705ae019036b8fe7f49ea818d638eedb15fdb7bed227c/coverage-7.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:214b622259dd0cf435f10241f1333d32caa64dbc27f8790ab693428a141723de", size = 246490, upload-time = 
"2025-10-15T15:12:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c3/aab4df02b04a8fde79068c3c41ad7a622b0ef2b12e1ed154da986a727c3f/coverage-7.11.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:258d9967520cca899695d4eb7ea38be03f06951d6ca2f21fb48b1235f791e601", size = 246208, upload-time = "2025-10-15T15:12:54.586Z" }, + { url = "https://files.pythonhosted.org/packages/30/d8/e282ec19cd658238d60ed404f99ef2e45eed52e81b866ab1518c0d4163cf/coverage-7.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cf9e6ff4ca908ca15c157c409d608da77a56a09877b97c889b98fb2c32b6465e", size = 247126, upload-time = "2025-10-15T15:12:56.485Z" }, + { url = "https://files.pythonhosted.org/packages/d1/17/a635fa07fac23adb1a5451ec756216768c2767efaed2e4331710342a3399/coverage-7.11.0-cp311-cp311-win32.whl", hash = "sha256:fcc15fc462707b0680cff6242c48625da7f9a16a28a41bb8fd7a4280920e676c", size = 218314, upload-time = "2025-10-15T15:12:58.365Z" }, + { url = "https://files.pythonhosted.org/packages/2a/29/2ac1dfcdd4ab9a70026edc8d715ece9b4be9a1653075c658ee6f271f394d/coverage-7.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:865965bf955d92790f1facd64fe7ff73551bd2c1e7e6b26443934e9701ba30b9", size = 219203, upload-time = "2025-10-15T15:12:59.902Z" }, + { url = "https://files.pythonhosted.org/packages/03/21/5ce8b3a0133179115af4c041abf2ee652395837cb896614beb8ce8ddcfd9/coverage-7.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:5693e57a065760dcbeb292d60cc4d0231a6d4b6b6f6a3191561e1d5e8820b745", size = 217879, upload-time = "2025-10-15T15:13:01.35Z" }, + { url = "https://files.pythonhosted.org/packages/c4/db/86f6906a7c7edc1a52b2c6682d6dd9be775d73c0dfe2b84f8923dfea5784/coverage-7.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9c49e77811cf9d024b95faf86c3f059b11c0c9be0b0d61bc598f453703bd6fd1", size = 216098, upload-time = "2025-10-15T15:13:02.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/54/e7b26157048c7ba555596aad8569ff903d6cd67867d41b75287323678ede/coverage-7.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a61e37a403a778e2cda2a6a39abcc895f1d984071942a41074b5c7ee31642007", size = 216331, upload-time = "2025-10-15T15:13:04.403Z" }, + { url = "https://files.pythonhosted.org/packages/b9/19/1ce6bf444f858b83a733171306134a0544eaddf1ca8851ede6540a55b2ad/coverage-7.11.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c79cae102bb3b1801e2ef1511fb50e91ec83a1ce466b2c7c25010d884336de46", size = 247825, upload-time = "2025-10-15T15:13:05.92Z" }, + { url = "https://files.pythonhosted.org/packages/71/0b/d3bcbbc259fcced5fb67c5d78f6e7ee965f49760c14afd931e9e663a83b2/coverage-7.11.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:16ce17ceb5d211f320b62df002fa7016b7442ea0fd260c11cec8ce7730954893", size = 250573, upload-time = "2025-10-15T15:13:07.471Z" }, + { url = "https://files.pythonhosted.org/packages/58/8d/b0ff3641a320abb047258d36ed1c21d16be33beed4152628331a1baf3365/coverage-7.11.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:80027673e9d0bd6aef86134b0771845e2da85755cf686e7c7c59566cf5a89115", size = 251706, upload-time = "2025-10-15T15:13:09.4Z" }, + { url = "https://files.pythonhosted.org/packages/59/c8/5a586fe8c7b0458053d9c687f5cff515a74b66c85931f7fe17a1c958b4ac/coverage-7.11.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4d3ffa07a08657306cd2215b0da53761c4d73cb54d9143b9303a6481ec0cd415", size = 248221, upload-time = "2025-10-15T15:13:10.964Z" }, + { url = "https://files.pythonhosted.org/packages/d0/ff/3a25e3132804ba44cfa9a778cdf2b73dbbe63ef4b0945e39602fc896ba52/coverage-7.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a3b6a5f8b2524fd6c1066bc85bfd97e78709bb5e37b5b94911a6506b65f47186", size = 249624, upload-time = 
"2025-10-15T15:13:12.5Z" }, + { url = "https://files.pythonhosted.org/packages/c5/12/ff10c8ce3895e1b17a73485ea79ebc1896a9e466a9d0f4aef63e0d17b718/coverage-7.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fcc0a4aa589de34bc56e1a80a740ee0f8c47611bdfb28cd1849de60660f3799d", size = 247744, upload-time = "2025-10-15T15:13:14.554Z" }, + { url = "https://files.pythonhosted.org/packages/16/02/d500b91f5471b2975947e0629b8980e5e90786fe316b6d7299852c1d793d/coverage-7.11.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dba82204769d78c3fd31b35c3d5f46e06511936c5019c39f98320e05b08f794d", size = 247325, upload-time = "2025-10-15T15:13:16.438Z" }, + { url = "https://files.pythonhosted.org/packages/77/11/dee0284fbbd9cd64cfce806b827452c6df3f100d9e66188e82dfe771d4af/coverage-7.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:81b335f03ba67309a95210caf3eb43bd6fe75a4e22ba653ef97b4696c56c7ec2", size = 249180, upload-time = "2025-10-15T15:13:17.959Z" }, + { url = "https://files.pythonhosted.org/packages/59/1b/cdf1def928f0a150a057cab03286774e73e29c2395f0d30ce3d9e9f8e697/coverage-7.11.0-cp312-cp312-win32.whl", hash = "sha256:037b2d064c2f8cc8716fe4d39cb705779af3fbf1ba318dc96a1af858888c7bb5", size = 218479, upload-time = "2025-10-15T15:13:19.608Z" }, + { url = "https://files.pythonhosted.org/packages/ff/55/e5884d55e031da9c15b94b90a23beccc9d6beee65e9835cd6da0a79e4f3a/coverage-7.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:d66c0104aec3b75e5fd897e7940188ea1892ca1d0235316bf89286d6a22568c0", size = 219290, upload-time = "2025-10-15T15:13:21.593Z" }, + { url = "https://files.pythonhosted.org/packages/23/a8/faa930cfc71c1d16bc78f9a19bb73700464f9c331d9e547bfbc1dbd3a108/coverage-7.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:d91ebeac603812a09cf6a886ba6e464f3bbb367411904ae3790dfe28311b15ad", size = 217924, upload-time = "2025-10-15T15:13:23.39Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/7f/85e4dfe65e400645464b25c036a26ac226cf3a69d4a50c3934c532491cdd/coverage-7.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cc3f49e65ea6e0d5d9bd60368684fe52a704d46f9e7fc413918f18d046ec40e1", size = 216129, upload-time = "2025-10-15T15:13:25.371Z" }, + { url = "https://files.pythonhosted.org/packages/96/5d/dc5fa98fea3c175caf9d360649cb1aa3715e391ab00dc78c4c66fabd7356/coverage-7.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f39ae2f63f37472c17b4990f794035c9890418b1b8cca75c01193f3c8d3e01be", size = 216380, upload-time = "2025-10-15T15:13:26.976Z" }, + { url = "https://files.pythonhosted.org/packages/b2/f5/3da9cc9596708273385189289c0e4d8197d37a386bdf17619013554b3447/coverage-7.11.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7db53b5cdd2917b6eaadd0b1251cf4e7d96f4a8d24e174bdbdf2f65b5ea7994d", size = 247375, upload-time = "2025-10-15T15:13:28.923Z" }, + { url = "https://files.pythonhosted.org/packages/65/6c/f7f59c342359a235559d2bc76b0c73cfc4bac7d61bb0df210965cb1ecffd/coverage-7.11.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10ad04ac3a122048688387828b4537bc9cf60c0bf4869c1e9989c46e45690b82", size = 249978, upload-time = "2025-10-15T15:13:30.525Z" }, + { url = "https://files.pythonhosted.org/packages/e7/8c/042dede2e23525e863bf1ccd2b92689692a148d8b5fd37c37899ba882645/coverage-7.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4036cc9c7983a2b1f2556d574d2eb2154ac6ed55114761685657e38782b23f52", size = 251253, upload-time = "2025-10-15T15:13:32.174Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a9/3c58df67bfa809a7bddd786356d9c5283e45d693edb5f3f55d0986dd905a/coverage-7.11.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7ab934dd13b1c5e94b692b1e01bd87e4488cb746e3a50f798cb9464fd128374b", size = 247591, upload-time = 
"2025-10-15T15:13:34.147Z" }, + { url = "https://files.pythonhosted.org/packages/26/5b/c7f32efd862ee0477a18c41e4761305de6ddd2d49cdeda0c1116227570fd/coverage-7.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59a6e5a265f7cfc05f76e3bb53eca2e0dfe90f05e07e849930fecd6abb8f40b4", size = 249411, upload-time = "2025-10-15T15:13:38.425Z" }, + { url = "https://files.pythonhosted.org/packages/76/b5/78cb4f1e86c1611431c990423ec0768122905b03837e1b4c6a6f388a858b/coverage-7.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:df01d6c4c81e15a7c88337b795bb7595a8596e92310266b5072c7e301168efbd", size = 247303, upload-time = "2025-10-15T15:13:40.464Z" }, + { url = "https://files.pythonhosted.org/packages/87/c9/23c753a8641a330f45f221286e707c427e46d0ffd1719b080cedc984ec40/coverage-7.11.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8c934bd088eed6174210942761e38ee81d28c46de0132ebb1801dbe36a390dcc", size = 247157, upload-time = "2025-10-15T15:13:42.087Z" }, + { url = "https://files.pythonhosted.org/packages/c5/42/6e0cc71dc8a464486e944a4fa0d85bdec031cc2969e98ed41532a98336b9/coverage-7.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a03eaf7ec24078ad64a07f02e30060aaf22b91dedf31a6b24d0d98d2bba7f48", size = 248921, upload-time = "2025-10-15T15:13:43.715Z" }, + { url = "https://files.pythonhosted.org/packages/e8/1c/743c2ef665e6858cccb0f84377dfe3a4c25add51e8c7ef19249be92465b6/coverage-7.11.0-cp313-cp313-win32.whl", hash = "sha256:695340f698a5f56f795b2836abe6fb576e7c53d48cd155ad2f80fd24bc63a040", size = 218526, upload-time = "2025-10-15T15:13:45.336Z" }, + { url = "https://files.pythonhosted.org/packages/ff/d5/226daadfd1bf8ddbccefbd3aa3547d7b960fb48e1bdac124e2dd13a2b71a/coverage-7.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2727d47fce3ee2bac648528e41455d1b0c46395a087a229deac75e9f88ba5a05", size = 219317, upload-time = "2025-10-15T15:13:47.401Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/54/47db81dcbe571a48a298f206183ba8a7ba79200a37cd0d9f4788fcd2af4a/coverage-7.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:0efa742f431529699712b92ecdf22de8ff198df41e43aeaaadf69973eb93f17a", size = 217948, upload-time = "2025-10-15T15:13:49.096Z" }, + { url = "https://files.pythonhosted.org/packages/e5/8b/cb68425420154e7e2a82fd779a8cc01549b6fa83c2ad3679cd6c088ebd07/coverage-7.11.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:587c38849b853b157706407e9ebdca8fd12f45869edb56defbef2daa5fb0812b", size = 216837, upload-time = "2025-10-15T15:13:51.09Z" }, + { url = "https://files.pythonhosted.org/packages/33/55/9d61b5765a025685e14659c8d07037247de6383c0385757544ffe4606475/coverage-7.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b971bdefdd75096163dd4261c74be813c4508477e39ff7b92191dea19f24cd37", size = 217061, upload-time = "2025-10-15T15:13:52.747Z" }, + { url = "https://files.pythonhosted.org/packages/52/85/292459c9186d70dcec6538f06ea251bc968046922497377bf4a1dc9a71de/coverage-7.11.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:269bfe913b7d5be12ab13a95f3a76da23cf147be7fa043933320ba5625f0a8de", size = 258398, upload-time = "2025-10-15T15:13:54.45Z" }, + { url = "https://files.pythonhosted.org/packages/1f/e2/46edd73fb8bf51446c41148d81944c54ed224854812b6ca549be25113ee0/coverage-7.11.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dadbcce51a10c07b7c72b0ce4a25e4b6dcb0c0372846afb8e5b6307a121eb99f", size = 260574, upload-time = "2025-10-15T15:13:56.145Z" }, + { url = "https://files.pythonhosted.org/packages/07/5e/1df469a19007ff82e2ca8fe509822820a31e251f80ee7344c34f6cd2ec43/coverage-7.11.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ed43fa22c6436f7957df036331f8fe4efa7af132054e1844918866cd228af6c", size = 262797, upload-time = "2025-10-15T15:13:58.635Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/50/de216b31a1434b94d9b34a964c09943c6be45069ec704bfc379d8d89a649/coverage-7.11.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9516add7256b6713ec08359b7b05aeff8850c98d357784c7205b2e60aa2513fa", size = 257361, upload-time = "2025-10-15T15:14:00.409Z" }, + { url = "https://files.pythonhosted.org/packages/82/1e/3f9f8344a48111e152e0fd495b6fff13cc743e771a6050abf1627a7ba918/coverage-7.11.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb92e47c92fcbcdc692f428da67db33337fa213756f7adb6a011f7b5a7a20740", size = 260349, upload-time = "2025-10-15T15:14:02.188Z" }, + { url = "https://files.pythonhosted.org/packages/65/9b/3f52741f9e7d82124272f3070bbe316006a7de1bad1093f88d59bfc6c548/coverage-7.11.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d06f4fc7acf3cabd6d74941d53329e06bab00a8fe10e4df2714f0b134bfc64ef", size = 258114, upload-time = "2025-10-15T15:14:03.907Z" }, + { url = "https://files.pythonhosted.org/packages/0b/8b/918f0e15f0365d50d3986bbd3338ca01178717ac5678301f3f547b6619e6/coverage-7.11.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:6fbcee1a8f056af07ecd344482f711f563a9eb1c2cad192e87df00338ec3cdb0", size = 256723, upload-time = "2025-10-15T15:14:06.324Z" }, + { url = "https://files.pythonhosted.org/packages/44/9e/7776829f82d3cf630878a7965a7d70cc6ca94f22c7d20ec4944f7148cb46/coverage-7.11.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dbbf012be5f32533a490709ad597ad8a8ff80c582a95adc8d62af664e532f9ca", size = 259238, upload-time = "2025-10-15T15:14:08.002Z" }, + { url = "https://files.pythonhosted.org/packages/9a/b8/49cf253e1e7a3bedb85199b201862dd7ca4859f75b6cf25ffa7298aa0760/coverage-7.11.0-cp313-cp313t-win32.whl", hash = "sha256:cee6291bb4fed184f1c2b663606a115c743df98a537c969c3c64b49989da96c2", size = 219180, upload-time = "2025-10-15T15:14:09.786Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/e1/1a541703826be7ae2125a0fb7f821af5729d56bb71e946e7b933cc7a89a4/coverage-7.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a386c1061bf98e7ea4758e4313c0ab5ecf57af341ef0f43a0bf26c2477b5c268", size = 220241, upload-time = "2025-10-15T15:14:11.471Z" }, + { url = "https://files.pythonhosted.org/packages/d5/d1/5ee0e0a08621140fd418ec4020f595b4d52d7eb429ae6a0c6542b4ba6f14/coverage-7.11.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f9ea02ef40bb83823b2b04964459d281688fe173e20643870bb5d2edf68bc836", size = 218510, upload-time = "2025-10-15T15:14:13.46Z" }, + { url = "https://files.pythonhosted.org/packages/f4/06/e923830c1985ce808e40a3fa3eb46c13350b3224b7da59757d37b6ce12b8/coverage-7.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c770885b28fb399aaf2a65bbd1c12bf6f307ffd112d6a76c5231a94276f0c497", size = 216110, upload-time = "2025-10-15T15:14:15.157Z" }, + { url = "https://files.pythonhosted.org/packages/42/82/cdeed03bfead45203fb651ed756dfb5266028f5f939e7f06efac4041dad5/coverage-7.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a3d0e2087dba64c86a6b254f43e12d264b636a39e88c5cc0a01a7c71bcfdab7e", size = 216395, upload-time = "2025-10-15T15:14:16.863Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ba/e1c80caffc3199aa699813f73ff097bc2df7b31642bdbc7493600a8f1de5/coverage-7.11.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73feb83bb41c32811973b8565f3705caf01d928d972b72042b44e97c71fd70d1", size = 247433, upload-time = "2025-10-15T15:14:18.589Z" }, + { url = "https://files.pythonhosted.org/packages/80/c0/5b259b029694ce0a5bbc1548834c7ba3db41d3efd3474489d7efce4ceb18/coverage-7.11.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c6f31f281012235ad08f9a560976cc2fc9c95c17604ff3ab20120fe480169bca", size = 249970, upload-time = "2025-10-15T15:14:20.307Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/86/171b2b5e1aac7e2fd9b43f7158b987dbeb95f06d1fbecad54ad8163ae3e8/coverage-7.11.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9570ad567f880ef675673992222746a124b9595506826b210fbe0ce3f0499cd", size = 251324, upload-time = "2025-10-15T15:14:22.419Z" }, + { url = "https://files.pythonhosted.org/packages/1a/7e/7e10414d343385b92024af3932a27a1caf75c6e27ee88ba211221ff1a145/coverage-7.11.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8badf70446042553a773547a61fecaa734b55dc738cacf20c56ab04b77425e43", size = 247445, upload-time = "2025-10-15T15:14:24.205Z" }, + { url = "https://files.pythonhosted.org/packages/c4/3b/e4f966b21f5be8c4bf86ad75ae94efa0de4c99c7bbb8114476323102e345/coverage-7.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a09c1211959903a479e389685b7feb8a17f59ec5a4ef9afde7650bd5eabc2777", size = 249324, upload-time = "2025-10-15T15:14:26.234Z" }, + { url = "https://files.pythonhosted.org/packages/00/a2/8479325576dfcd909244d0df215f077f47437ab852ab778cfa2f8bf4d954/coverage-7.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:5ef83b107f50db3f9ae40f69e34b3bd9337456c5a7fe3461c7abf8b75dd666a2", size = 247261, upload-time = "2025-10-15T15:14:28.42Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d8/3a9e2db19d94d65771d0f2e21a9ea587d11b831332a73622f901157cc24b/coverage-7.11.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f91f927a3215b8907e214af77200250bb6aae36eca3f760f89780d13e495388d", size = 247092, upload-time = "2025-10-15T15:14:30.784Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b1/bbca3c472544f9e2ad2d5116b2379732957048be4b93a9c543fcd0207e5f/coverage-7.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cdbcd376716d6b7fbfeedd687a6c4be019c5a5671b35f804ba76a4c0a778cba4", size = 248755, upload-time = "2025-10-15T15:14:32.585Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/49/638d5a45a6a0f00af53d6b637c87007eb2297042186334e9923a61aa8854/coverage-7.11.0-cp314-cp314-win32.whl", hash = "sha256:bab7ec4bb501743edc63609320aaec8cd9188b396354f482f4de4d40a9d10721", size = 218793, upload-time = "2025-10-15T15:14:34.972Z" }, + { url = "https://files.pythonhosted.org/packages/30/cc/b675a51f2d068adb3cdf3799212c662239b0ca27f4691d1fff81b92ea850/coverage-7.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d4ba9a449e9364a936a27322b20d32d8b166553bfe63059bd21527e681e2fad", size = 219587, upload-time = "2025-10-15T15:14:37.047Z" }, + { url = "https://files.pythonhosted.org/packages/93/98/5ac886876026de04f00820e5094fe22166b98dcb8b426bf6827aaf67048c/coverage-7.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:ce37f215223af94ef0f75ac68ea096f9f8e8c8ec7d6e8c346ee45c0d363f0479", size = 218168, upload-time = "2025-10-15T15:14:38.861Z" }, + { url = "https://files.pythonhosted.org/packages/14/d1/b4145d35b3e3ecf4d917e97fc8895bcf027d854879ba401d9ff0f533f997/coverage-7.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f413ce6e07e0d0dc9c433228727b619871532674b45165abafe201f200cc215f", size = 216850, upload-time = "2025-10-15T15:14:40.651Z" }, + { url = "https://files.pythonhosted.org/packages/ca/d1/7f645fc2eccd318369a8a9948acc447bb7c1ade2911e31d3c5620544c22b/coverage-7.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:05791e528a18f7072bf5998ba772fe29db4da1234c45c2087866b5ba4dea710e", size = 217071, upload-time = "2025-10-15T15:14:42.755Z" }, + { url = "https://files.pythonhosted.org/packages/54/7d/64d124649db2737ceced1dfcbdcb79898d5868d311730f622f8ecae84250/coverage-7.11.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cacb29f420cfeb9283b803263c3b9a068924474ff19ca126ba9103e1278dfa44", size = 258570, upload-time = "2025-10-15T15:14:44.542Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/3f/6f5922f80dc6f2d8b2c6f974835c43f53eb4257a7797727e6ca5b7b2ec1f/coverage-7.11.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314c24e700d7027ae3ab0d95fbf8d53544fca1f20345fd30cd219b737c6e58d3", size = 260738, upload-time = "2025-10-15T15:14:46.436Z" }, + { url = "https://files.pythonhosted.org/packages/0e/5f/9e883523c4647c860b3812b417a2017e361eca5b635ee658387dc11b13c1/coverage-7.11.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:630d0bd7a293ad2fc8b4b94e5758c8b2536fdf36c05f1681270203e463cbfa9b", size = 262994, upload-time = "2025-10-15T15:14:48.3Z" }, + { url = "https://files.pythonhosted.org/packages/07/bb/43b5a8e94c09c8bf51743ffc65c4c841a4ca5d3ed191d0a6919c379a1b83/coverage-7.11.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e89641f5175d65e2dbb44db15fe4ea48fade5d5bbb9868fdc2b4fce22f4a469d", size = 257282, upload-time = "2025-10-15T15:14:50.236Z" }, + { url = "https://files.pythonhosted.org/packages/aa/e5/0ead8af411411330b928733e1d201384b39251a5f043c1612970310e8283/coverage-7.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c9f08ea03114a637dab06cedb2e914da9dc67fa52c6015c018ff43fdde25b9c2", size = 260430, upload-time = "2025-10-15T15:14:52.413Z" }, + { url = "https://files.pythonhosted.org/packages/ae/66/03dd8bb0ba5b971620dcaac145461950f6d8204953e535d2b20c6b65d729/coverage-7.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce9f3bde4e9b031eaf1eb61df95c1401427029ea1bfddb8621c1161dcb0fa02e", size = 258190, upload-time = "2025-10-15T15:14:54.268Z" }, + { url = "https://files.pythonhosted.org/packages/45/ae/28a9cce40bf3174426cb2f7e71ee172d98e7f6446dff936a7ccecee34b14/coverage-7.11.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:e4dc07e95495923d6fd4d6c27bf70769425b71c89053083843fd78f378558996", size = 256658, upload-time = "2025-10-15T15:14:56.436Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/7c/3a44234a8599513684bfc8684878fd7b126c2760f79712bb78c56f19efc4/coverage-7.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:424538266794db2861db4922b05d729ade0940ee69dcf0591ce8f69784db0e11", size = 259342, upload-time = "2025-10-15T15:14:58.538Z" }, + { url = "https://files.pythonhosted.org/packages/e1/e6/0108519cba871af0351725ebdb8660fd7a0fe2ba3850d56d32490c7d9b4b/coverage-7.11.0-cp314-cp314t-win32.whl", hash = "sha256:4c1eeb3fb8eb9e0190bebafd0462936f75717687117339f708f395fe455acc73", size = 219568, upload-time = "2025-10-15T15:15:00.382Z" }, + { url = "https://files.pythonhosted.org/packages/c9/76/44ba876e0942b4e62fdde23ccb029ddb16d19ba1bef081edd00857ba0b16/coverage-7.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b56efee146c98dbf2cf5cffc61b9829d1e94442df4d7398b26892a53992d3547", size = 220687, upload-time = "2025-10-15T15:15:02.322Z" }, + { url = "https://files.pythonhosted.org/packages/b9/0c/0df55ecb20d0d0ed5c322e10a441775e1a3a5d78c60f0c4e1abfe6fcf949/coverage-7.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:b5c2705afa83f49bd91962a4094b6b082f94aef7626365ab3f8f4bd159c5acf3", size = 218711, upload-time = "2025-10-15T15:15:04.575Z" }, + { url = "https://files.pythonhosted.org/packages/5f/04/642c1d8a448ae5ea1369eac8495740a79eb4e581a9fb0cbdce56bbf56da1/coverage-7.11.0-py3-none-any.whl", hash = "sha256:4b7589765348d78fb4e5fb6ea35d07564e387da2fc5efff62e0222971f155f68", size = 207761, upload-time = "2025-10-15T15:15:06.439Z" }, ] [package.optional-dependencies] @@ -724,67 +702,67 @@ toml = [ [[package]] name = "cryptography" -version = "46.0.2" +version = "46.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/4a/9b/e301418629f7bfdf72db9e80ad6ed9d1b83c487c471803eaa6464c511a01/cryptography-46.0.2.tar.gz", hash = "sha256:21b6fc8c71a3f9a604f028a329e5560009cc4a3a828bfea5fcba8eb7647d88fe", size = 749293, upload-time = "2025-10-01T00:29:11.856Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/98/7a8df8c19a335c8028414738490fc3955c0cecbfdd37fcc1b9c3d04bd561/cryptography-46.0.2-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:f3e32ab7dd1b1ef67b9232c4cf5e2ee4cd517d4316ea910acaaa9c5712a1c663", size = 7261255, upload-time = "2025-10-01T00:27:22.947Z" }, - { url = "https://files.pythonhosted.org/packages/c6/38/b2adb2aa1baa6706adc3eb746691edd6f90a656a9a65c3509e274d15a2b8/cryptography-46.0.2-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1fd1a69086926b623ef8126b4c33d5399ce9e2f3fac07c9c734c2a4ec38b6d02", size = 4297596, upload-time = "2025-10-01T00:27:25.258Z" }, - { url = "https://files.pythonhosted.org/packages/e4/27/0f190ada240003119488ae66c897b5e97149292988f556aef4a6a2a57595/cryptography-46.0.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb7fb9cd44c2582aa5990cf61a4183e6f54eea3172e54963787ba47287edd135", size = 4450899, upload-time = "2025-10-01T00:27:27.458Z" }, - { url = "https://files.pythonhosted.org/packages/85/d5/e4744105ab02fdf6bb58ba9a816e23b7a633255987310b4187d6745533db/cryptography-46.0.2-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9066cfd7f146f291869a9898b01df1c9b0e314bfa182cef432043f13fc462c92", size = 4300382, upload-time = "2025-10-01T00:27:29.091Z" }, - { url = "https://files.pythonhosted.org/packages/33/fb/bf9571065c18c04818cb07de90c43fc042c7977c68e5de6876049559c72f/cryptography-46.0.2-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:97e83bf4f2f2c084d8dd792d13841d0a9b241643151686010866bbd076b19659", size = 4017347, upload-time = "2025-10-01T00:27:30.767Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/72/fc51856b9b16155ca071080e1a3ad0c3a8e86616daf7eb018d9565b99baa/cryptography-46.0.2-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:4a766d2a5d8127364fd936572c6e6757682fc5dfcbdba1632d4554943199f2fa", size = 4983500, upload-time = "2025-10-01T00:27:32.741Z" }, - { url = "https://files.pythonhosted.org/packages/c1/53/0f51e926799025e31746d454ab2e36f8c3f0d41592bc65cb9840368d3275/cryptography-46.0.2-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:fab8f805e9675e61ed8538f192aad70500fa6afb33a8803932999b1049363a08", size = 4482591, upload-time = "2025-10-01T00:27:34.869Z" }, - { url = "https://files.pythonhosted.org/packages/86/96/4302af40b23ab8aa360862251fb8fc450b2a06ff24bc5e261c2007f27014/cryptography-46.0.2-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:1e3b6428a3d56043bff0bb85b41c535734204e599c1c0977e1d0f261b02f3ad5", size = 4300019, upload-time = "2025-10-01T00:27:37.029Z" }, - { url = "https://files.pythonhosted.org/packages/9b/59/0be12c7fcc4c5e34fe2b665a75bc20958473047a30d095a7657c218fa9e8/cryptography-46.0.2-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:1a88634851d9b8de8bb53726f4300ab191d3b2f42595e2581a54b26aba71b7cc", size = 4950006, upload-time = "2025-10-01T00:27:40.272Z" }, - { url = "https://files.pythonhosted.org/packages/55/1d/42fda47b0111834b49e31590ae14fd020594d5e4dadd639bce89ad790fba/cryptography-46.0.2-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:be939b99d4e091eec9a2bcf41aaf8f351f312cd19ff74b5c83480f08a8a43e0b", size = 4482088, upload-time = "2025-10-01T00:27:42.668Z" }, - { url = "https://files.pythonhosted.org/packages/17/50/60f583f69aa1602c2bdc7022dae86a0d2b837276182f8c1ec825feb9b874/cryptography-46.0.2-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f13b040649bc18e7eb37936009b24fd31ca095a5c647be8bb6aaf1761142bd1", size = 4425599, upload-time = "2025-10-01T00:27:44.616Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/57/d8d4134cd27e6e94cf44adb3f3489f935bde85f3a5508e1b5b43095b917d/cryptography-46.0.2-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bdc25e4e01b261a8fda4e98618f1c9515febcecebc9566ddf4a70c63967043b", size = 4697458, upload-time = "2025-10-01T00:27:46.209Z" }, - { url = "https://files.pythonhosted.org/packages/d1/2b/531e37408573e1da33adfb4c58875013ee8ac7d548d1548967d94a0ae5c4/cryptography-46.0.2-cp311-abi3-win32.whl", hash = "sha256:8b9bf67b11ef9e28f4d78ff88b04ed0929fcd0e4f70bb0f704cfc32a5c6311ee", size = 3056077, upload-time = "2025-10-01T00:27:48.424Z" }, - { url = "https://files.pythonhosted.org/packages/a8/cd/2f83cafd47ed2dc5a3a9c783ff5d764e9e70d3a160e0df9a9dcd639414ce/cryptography-46.0.2-cp311-abi3-win_amd64.whl", hash = "sha256:758cfc7f4c38c5c5274b55a57ef1910107436f4ae842478c4989abbd24bd5acb", size = 3512585, upload-time = "2025-10-01T00:27:50.521Z" }, - { url = "https://files.pythonhosted.org/packages/00/36/676f94e10bfaa5c5b86c469ff46d3e0663c5dc89542f7afbadac241a3ee4/cryptography-46.0.2-cp311-abi3-win_arm64.whl", hash = "sha256:218abd64a2e72f8472c2102febb596793347a3e65fafbb4ad50519969da44470", size = 2927474, upload-time = "2025-10-01T00:27:52.91Z" }, - { url = "https://files.pythonhosted.org/packages/6f/cc/47fc6223a341f26d103cb6da2216805e08a37d3b52bee7f3b2aee8066f95/cryptography-46.0.2-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:bda55e8dbe8533937956c996beaa20266a8eca3570402e52ae52ed60de1faca8", size = 7198626, upload-time = "2025-10-01T00:27:54.8Z" }, - { url = "https://files.pythonhosted.org/packages/93/22/d66a8591207c28bbe4ac7afa25c4656dc19dc0db29a219f9809205639ede/cryptography-46.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e7155c0b004e936d381b15425273aee1cebc94f879c0ce82b0d7fecbf755d53a", size = 4287584, upload-time = "2025-10-01T00:27:57.018Z" }, - { url = 
"https://files.pythonhosted.org/packages/8c/3e/fac3ab6302b928e0398c269eddab5978e6c1c50b2b77bb5365ffa8633b37/cryptography-46.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a61c154cc5488272a6c4b86e8d5beff4639cdb173d75325ce464d723cda0052b", size = 4433796, upload-time = "2025-10-01T00:27:58.631Z" }, - { url = "https://files.pythonhosted.org/packages/7d/d8/24392e5d3c58e2d83f98fe5a2322ae343360ec5b5b93fe18bc52e47298f5/cryptography-46.0.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9ec3f2e2173f36a9679d3b06d3d01121ab9b57c979de1e6a244b98d51fea1b20", size = 4292126, upload-time = "2025-10-01T00:28:00.643Z" }, - { url = "https://files.pythonhosted.org/packages/ed/38/3d9f9359b84c16c49a5a336ee8be8d322072a09fac17e737f3bb11f1ce64/cryptography-46.0.2-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2fafb6aa24e702bbf74de4cb23bfa2c3beb7ab7683a299062b69724c92e0fa73", size = 3993056, upload-time = "2025-10-01T00:28:02.8Z" }, - { url = "https://files.pythonhosted.org/packages/d6/a3/4c44fce0d49a4703cc94bfbe705adebf7ab36efe978053742957bc7ec324/cryptography-46.0.2-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:0c7ffe8c9b1fcbb07a26d7c9fa5e857c2fe80d72d7b9e0353dcf1d2180ae60ee", size = 4967604, upload-time = "2025-10-01T00:28:04.783Z" }, - { url = "https://files.pythonhosted.org/packages/eb/c2/49d73218747c8cac16bb8318a5513fde3129e06a018af3bc4dc722aa4a98/cryptography-46.0.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:5840f05518caa86b09d23f8b9405a7b6d5400085aa14a72a98fdf5cf1568c0d2", size = 4465367, upload-time = "2025-10-01T00:28:06.864Z" }, - { url = "https://files.pythonhosted.org/packages/1b/64/9afa7d2ee742f55ca6285a54386ed2778556a4ed8871571cb1c1bfd8db9e/cryptography-46.0.2-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:27c53b4f6a682a1b645fbf1cd5058c72cf2f5aeba7d74314c36838c7cbc06e0f", size = 4291678, upload-time = "2025-10-01T00:28:08.982Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/48/1696d5ea9623a7b72ace87608f6899ca3c331709ac7ebf80740abb8ac673/cryptography-46.0.2-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:512c0250065e0a6b286b2db4bbcc2e67d810acd53eb81733e71314340366279e", size = 4931366, upload-time = "2025-10-01T00:28:10.74Z" }, - { url = "https://files.pythonhosted.org/packages/eb/3c/9dfc778401a334db3b24435ee0733dd005aefb74afe036e2d154547cb917/cryptography-46.0.2-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:07c0eb6657c0e9cca5891f4e35081dbf985c8131825e21d99b4f440a8f496f36", size = 4464738, upload-time = "2025-10-01T00:28:12.491Z" }, - { url = "https://files.pythonhosted.org/packages/dc/b1/abcde62072b8f3fd414e191a6238ce55a0050e9738090dc6cded24c12036/cryptography-46.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:48b983089378f50cba258f7f7aa28198c3f6e13e607eaf10472c26320332ca9a", size = 4419305, upload-time = "2025-10-01T00:28:14.145Z" }, - { url = "https://files.pythonhosted.org/packages/c7/1f/3d2228492f9391395ca34c677e8f2571fb5370fe13dc48c1014f8c509864/cryptography-46.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e6f6775eaaa08c0eec73e301f7592f4367ccde5e4e4df8e58320f2ebf161ea2c", size = 4681201, upload-time = "2025-10-01T00:28:15.951Z" }, - { url = "https://files.pythonhosted.org/packages/de/77/b687745804a93a55054f391528fcfc76c3d6bfd082ce9fb62c12f0d29fc1/cryptography-46.0.2-cp314-cp314t-win32.whl", hash = "sha256:e8633996579961f9b5a3008683344c2558d38420029d3c0bc7ff77c17949a4e1", size = 3022492, upload-time = "2025-10-01T00:28:17.643Z" }, - { url = "https://files.pythonhosted.org/packages/60/a5/8d498ef2996e583de0bef1dcc5e70186376f00883ae27bf2133f490adf21/cryptography-46.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:48c01988ecbb32979bb98731f5c2b2f79042a6c58cc9a319c8c2f9987c7f68f9", size = 3496215, upload-time = "2025-10-01T00:28:19.272Z" }, - { url = 
"https://files.pythonhosted.org/packages/56/db/ee67aaef459a2706bc302b15889a1a8126ebe66877bab1487ae6ad00f33d/cryptography-46.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:8e2ad4d1a5899b7caa3a450e33ee2734be7cc0689010964703a7c4bcc8dd4fd0", size = 2919255, upload-time = "2025-10-01T00:28:21.115Z" }, - { url = "https://files.pythonhosted.org/packages/d5/bb/fa95abcf147a1b0bb94d95f53fbb09da77b24c776c5d87d36f3d94521d2c/cryptography-46.0.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a08e7401a94c002e79dc3bc5231b6558cd4b2280ee525c4673f650a37e2c7685", size = 7248090, upload-time = "2025-10-01T00:28:22.846Z" }, - { url = "https://files.pythonhosted.org/packages/b7/66/f42071ce0e3ffbfa80a88feadb209c779fda92a23fbc1e14f74ebf72ef6b/cryptography-46.0.2-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d30bc11d35743bf4ddf76674a0a369ec8a21f87aaa09b0661b04c5f6c46e8d7b", size = 4293123, upload-time = "2025-10-01T00:28:25.072Z" }, - { url = "https://files.pythonhosted.org/packages/a8/5d/1fdbd2e5c1ba822828d250e5a966622ef00185e476d1cd2726b6dd135e53/cryptography-46.0.2-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bca3f0ce67e5a2a2cf524e86f44697c4323a86e0fd7ba857de1c30d52c11ede1", size = 4439524, upload-time = "2025-10-01T00:28:26.808Z" }, - { url = "https://files.pythonhosted.org/packages/c8/c1/5e4989a7d102d4306053770d60f978c7b6b1ea2ff8c06e0265e305b23516/cryptography-46.0.2-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ff798ad7a957a5021dcbab78dfff681f0cf15744d0e6af62bd6746984d9c9e9c", size = 4297264, upload-time = "2025-10-01T00:28:29.327Z" }, - { url = "https://files.pythonhosted.org/packages/28/78/b56f847d220cb1d6d6aef5a390e116ad603ce13a0945a3386a33abc80385/cryptography-46.0.2-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:cb5e8daac840e8879407acbe689a174f5ebaf344a062f8918e526824eb5d97af", size = 4011872, upload-time = "2025-10-01T00:28:31.479Z" }, - { url = 
"https://files.pythonhosted.org/packages/e1/80/2971f214b066b888944f7b57761bf709ee3f2cf805619a18b18cab9b263c/cryptography-46.0.2-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:3f37aa12b2d91e157827d90ce78f6180f0c02319468a0aea86ab5a9566da644b", size = 4978458, upload-time = "2025-10-01T00:28:33.267Z" }, - { url = "https://files.pythonhosted.org/packages/a5/84/0cb0a2beaa4f1cbe63ebec4e97cd7e0e9f835d0ba5ee143ed2523a1e0016/cryptography-46.0.2-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5e38f203160a48b93010b07493c15f2babb4e0f2319bbd001885adb3f3696d21", size = 4472195, upload-time = "2025-10-01T00:28:36.039Z" }, - { url = "https://files.pythonhosted.org/packages/30/8b/2b542ddbf78835c7cd67b6fa79e95560023481213a060b92352a61a10efe/cryptography-46.0.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d19f5f48883752b5ab34cff9e2f7e4a7f216296f33714e77d1beb03d108632b6", size = 4296791, upload-time = "2025-10-01T00:28:37.732Z" }, - { url = "https://files.pythonhosted.org/packages/78/12/9065b40201b4f4876e93b9b94d91feb18de9150d60bd842a16a21565007f/cryptography-46.0.2-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:04911b149eae142ccd8c9a68892a70c21613864afb47aba92d8c7ed9cc001023", size = 4939629, upload-time = "2025-10-01T00:28:39.654Z" }, - { url = "https://files.pythonhosted.org/packages/f6/9e/6507dc048c1b1530d372c483dfd34e7709fc542765015425f0442b08547f/cryptography-46.0.2-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:8b16c1ede6a937c291d41176934268e4ccac2c6521c69d3f5961c5a1e11e039e", size = 4471988, upload-time = "2025-10-01T00:28:41.822Z" }, - { url = "https://files.pythonhosted.org/packages/b1/86/d025584a5f7d5c5ec8d3633dbcdce83a0cd579f1141ceada7817a4c26934/cryptography-46.0.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:747b6f4a4a23d5a215aadd1d0b12233b4119c4313df83ab4137631d43672cc90", size = 4422989, upload-time = "2025-10-01T00:28:43.608Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/39/536370418b38a15a61bbe413006b79dfc3d2b4b0eafceb5581983f973c15/cryptography-46.0.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b275e398ab3a7905e168c036aad54b5969d63d3d9099a0a66cc147a3cc983be", size = 4685578, upload-time = "2025-10-01T00:28:45.361Z" }, - { url = "https://files.pythonhosted.org/packages/15/52/ea7e2b1910f547baed566c866fbb86de2402e501a89ecb4871ea7f169a81/cryptography-46.0.2-cp38-abi3-win32.whl", hash = "sha256:0b507c8e033307e37af61cb9f7159b416173bdf5b41d11c4df2e499a1d8e007c", size = 3036711, upload-time = "2025-10-01T00:28:47.096Z" }, - { url = "https://files.pythonhosted.org/packages/71/9e/171f40f9c70a873e73c2efcdbe91e1d4b1777a03398fa1c4af3c56a2477a/cryptography-46.0.2-cp38-abi3-win_amd64.whl", hash = "sha256:f9b2dc7668418fb6f221e4bf701f716e05e8eadb4f1988a2487b11aedf8abe62", size = 3500007, upload-time = "2025-10-01T00:28:48.967Z" }, - { url = "https://files.pythonhosted.org/packages/3e/7c/15ad426257615f9be8caf7f97990cf3dcbb5b8dd7ed7e0db581a1c4759dd/cryptography-46.0.2-cp38-abi3-win_arm64.whl", hash = "sha256:91447f2b17e83c9e0c89f133119d83f94ce6e0fb55dd47da0a959316e6e9cfa1", size = 2918153, upload-time = "2025-10-01T00:28:51.003Z" }, - { url = "https://files.pythonhosted.org/packages/25/b2/067a7db693488f19777ecf73f925bcb6a3efa2eae42355bafaafa37a6588/cryptography-46.0.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f25a41f5b34b371a06dad3f01799706631331adc7d6c05253f5bca22068c7a34", size = 3701860, upload-time = "2025-10-01T00:28:53.003Z" }, - { url = "https://files.pythonhosted.org/packages/87/12/47c2aab2c285f97c71a791169529dbb89f48fc12e5f62bb6525c3927a1a2/cryptography-46.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e12b61e0b86611e3f4c1756686d9086c1d36e6fd15326f5658112ad1f1cc8807", size = 3429917, upload-time = "2025-10-01T00:28:55.03Z" }, - { url = 
"https://files.pythonhosted.org/packages/b7/8c/1aabe338149a7d0f52c3e30f2880b20027ca2a485316756ed6f000462db3/cryptography-46.0.2-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1d3b3edd145953832e09607986f2bd86f85d1dc9c48ced41808b18009d9f30e5", size = 3714495, upload-time = "2025-10-01T00:28:57.222Z" }, - { url = "https://files.pythonhosted.org/packages/e3/0a/0d10eb970fe3e57da9e9ddcfd9464c76f42baf7b3d0db4a782d6746f788f/cryptography-46.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:fe245cf4a73c20592f0f48da39748b3513db114465be78f0a36da847221bd1b4", size = 4243379, upload-time = "2025-10-01T00:28:58.989Z" }, - { url = "https://files.pythonhosted.org/packages/7d/60/e274b4d41a9eb82538b39950a74ef06e9e4d723cb998044635d9deb1b435/cryptography-46.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2b9cad9cf71d0c45566624ff76654e9bae5f8a25970c250a26ccfc73f8553e2d", size = 4409533, upload-time = "2025-10-01T00:29:00.785Z" }, - { url = "https://files.pythonhosted.org/packages/19/9a/fb8548f762b4749aebd13b57b8f865de80258083fe814957f9b0619cfc56/cryptography-46.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:9bd26f2f75a925fdf5e0a446c0de2714f17819bf560b44b7480e4dd632ad6c46", size = 4243120, upload-time = "2025-10-01T00:29:02.515Z" }, - { url = "https://files.pythonhosted.org/packages/71/60/883f24147fd4a0c5cab74ac7e36a1ff3094a54ba5c3a6253d2ff4b19255b/cryptography-46.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:7282d8f092b5be7172d6472f29b0631f39f18512a3642aefe52c3c0e0ccfad5a", size = 4408940, upload-time = "2025-10-01T00:29:04.42Z" }, - { url = "https://files.pythonhosted.org/packages/d9/b5/c5e179772ec38adb1c072b3aa13937d2860509ba32b2462bf1dda153833b/cryptography-46.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c4b93af7920cdf80f71650769464ccf1fb49a4b56ae0024173c24c48eb6b1612", size = 3438518, upload-time = "2025-10-01T00:29:06.139Z" }, +sdist = { url = 
"https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004, upload-time = "2025-10-15T23:16:52.239Z" }, + { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" }, + { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" }, + { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" }, + { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = "2025-10-15T23:17:04.078Z" }, + { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" }, + { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" }, + { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" }, + { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" }, + { url = "https://files.pythonhosted.org/packages/96/92/8a6a9525893325fc057a01f654d7efc2c64b9de90413adcf605a85744ff4/cryptography-46.0.3-cp311-abi3-win32.whl", hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018", size = 3055988, upload-time = "2025-10-15T23:17:14.65Z" }, + { url = "https://files.pythonhosted.org/packages/7e/bf/80fbf45253ea585a1e492a6a17efcb93467701fa79e71550a430c5e60df0/cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb", size = 3514451, upload-time = "2025-10-15T23:17:16.142Z" }, + { url = "https://files.pythonhosted.org/packages/2e/af/9b302da4c87b0beb9db4e756386a7c6c5b8003cd0e742277888d352ae91d/cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c", size = 2928007, upload-time = "2025-10-15T23:17:18.04Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e2/a510aa736755bffa9d2f75029c229111a1d02f8ecd5de03078f4c18d91a3/cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217", size = 7158012, upload-time = "2025-10-15T23:17:19.982Z" }, + { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728, upload-time = "2025-10-15T23:17:21.527Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" }, + { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" }, + { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" }, + { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" }, + { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" }, + { url = 
"https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" }, + { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" }, + { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" }, + { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" }, + { url = "https://files.pythonhosted.org/packages/fd/30/27654c1dbaf7e4a3531fa1fc77986d04aefa4d6d78259a62c9dc13d7ad36/cryptography-46.0.3-cp314-cp314t-win32.whl", hash = "sha256:8a6e050cb6164d3f830453754094c086ff2d0b2f3a897a1d9820f6139a1f0914", size = 3022339, upload-time = "2025-10-15T23:17:40.888Z" }, + { url = "https://files.pythonhosted.org/packages/f6/30/640f34ccd4d2a1bc88367b54b926b781b5a018d65f404d409aba76a84b1c/cryptography-46.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:760f83faa07f8b64e9c33fc963d790a2edb24efb479e3520c14a45741cd9b2db", size = 3494315, upload-time = "2025-10-15T23:17:42.769Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/8b/88cc7e3bd0a8e7b861f26981f7b820e1f46aa9d26cc482d0feba0ecb4919/cryptography-46.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:516ea134e703e9fe26bcd1277a4b59ad30586ea90c365a87781d7887a646fe21", size = 2919331, upload-time = "2025-10-15T23:17:44.468Z" }, + { url = "https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248, upload-time = "2025-10-15T23:17:46.294Z" }, + { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" }, + { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" }, + { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" }, + { url = 
"https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" }, + { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" }, + { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" }, + { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" }, + { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6e/1c8331ddf91ca4730ab3086a0f1be19c65510a33b5a441cb334e7a2d2560/cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df", size = 3036695, upload-time = "2025-10-15T23:18:08.672Z" }, + { url = "https://files.pythonhosted.org/packages/90/45/b0d691df20633eff80955a0fc7695ff9051ffce8b69741444bd9ed7bd0db/cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f", size = 3501720, upload-time = "2025-10-15T23:18:10.632Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cb/2da4cc83f5edb9c3257d09e1e7ab7b23f049c7962cae8d842bbef0a9cec9/cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372", size = 2918740, upload-time = "2025-10-15T23:18:12.277Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cd/1a8633802d766a0fa46f382a77e096d7e209e0817892929655fe0586ae32/cryptography-46.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a23582810fedb8c0bc47524558fb6c56aac3fc252cb306072fd2815da2a47c32", size = 3689163, upload-time = "2025-10-15T23:18:13.821Z" }, + { url = "https://files.pythonhosted.org/packages/4c/59/6b26512964ace6480c3e54681a9859c974172fb141c38df11eadd8416947/cryptography-46.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e7aec276d68421f9574040c26e2a7c3771060bc0cff408bae1dcb19d3ab1e63c", size = 3429474, upload-time = "2025-10-15T23:18:15.477Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/8a/e60e46adab4362a682cf142c7dcb5bf79b782ab2199b0dcb81f55970807f/cryptography-46.0.3-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7ce938a99998ed3c8aa7e7272dca1a610401ede816d36d0693907d863b10d9ea", size = 3698132, upload-time = "2025-10-15T23:18:17.056Z" }, + { url = "https://files.pythonhosted.org/packages/da/38/f59940ec4ee91e93d3311f7532671a5cef5570eb04a144bf203b58552d11/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:191bb60a7be5e6f54e30ba16fdfae78ad3a342a0599eb4193ba88e3f3d6e185b", size = 4243992, upload-time = "2025-10-15T23:18:18.695Z" }, + { url = "https://files.pythonhosted.org/packages/b0/0c/35b3d92ddebfdfda76bb485738306545817253d0a3ded0bfe80ef8e67aa5/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c70cc23f12726be8f8bc72e41d5065d77e4515efae3690326764ea1b07845cfb", size = 4409944, upload-time = "2025-10-15T23:18:20.597Z" }, + { url = "https://files.pythonhosted.org/packages/99/55/181022996c4063fc0e7666a47049a1ca705abb9c8a13830f074edb347495/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:9394673a9f4de09e28b5356e7fff97d778f8abad85c9d5ac4a4b7e25a0de7717", size = 4242957, upload-time = "2025-10-15T23:18:22.18Z" }, + { url = "https://files.pythonhosted.org/packages/ba/af/72cd6ef29f9c5f731251acadaeb821559fe25f10852f44a63374c9ca08c1/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:94cd0549accc38d1494e1f8de71eca837d0509d0d44bf11d158524b0e12cebf9", size = 4409447, upload-time = "2025-10-15T23:18:24.209Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c3/e90f4a4feae6410f914f8ebac129b9ae7a8c92eb60a638012dde42030a9d/cryptography-46.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6b5063083824e5509fdba180721d55909ffacccc8adbec85268b48439423d78c", size = 3438528, upload-time = "2025-10-15T23:18:26.227Z" }, ] [[package]] @@ -879,15 +857,15 @@ wheels = [ [[package]] 
name = "fal-client" -version = "0.8.0" +version = "0.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "httpx-sse" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5c/62/6ddf0e206127975490d2f0bdb7c3371d6a338ad755caf700f3ad18215551/fal_client-0.8.0.tar.gz", hash = "sha256:86f521e0dfeff26e5fd6b92bdfaec1503e016f58789ee8e0c0c5298a6006ccf2", size = 14769, upload-time = "2025-09-30T22:41:29.3Z" } +sdist = { url = "https://files.pythonhosted.org/packages/15/21/6a199b0e5933a93ecb69ca919aa58e62682ff4619e5cdf9e5b17becebeeb/fal_client-0.8.1.tar.gz", hash = "sha256:2c12cb0b0a327f4aa9c24fc7952722acf7aa059a1421a1c53283466f3bc07353", size = 15795, upload-time = "2025-10-15T19:33:09.608Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/e7/1cc6e54c105310246f6c8d9e1199c5e7b06a255af164c438449617618e89/fal_client-0.8.0-py3-none-any.whl", hash = "sha256:f2fd9801a7b67b075e388f845065c1f2969f477267233311d34cd07039cab77d", size = 10814, upload-time = "2025-09-30T22:41:28.081Z" }, + { url = "https://files.pythonhosted.org/packages/45/dc/0012b95c05448264329ba71ad568e440f12b7f5acdf2ddc09fa1aec42e0d/fal_client-0.8.1-py3-none-any.whl", hash = "sha256:ab37063f2b35ca6fad06f75fe45b05b72c774ad7590e0f93d47a8f6ad5e1f9db", size = 10912, upload-time = "2025-10-15T19:33:07.797Z" }, ] [[package]] @@ -1061,7 +1039,7 @@ wheels = [ [[package]] name = "google-genai" -version = "1.44.0" +version = "1.45.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1073,9 +1051,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/c4/7edde80ee4c0622f740008f927a11f8c8b0c7d6457f219fc698c27a3a377/google_genai-1.44.0.tar.gz", hash = "sha256:7df8c42505900714fea98ed0d03c06ed18323368ef9dceff74645631fd7a7650", size = 236659, upload-time = "2025-10-15T03:32:49.653Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/91/77/776b92f6f7cf7d7d3bc77b44a323605ae0f94f807cf9a4977c90d296b6b4/google_genai-1.45.0.tar.gz", hash = "sha256:96ec32ae99a30b5a1b54cb874b577ec6e41b5d5b808bf0f10ed4620e867f9386", size = 238198, upload-time = "2025-10-15T23:03:07.713Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/4d/1f64941ec95c004c290db6287c1e8a0906ed00a0656075ad705cecb12e7d/google_genai-1.44.0-py3-none-any.whl", hash = "sha256:4732ffd56e5f7c89f2440e0baf9a78b91b01a2a22db1b451a60b4417b3bcfd74", size = 237302, upload-time = "2025-10-15T03:32:47.839Z" }, + { url = "https://files.pythonhosted.org/packages/11/8f/922116dabe3d0312f08903d324db6ac9d406832cf57707550bc61151d91b/google_genai-1.45.0-py3-none-any.whl", hash = "sha256:e755295063e5fd5a4c44acff782a569e37fa8f76a6c75d0ede3375c70d916b7f", size = 238495, upload-time = "2025-10-15T23:03:05.926Z" }, ] [[package]] @@ -1845,7 +1823,7 @@ wheels = [ [[package]] name = "numpy" -version = "2.3.3" +version = "2.3.4" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14' and platform_python_implementation != 'PyPy'", @@ -1853,81 +1831,81 @@ resolution-markers = [ "python_full_version >= '3.12' and platform_python_implementation == 'PyPy'", "python_full_version == '3.11.*'", ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/19/95b3d357407220ed24c139018d2518fab0a61a948e68286a25f1a4d049ff/numpy-2.3.3.tar.gz", hash = "sha256:ddc7c39727ba62b80dfdbedf400d1c10ddfa8eefbd7ec8dcb118be8b56d31029", size = 20576648, upload-time = "2025-09-09T16:54:12.543Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/45/e80d203ef6b267aa29b22714fb558930b27960a0c5ce3c19c999232bb3eb/numpy-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ffc4f5caba7dfcbe944ed674b7eef683c7e94874046454bb79ed7ee0236f59d", size = 21259253, upload-time = "2025-09-09T15:56:02.094Z" }, - { url = 
"https://files.pythonhosted.org/packages/52/18/cf2c648fccf339e59302e00e5f2bc87725a3ce1992f30f3f78c9044d7c43/numpy-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7e946c7170858a0295f79a60214424caac2ffdb0063d4d79cb681f9aa0aa569", size = 14450980, upload-time = "2025-09-09T15:56:05.926Z" }, - { url = "https://files.pythonhosted.org/packages/93/fb/9af1082bec870188c42a1c239839915b74a5099c392389ff04215dcee812/numpy-2.3.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cd4260f64bc794c3390a63bf0728220dd1a68170c169088a1e0dfa2fde1be12f", size = 5379709, upload-time = "2025-09-09T15:56:07.95Z" }, - { url = "https://files.pythonhosted.org/packages/75/0f/bfd7abca52bcbf9a4a65abc83fe18ef01ccdeb37bfb28bbd6ad613447c79/numpy-2.3.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:f0ddb4b96a87b6728df9362135e764eac3cfa674499943ebc44ce96c478ab125", size = 6913923, upload-time = "2025-09-09T15:56:09.443Z" }, - { url = "https://files.pythonhosted.org/packages/79/55/d69adad255e87ab7afda1caf93ca997859092afeb697703e2f010f7c2e55/numpy-2.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:afd07d377f478344ec6ca2b8d4ca08ae8bd44706763d1efb56397de606393f48", size = 14589591, upload-time = "2025-09-09T15:56:11.234Z" }, - { url = "https://files.pythonhosted.org/packages/10/a2/010b0e27ddeacab7839957d7a8f00e91206e0c2c47abbb5f35a2630e5387/numpy-2.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc92a5dedcc53857249ca51ef29f5e5f2f8c513e22cfb90faeb20343b8c6f7a6", size = 16938714, upload-time = "2025-09-09T15:56:14.637Z" }, - { url = "https://files.pythonhosted.org/packages/1c/6b/12ce8ede632c7126eb2762b9e15e18e204b81725b81f35176eac14dc5b82/numpy-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7af05ed4dc19f308e1d9fc759f36f21921eb7bbfc82843eeec6b2a2863a0aefa", size = 16370592, upload-time = "2025-09-09T15:56:17.285Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/35/aba8568b2593067bb6a8fe4c52babb23b4c3b9c80e1b49dff03a09925e4a/numpy-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:433bf137e338677cebdd5beac0199ac84712ad9d630b74eceeb759eaa45ddf30", size = 18884474, upload-time = "2025-09-09T15:56:20.943Z" }, - { url = "https://files.pythonhosted.org/packages/45/fa/7f43ba10c77575e8be7b0138d107e4f44ca4a1ef322cd16980ea3e8b8222/numpy-2.3.3-cp311-cp311-win32.whl", hash = "sha256:eb63d443d7b4ffd1e873f8155260d7f58e7e4b095961b01c91062935c2491e57", size = 6599794, upload-time = "2025-09-09T15:56:23.258Z" }, - { url = "https://files.pythonhosted.org/packages/0a/a2/a4f78cb2241fe5664a22a10332f2be886dcdea8784c9f6a01c272da9b426/numpy-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:ec9d249840f6a565f58d8f913bccac2444235025bbb13e9a4681783572ee3caa", size = 13088104, upload-time = "2025-09-09T15:56:25.476Z" }, - { url = "https://files.pythonhosted.org/packages/79/64/e424e975adbd38282ebcd4891661965b78783de893b381cbc4832fb9beb2/numpy-2.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:74c2a948d02f88c11a3c075d9733f1ae67d97c6bdb97f2bb542f980458b257e7", size = 10460772, upload-time = "2025-09-09T15:56:27.679Z" }, - { url = "https://files.pythonhosted.org/packages/51/5d/bb7fc075b762c96329147799e1bcc9176ab07ca6375ea976c475482ad5b3/numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf", size = 20957014, upload-time = "2025-09-09T15:56:29.966Z" }, - { url = "https://files.pythonhosted.org/packages/6b/0e/c6211bb92af26517acd52125a237a92afe9c3124c6a68d3b9f81b62a0568/numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25", size = 14185220, upload-time = "2025-09-09T15:56:32.175Z" }, - { url = "https://files.pythonhosted.org/packages/22/f2/07bb754eb2ede9073f4054f7c0286b0d9d2e23982e090a80d478b26d35ca/numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = 
"sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe", size = 5113918, upload-time = "2025-09-09T15:56:34.175Z" }, - { url = "https://files.pythonhosted.org/packages/81/0a/afa51697e9fb74642f231ea36aca80fa17c8fb89f7a82abd5174023c3960/numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b", size = 6647922, upload-time = "2025-09-09T15:56:36.149Z" }, - { url = "https://files.pythonhosted.org/packages/5d/f5/122d9cdb3f51c520d150fef6e87df9279e33d19a9611a87c0d2cf78a89f4/numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8", size = 14281991, upload-time = "2025-09-09T15:56:40.548Z" }, - { url = "https://files.pythonhosted.org/packages/51/64/7de3c91e821a2debf77c92962ea3fe6ac2bc45d0778c1cbe15d4fce2fd94/numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20", size = 16641643, upload-time = "2025-09-09T15:56:43.343Z" }, - { url = "https://files.pythonhosted.org/packages/30/e4/961a5fa681502cd0d68907818b69f67542695b74e3ceaa513918103b7e80/numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea", size = 16056787, upload-time = "2025-09-09T15:56:46.141Z" }, - { url = "https://files.pythonhosted.org/packages/99/26/92c912b966e47fbbdf2ad556cb17e3a3088e2e1292b9833be1dfa5361a1a/numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7", size = 18579598, upload-time = "2025-09-09T15:56:49.844Z" }, - { url = "https://files.pythonhosted.org/packages/17/b6/fc8f82cb3520768718834f310c37d96380d9dc61bfdaf05fe5c0b7653e01/numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf", size = 6320800, 
upload-time = "2025-09-09T15:56:52.499Z" }, - { url = "https://files.pythonhosted.org/packages/32/ee/de999f2625b80d043d6d2d628c07d0d5555a677a3cf78fdf868d409b8766/numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb", size = 12786615, upload-time = "2025-09-09T15:56:54.422Z" }, - { url = "https://files.pythonhosted.org/packages/49/6e/b479032f8a43559c383acb20816644f5f91c88f633d9271ee84f3b3a996c/numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5", size = 10195936, upload-time = "2025-09-09T15:56:56.541Z" }, - { url = "https://files.pythonhosted.org/packages/7d/b9/984c2b1ee61a8b803bf63582b4ac4242cf76e2dbd663efeafcb620cc0ccb/numpy-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f5415fb78995644253370985342cd03572ef8620b934da27d77377a2285955bf", size = 20949588, upload-time = "2025-09-09T15:56:59.087Z" }, - { url = "https://files.pythonhosted.org/packages/a6/e4/07970e3bed0b1384d22af1e9912527ecbeb47d3b26e9b6a3bced068b3bea/numpy-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d00de139a3324e26ed5b95870ce63be7ec7352171bc69a4cf1f157a48e3eb6b7", size = 14177802, upload-time = "2025-09-09T15:57:01.73Z" }, - { url = "https://files.pythonhosted.org/packages/35/c7/477a83887f9de61f1203bad89cf208b7c19cc9fef0cebef65d5a1a0619f2/numpy-2.3.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9dc13c6a5829610cc07422bc74d3ac083bd8323f14e2827d992f9e52e22cd6a6", size = 5106537, upload-time = "2025-09-09T15:57:03.765Z" }, - { url = "https://files.pythonhosted.org/packages/52/47/93b953bd5866a6f6986344d045a207d3f1cfbad99db29f534ea9cee5108c/numpy-2.3.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d79715d95f1894771eb4e60fb23f065663b2298f7d22945d66877aadf33d00c7", size = 6640743, upload-time = "2025-09-09T15:57:07.921Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/83/377f84aaeb800b64c0ef4de58b08769e782edcefa4fea712910b6f0afd3c/numpy-2.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:952cfd0748514ea7c3afc729a0fc639e61655ce4c55ab9acfab14bda4f402b4c", size = 14278881, upload-time = "2025-09-09T15:57:11.349Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a5/bf3db6e66c4b160d6ea10b534c381a1955dfab34cb1017ea93aa33c70ed3/numpy-2.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b83648633d46f77039c29078751f80da65aa64d5622a3cd62aaef9d835b6c93", size = 16636301, upload-time = "2025-09-09T15:57:14.245Z" }, - { url = "https://files.pythonhosted.org/packages/a2/59/1287924242eb4fa3f9b3a2c30400f2e17eb2707020d1c5e3086fe7330717/numpy-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b001bae8cea1c7dfdb2ae2b017ed0a6f2102d7a70059df1e338e307a4c78a8ae", size = 16053645, upload-time = "2025-09-09T15:57:16.534Z" }, - { url = "https://files.pythonhosted.org/packages/e6/93/b3d47ed882027c35e94ac2320c37e452a549f582a5e801f2d34b56973c97/numpy-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e9aced64054739037d42fb84c54dd38b81ee238816c948c8f3ed134665dcd86", size = 18578179, upload-time = "2025-09-09T15:57:18.883Z" }, - { url = "https://files.pythonhosted.org/packages/20/d9/487a2bccbf7cc9d4bfc5f0f197761a5ef27ba870f1e3bbb9afc4bbe3fcc2/numpy-2.3.3-cp313-cp313-win32.whl", hash = "sha256:9591e1221db3f37751e6442850429b3aabf7026d3b05542d102944ca7f00c8a8", size = 6312250, upload-time = "2025-09-09T15:57:21.296Z" }, - { url = "https://files.pythonhosted.org/packages/1b/b5/263ebbbbcede85028f30047eab3d58028d7ebe389d6493fc95ae66c636ab/numpy-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f0dadeb302887f07431910f67a14d57209ed91130be0adea2f9793f1a4f817cf", size = 12783269, upload-time = "2025-09-09T15:57:23.034Z" }, - { url = 
"https://files.pythonhosted.org/packages/fa/75/67b8ca554bbeaaeb3fac2e8bce46967a5a06544c9108ec0cf5cece559b6c/numpy-2.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:3c7cf302ac6e0b76a64c4aecf1a09e51abd9b01fc7feee80f6c43e3ab1b1dbc5", size = 10195314, upload-time = "2025-09-09T15:57:25.045Z" }, - { url = "https://files.pythonhosted.org/packages/11/d0/0d1ddec56b162042ddfafeeb293bac672de9b0cfd688383590090963720a/numpy-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eda59e44957d272846bb407aad19f89dc6f58fecf3504bd144f4c5cf81a7eacc", size = 21048025, upload-time = "2025-09-09T15:57:27.257Z" }, - { url = "https://files.pythonhosted.org/packages/36/9e/1996ca6b6d00415b6acbdd3c42f7f03ea256e2c3f158f80bd7436a8a19f3/numpy-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:823d04112bc85ef5c4fda73ba24e6096c8f869931405a80aa8b0e604510a26bc", size = 14301053, upload-time = "2025-09-09T15:57:30.077Z" }, - { url = "https://files.pythonhosted.org/packages/05/24/43da09aa764c68694b76e84b3d3f0c44cb7c18cdc1ba80e48b0ac1d2cd39/numpy-2.3.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:40051003e03db4041aa325da2a0971ba41cf65714e65d296397cc0e32de6018b", size = 5229444, upload-time = "2025-09-09T15:57:32.733Z" }, - { url = "https://files.pythonhosted.org/packages/bc/14/50ffb0f22f7218ef8af28dd089f79f68289a7a05a208db9a2c5dcbe123c1/numpy-2.3.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6ee9086235dd6ab7ae75aba5662f582a81ced49f0f1c6de4260a78d8f2d91a19", size = 6738039, upload-time = "2025-09-09T15:57:34.328Z" }, - { url = "https://files.pythonhosted.org/packages/55/52/af46ac0795e09657d45a7f4db961917314377edecf66db0e39fa7ab5c3d3/numpy-2.3.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94fcaa68757c3e2e668ddadeaa86ab05499a70725811e582b6a9858dd472fb30", size = 14352314, upload-time = "2025-09-09T15:57:36.255Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/b1/dc226b4c90eb9f07a3fff95c2f0db3268e2e54e5cce97c4ac91518aee71b/numpy-2.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da1a74b90e7483d6ce5244053399a614b1d6b7bc30a60d2f570e5071f8959d3e", size = 16701722, upload-time = "2025-09-09T15:57:38.622Z" }, - { url = "https://files.pythonhosted.org/packages/9d/9d/9d8d358f2eb5eced14dba99f110d83b5cd9a4460895230f3b396ad19a323/numpy-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2990adf06d1ecee3b3dcbb4977dfab6e9f09807598d647f04d385d29e7a3c3d3", size = 16132755, upload-time = "2025-09-09T15:57:41.16Z" }, - { url = "https://files.pythonhosted.org/packages/b6/27/b3922660c45513f9377b3fb42240bec63f203c71416093476ec9aa0719dc/numpy-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ed635ff692483b8e3f0fcaa8e7eb8a75ee71aa6d975388224f70821421800cea", size = 18651560, upload-time = "2025-09-09T15:57:43.459Z" }, - { url = "https://files.pythonhosted.org/packages/5b/8e/3ab61a730bdbbc201bb245a71102aa609f0008b9ed15255500a99cd7f780/numpy-2.3.3-cp313-cp313t-win32.whl", hash = "sha256:a333b4ed33d8dc2b373cc955ca57babc00cd6f9009991d9edc5ddbc1bac36bcd", size = 6442776, upload-time = "2025-09-09T15:57:45.793Z" }, - { url = "https://files.pythonhosted.org/packages/1c/3a/e22b766b11f6030dc2decdeff5c2fb1610768055603f9f3be88b6d192fb2/numpy-2.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4384a169c4d8f97195980815d6fcad04933a7e1ab3b530921c3fef7a1c63426d", size = 12927281, upload-time = "2025-09-09T15:57:47.492Z" }, - { url = "https://files.pythonhosted.org/packages/7b/42/c2e2bc48c5e9b2a83423f99733950fbefd86f165b468a3d85d52b30bf782/numpy-2.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:75370986cc0bc66f4ce5110ad35aae6d182cc4ce6433c40ad151f53690130bf1", size = 10265275, upload-time = "2025-09-09T15:57:49.647Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/01/342ad585ad82419b99bcf7cebe99e61da6bedb89e213c5fd71acc467faee/numpy-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cd052f1fa6a78dee696b58a914b7229ecfa41f0a6d96dc663c1220a55e137593", size = 20951527, upload-time = "2025-09-09T15:57:52.006Z" }, - { url = "https://files.pythonhosted.org/packages/ef/d8/204e0d73fc1b7a9ee80ab1fe1983dd33a4d64a4e30a05364b0208e9a241a/numpy-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:414a97499480067d305fcac9716c29cf4d0d76db6ebf0bf3cbce666677f12652", size = 14186159, upload-time = "2025-09-09T15:57:54.407Z" }, - { url = "https://files.pythonhosted.org/packages/22/af/f11c916d08f3a18fb8ba81ab72b5b74a6e42ead4c2846d270eb19845bf74/numpy-2.3.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:50a5fe69f135f88a2be9b6ca0481a68a136f6febe1916e4920e12f1a34e708a7", size = 5114624, upload-time = "2025-09-09T15:57:56.5Z" }, - { url = "https://files.pythonhosted.org/packages/fb/11/0ed919c8381ac9d2ffacd63fd1f0c34d27e99cab650f0eb6f110e6ae4858/numpy-2.3.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:b912f2ed2b67a129e6a601e9d93d4fa37bef67e54cac442a2f588a54afe5c67a", size = 6642627, upload-time = "2025-09-09T15:57:58.206Z" }, - { url = "https://files.pythonhosted.org/packages/ee/83/deb5f77cb0f7ba6cb52b91ed388b47f8f3c2e9930d4665c600408d9b90b9/numpy-2.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9e318ee0596d76d4cb3d78535dc005fa60e5ea348cd131a51e99d0bdbe0b54fe", size = 14296926, upload-time = "2025-09-09T15:58:00.035Z" }, - { url = "https://files.pythonhosted.org/packages/77/cc/70e59dcb84f2b005d4f306310ff0a892518cc0c8000a33d0e6faf7ca8d80/numpy-2.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce020080e4a52426202bdb6f7691c65bb55e49f261f31a8f506c9f6bc7450421", size = 16638958, upload-time = "2025-09-09T15:58:02.738Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/5a/b2ab6c18b4257e099587d5b7f903317bd7115333ad8d4ec4874278eafa61/numpy-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e6687dc183aa55dae4a705b35f9c0f8cb178bcaa2f029b241ac5356221d5c021", size = 16071920, upload-time = "2025-09-09T15:58:05.029Z" }, - { url = "https://files.pythonhosted.org/packages/b8/f1/8b3fdc44324a259298520dd82147ff648979bed085feeacc1250ef1656c0/numpy-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d8f3b1080782469fdc1718c4ed1d22549b5fb12af0d57d35e992158a772a37cf", size = 18577076, upload-time = "2025-09-09T15:58:07.745Z" }, - { url = "https://files.pythonhosted.org/packages/f0/a1/b87a284fb15a42e9274e7fcea0dad259d12ddbf07c1595b26883151ca3b4/numpy-2.3.3-cp314-cp314-win32.whl", hash = "sha256:cb248499b0bc3be66ebd6578b83e5acacf1d6cb2a77f2248ce0e40fbec5a76d0", size = 6366952, upload-time = "2025-09-09T15:58:10.096Z" }, - { url = "https://files.pythonhosted.org/packages/70/5f/1816f4d08f3b8f66576d8433a66f8fa35a5acfb3bbd0bf6c31183b003f3d/numpy-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:691808c2b26b0f002a032c73255d0bd89751425f379f7bcd22d140db593a96e8", size = 12919322, upload-time = "2025-09-09T15:58:12.138Z" }, - { url = "https://files.pythonhosted.org/packages/8c/de/072420342e46a8ea41c324a555fa90fcc11637583fb8df722936aed1736d/numpy-2.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:9ad12e976ca7b10f1774b03615a2a4bab8addce37ecc77394d8e986927dc0dfe", size = 10478630, upload-time = "2025-09-09T15:58:14.64Z" }, - { url = "https://files.pythonhosted.org/packages/d5/df/ee2f1c0a9de7347f14da5dd3cd3c3b034d1b8607ccb6883d7dd5c035d631/numpy-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9cc48e09feb11e1db00b320e9d30a4151f7369afb96bd0e48d942d09da3a0d00", size = 21047987, upload-time = "2025-09-09T15:58:16.889Z" }, - { url = "https://files.pythonhosted.org/packages/d6/92/9453bdc5a4e9e69cf4358463f25e8260e2ffc126d52e10038b9077815989/numpy-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:901bf6123879b7f251d3631967fd574690734236075082078e0571977c6a8e6a", size = 14301076, upload-time = "2025-09-09T15:58:20.343Z" }, - { url = "https://files.pythonhosted.org/packages/13/77/1447b9eb500f028bb44253105bd67534af60499588a5149a94f18f2ca917/numpy-2.3.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:7f025652034199c301049296b59fa7d52c7e625017cae4c75d8662e377bf487d", size = 5229491, upload-time = "2025-09-09T15:58:22.481Z" }, - { url = "https://files.pythonhosted.org/packages/3d/f9/d72221b6ca205f9736cb4b2ce3b002f6e45cd67cd6a6d1c8af11a2f0b649/numpy-2.3.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:533ca5f6d325c80b6007d4d7fb1984c303553534191024ec6a524a4c92a5935a", size = 6737913, upload-time = "2025-09-09T15:58:24.569Z" }, - { url = "https://files.pythonhosted.org/packages/3c/5f/d12834711962ad9c46af72f79bb31e73e416ee49d17f4c797f72c96b6ca5/numpy-2.3.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0edd58682a399824633b66885d699d7de982800053acf20be1eaa46d92009c54", size = 14352811, upload-time = "2025-09-09T15:58:26.416Z" }, - { url = "https://files.pythonhosted.org/packages/a1/0d/fdbec6629d97fd1bebed56cd742884e4eead593611bbe1abc3eb40d304b2/numpy-2.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:367ad5d8fbec5d9296d18478804a530f1191e24ab4d75ab408346ae88045d25e", size = 16702689, upload-time = "2025-09-09T15:58:28.831Z" }, - { url = "https://files.pythonhosted.org/packages/9b/09/0a35196dc5575adde1eb97ddfbc3e1687a814f905377621d18ca9bc2b7dd/numpy-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8f6ac61a217437946a1fa48d24c47c91a0c4f725237871117dea264982128097", size = 16133855, upload-time = "2025-09-09T15:58:31.349Z" }, - { url = "https://files.pythonhosted.org/packages/7a/ca/c9de3ea397d576f1b6753eaa906d4cdef1bf97589a6d9825a349b4729cc2/numpy-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:179a42101b845a816d464b6fe9a845dfaf308fdfc7925387195570789bb2c970", size 
= 18652520, upload-time = "2025-09-09T15:58:33.762Z" }, - { url = "https://files.pythonhosted.org/packages/fd/c2/e5ed830e08cd0196351db55db82f65bc0ab05da6ef2b72a836dcf1936d2f/numpy-2.3.3-cp314-cp314t-win32.whl", hash = "sha256:1250c5d3d2562ec4174bce2e3a1523041595f9b651065e4a4473f5f48a6bc8a5", size = 6515371, upload-time = "2025-09-09T15:58:36.04Z" }, - { url = "https://files.pythonhosted.org/packages/47/c7/b0f6b5b67f6788a0725f744496badbb604d226bf233ba716683ebb47b570/numpy-2.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:b37a0b2e5935409daebe82c1e42274d30d9dd355852529eab91dab8dcca7419f", size = 13112576, upload-time = "2025-09-09T15:58:37.927Z" }, - { url = "https://files.pythonhosted.org/packages/06/b9/33bba5ff6fb679aa0b1f8a07e853f002a6b04b9394db3069a1270a7784ca/numpy-2.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:78c9f6560dc7e6b3990e32df7ea1a50bbd0e2a111e05209963f5ddcab7073b0b", size = 10545953, upload-time = "2025-09-09T15:58:40.576Z" }, - { url = "https://files.pythonhosted.org/packages/b8/f2/7e0a37cfced2644c9563c529f29fa28acbd0960dde32ece683aafa6f4949/numpy-2.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1e02c7159791cd481e1e6d5ddd766b62a4d5acf8df4d4d1afe35ee9c5c33a41e", size = 21131019, upload-time = "2025-09-09T15:58:42.838Z" }, - { url = "https://files.pythonhosted.org/packages/1a/7e/3291f505297ed63831135a6cc0f474da0c868a1f31b0dd9a9f03a7a0d2ed/numpy-2.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:dca2d0fc80b3893ae72197b39f69d55a3cd8b17ea1b50aa4c62de82419936150", size = 14376288, upload-time = "2025-09-09T15:58:45.425Z" }, - { url = "https://files.pythonhosted.org/packages/bf/4b/ae02e985bdeee73d7b5abdefeb98aef1207e96d4c0621ee0cf228ddfac3c/numpy-2.3.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:99683cbe0658f8271b333a1b1b4bb3173750ad59c0c61f5bbdc5b318918fffe3", size = 5305425, upload-time = "2025-09-09T15:58:48.6Z" }, - { url = 
"https://files.pythonhosted.org/packages/8b/eb/9df215d6d7250db32007941500dc51c48190be25f2401d5b2b564e467247/numpy-2.3.3-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d9d537a39cc9de668e5cd0e25affb17aec17b577c6b3ae8a3d866b479fbe88d0", size = 6819053, upload-time = "2025-09-09T15:58:50.401Z" }, - { url = "https://files.pythonhosted.org/packages/57/62/208293d7d6b2a8998a4a1f23ac758648c3c32182d4ce4346062018362e29/numpy-2.3.3-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8596ba2f8af5f93b01d97563832686d20206d303024777f6dfc2e7c7c3f1850e", size = 14420354, upload-time = "2025-09-09T15:58:52.704Z" }, - { url = "https://files.pythonhosted.org/packages/ed/0c/8e86e0ff7072e14a71b4c6af63175e40d1e7e933ce9b9e9f765a95b4e0c3/numpy-2.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1ec5615b05369925bd1125f27df33f3b6c8bc10d788d5999ecd8769a1fa04db", size = 16760413, upload-time = "2025-09-09T15:58:55.027Z" }, - { url = "https://files.pythonhosted.org/packages/af/11/0cc63f9f321ccf63886ac203336777140011fb669e739da36d8db3c53b98/numpy-2.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2e267c7da5bf7309670523896df97f93f6e469fb931161f483cd6882b3b1a5dc", size = 12971844, upload-time = "2025-09-09T15:58:57.359Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a", size = 20582187, upload-time = "2025-10-15T16:18:11.77Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/e7/0e07379944aa8afb49a556a2b54587b828eb41dc9adc56fb7615b678ca53/numpy-2.3.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e78aecd2800b32e8347ce49316d3eaf04aed849cd5b38e0af39f829a4e59f5eb", size = 21259519, upload-time = "2025-10-15T16:15:19.012Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/cb/5a69293561e8819b09e34ed9e873b9a82b5f2ade23dce4c51dc507f6cfe1/numpy-2.3.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd09cc5d65bda1e79432859c40978010622112e9194e581e3415a3eccc7f43f", size = 14452796, upload-time = "2025-10-15T16:15:23.094Z" }, + { url = "https://files.pythonhosted.org/packages/e4/04/ff11611200acd602a1e5129e36cfd25bf01ad8e5cf927baf2e90236eb02e/numpy-2.3.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1b219560ae2c1de48ead517d085bc2d05b9433f8e49d0955c82e8cd37bd7bf36", size = 5381639, upload-time = "2025-10-15T16:15:25.572Z" }, + { url = "https://files.pythonhosted.org/packages/ea/77/e95c757a6fe7a48d28a009267408e8aa382630cc1ad1db7451b3bc21dbb4/numpy-2.3.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:bafa7d87d4c99752d07815ed7a2c0964f8ab311eb8168f41b910bd01d15b6032", size = 6914296, upload-time = "2025-10-15T16:15:27.079Z" }, + { url = "https://files.pythonhosted.org/packages/a3/d2/137c7b6841c942124eae921279e5c41b1c34bab0e6fc60c7348e69afd165/numpy-2.3.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36dc13af226aeab72b7abad501d370d606326a0029b9f435eacb3b8c94b8a8b7", size = 14591904, upload-time = "2025-10-15T16:15:29.044Z" }, + { url = "https://files.pythonhosted.org/packages/bb/32/67e3b0f07b0aba57a078c4ab777a9e8e6bc62f24fb53a2337f75f9691699/numpy-2.3.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7b2f9a18b5ff9824a6af80de4f37f4ec3c2aab05ef08f51c77a093f5b89adda", size = 16939602, upload-time = "2025-10-15T16:15:31.106Z" }, + { url = "https://files.pythonhosted.org/packages/95/22/9639c30e32c93c4cee3ccdb4b09c2d0fbff4dcd06d36b357da06146530fb/numpy-2.3.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9984bd645a8db6ca15d850ff996856d8762c51a2239225288f08f9050ca240a0", size = 16372661, upload-time = "2025-10-15T16:15:33.546Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/e9/a685079529be2b0156ae0c11b13d6be647743095bb51d46589e95be88086/numpy-2.3.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:64c5825affc76942973a70acf438a8ab618dbd692b84cd5ec40a0a0509edc09a", size = 18884682, upload-time = "2025-10-15T16:15:36.105Z" }, + { url = "https://files.pythonhosted.org/packages/cf/85/f6f00d019b0cc741e64b4e00ce865a57b6bed945d1bbeb1ccadbc647959b/numpy-2.3.4-cp311-cp311-win32.whl", hash = "sha256:ed759bf7a70342f7817d88376eb7142fab9fef8320d6019ef87fae05a99874e1", size = 6570076, upload-time = "2025-10-15T16:15:38.225Z" }, + { url = "https://files.pythonhosted.org/packages/7d/10/f8850982021cb90e2ec31990291f9e830ce7d94eef432b15066e7cbe0bec/numpy-2.3.4-cp311-cp311-win_amd64.whl", hash = "sha256:faba246fb30ea2a526c2e9645f61612341de1a83fb1e0c5edf4ddda5a9c10996", size = 13089358, upload-time = "2025-10-15T16:15:40.404Z" }, + { url = "https://files.pythonhosted.org/packages/d1/ad/afdd8351385edf0b3445f9e24210a9c3971ef4de8fd85155462fc4321d79/numpy-2.3.4-cp311-cp311-win_arm64.whl", hash = "sha256:4c01835e718bcebe80394fd0ac66c07cbb90147ebbdad3dcecd3f25de2ae7e2c", size = 10462292, upload-time = "2025-10-15T16:15:42.896Z" }, + { url = "https://files.pythonhosted.org/packages/96/7a/02420400b736f84317e759291b8edaeee9dc921f72b045475a9cbdb26b17/numpy-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ef1b5a3e808bc40827b5fa2c8196151a4c5abe110e1726949d7abddfe5c7ae11", size = 20957727, upload-time = "2025-10-15T16:15:44.9Z" }, + { url = "https://files.pythonhosted.org/packages/18/90/a014805d627aa5750f6f0e878172afb6454552da929144b3c07fcae1bb13/numpy-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2f91f496a87235c6aaf6d3f3d89b17dba64996abadccb289f48456cff931ca9", size = 14187262, upload-time = "2025-10-15T16:15:47.761Z" }, + { url = "https://files.pythonhosted.org/packages/c7/e4/0a94b09abe89e500dc748e7515f21a13e30c5c3fe3396e6d4ac108c25fca/numpy-2.3.4-cp312-cp312-macosx_14_0_arm64.whl", hash = 
"sha256:f77e5b3d3da652b474cc80a14084927a5e86a5eccf54ca8ca5cbd697bf7f2667", size = 5115992, upload-time = "2025-10-15T16:15:50.144Z" }, + { url = "https://files.pythonhosted.org/packages/88/dd/db77c75b055c6157cbd4f9c92c4458daef0dd9cbe6d8d2fe7f803cb64c37/numpy-2.3.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ab1c5f5ee40d6e01cbe96de5863e39b215a4d24e7d007cad56c7184fdf4aeef", size = 6648672, upload-time = "2025-10-15T16:15:52.442Z" }, + { url = "https://files.pythonhosted.org/packages/e1/e6/e31b0d713719610e406c0ea3ae0d90760465b086da8783e2fd835ad59027/numpy-2.3.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77b84453f3adcb994ddbd0d1c5d11db2d6bda1a2b7fd5ac5bd4649d6f5dc682e", size = 14284156, upload-time = "2025-10-15T16:15:54.351Z" }, + { url = "https://files.pythonhosted.org/packages/f9/58/30a85127bfee6f108282107caf8e06a1f0cc997cb6b52cdee699276fcce4/numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4121c5beb58a7f9e6dfdee612cb24f4df5cd4db6e8261d7f4d7450a997a65d6a", size = 16641271, upload-time = "2025-10-15T16:15:56.67Z" }, + { url = "https://files.pythonhosted.org/packages/06/f2/2e06a0f2adf23e3ae29283ad96959267938d0efd20a2e25353b70065bfec/numpy-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65611ecbb00ac9846efe04db15cbe6186f562f6bb7e5e05f077e53a599225d16", size = 16059531, upload-time = "2025-10-15T16:15:59.412Z" }, + { url = "https://files.pythonhosted.org/packages/b0/e7/b106253c7c0d5dc352b9c8fab91afd76a93950998167fa3e5afe4ef3a18f/numpy-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dabc42f9c6577bcc13001b8810d300fe814b4cfbe8a92c873f269484594f9786", size = 18578983, upload-time = "2025-10-15T16:16:01.804Z" }, + { url = "https://files.pythonhosted.org/packages/73/e3/04ecc41e71462276ee867ccbef26a4448638eadecf1bc56772c9ed6d0255/numpy-2.3.4-cp312-cp312-win32.whl", hash = "sha256:a49d797192a8d950ca59ee2d0337a4d804f713bb5c3c50e8db26d49666e351dc", size = 6291380, 
upload-time = "2025-10-15T16:16:03.938Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a8/566578b10d8d0e9955b1b6cd5db4e9d4592dd0026a941ff7994cedda030a/numpy-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:985f1e46358f06c2a09921e8921e2c98168ed4ae12ccd6e5e87a4f1857923f32", size = 12787999, upload-time = "2025-10-15T16:16:05.801Z" }, + { url = "https://files.pythonhosted.org/packages/58/22/9c903a957d0a8071b607f5b1bff0761d6e608b9a965945411f867d515db1/numpy-2.3.4-cp312-cp312-win_arm64.whl", hash = "sha256:4635239814149e06e2cb9db3dd584b2fa64316c96f10656983b8026a82e6e4db", size = 10197412, upload-time = "2025-10-15T16:16:07.854Z" }, + { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966", size = 20950335, upload-time = "2025-10-15T16:16:10.304Z" }, + { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3", size = 14179878, upload-time = "2025-10-15T16:16:12.595Z" }, + { url = "https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197", size = 5108673, upload-time = "2025-10-15T16:16:14.877Z" }, + { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e", size = 6641438, upload-time = "2025-10-15T16:16:16.805Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7", size = 14281290, upload-time = "2025-10-15T16:16:18.764Z" }, + { url = "https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953", size = 16636543, upload-time = "2025-10-15T16:16:21.072Z" }, + { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37", size = 16056117, upload-time = "2025-10-15T16:16:23.369Z" }, + { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd", size = 18577788, upload-time = "2025-10-15T16:16:27.496Z" }, + { url = "https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646", size = 6282620, upload-time = "2025-10-15T16:16:29.811Z" }, + { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d", size = 12784672, upload-time = "2025-10-15T16:16:31.589Z" }, + { url = 
"https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc", size = 10196702, upload-time = "2025-10-15T16:16:33.902Z" }, + { url = "https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879", size = 21049003, upload-time = "2025-10-15T16:16:36.101Z" }, + { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562", size = 14302980, upload-time = "2025-10-15T16:16:39.124Z" }, + { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a", size = 5231472, upload-time = "2025-10-15T16:16:41.168Z" }, + { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6", size = 6739342, upload-time = "2025-10-15T16:16:43.777Z" }, + { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7", size = 14354338, upload-time = "2025-10-15T16:16:46.081Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0", size = 16702392, upload-time = "2025-10-15T16:16:48.455Z" }, + { url = "https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f", size = 16134998, upload-time = "2025-10-15T16:16:51.114Z" }, + { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64", size = 18651574, upload-time = "2025-10-15T16:16:53.429Z" }, + { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb", size = 6413135, upload-time = "2025-10-15T16:16:55.992Z" }, + { url = "https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c", size = 12928582, upload-time = "2025-10-15T16:16:57.943Z" }, + { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40", size = 10266691, upload-time = "2025-10-15T16:17:00.048Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/71/ae6170143c115732470ae3a2d01512870dd16e0953f8a6dc89525696069b/numpy-2.3.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81c3e6d8c97295a7360d367f9f8553973651b76907988bb6066376bc2252f24e", size = 20955580, upload-time = "2025-10-15T16:17:02.509Z" }, + { url = "https://files.pythonhosted.org/packages/af/39/4be9222ffd6ca8a30eda033d5f753276a9c3426c397bb137d8e19dedd200/numpy-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7c26b0b2bf58009ed1f38a641f3db4be8d960a417ca96d14e5b06df1506d41ff", size = 14188056, upload-time = "2025-10-15T16:17:04.873Z" }, + { url = "https://files.pythonhosted.org/packages/6c/3d/d85f6700d0a4aa4f9491030e1021c2b2b7421b2b38d01acd16734a2bfdc7/numpy-2.3.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:62b2198c438058a20b6704351b35a1d7db881812d8512d67a69c9de1f18ca05f", size = 5116555, upload-time = "2025-10-15T16:17:07.499Z" }, + { url = "https://files.pythonhosted.org/packages/bf/04/82c1467d86f47eee8a19a464c92f90a9bb68ccf14a54c5224d7031241ffb/numpy-2.3.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:9d729d60f8d53a7361707f4b68a9663c968882dd4f09e0d58c044c8bf5faee7b", size = 6643581, upload-time = "2025-10-15T16:17:09.774Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d3/c79841741b837e293f48bd7db89d0ac7a4f2503b382b78a790ef1dc778a5/numpy-2.3.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd0c630cf256b0a7fd9d0a11c9413b42fef5101219ce6ed5a09624f5a65392c7", size = 14299186, upload-time = "2025-10-15T16:17:11.937Z" }, + { url = "https://files.pythonhosted.org/packages/e8/7e/4a14a769741fbf237eec5a12a2cbc7a4c4e061852b6533bcb9e9a796c908/numpy-2.3.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5e081bc082825f8b139f9e9fe42942cb4054524598aaeb177ff476cc76d09d2", size = 16638601, upload-time = "2025-10-15T16:17:14.391Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/87/1c1de269f002ff0a41173fe01dcc925f4ecff59264cd8f96cf3b60d12c9b/numpy-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15fb27364ed84114438fff8aaf998c9e19adbeba08c0b75409f8c452a8692c52", size = 16074219, upload-time = "2025-10-15T16:17:17.058Z" }, + { url = "https://files.pythonhosted.org/packages/cd/28/18f72ee77408e40a76d691001ae599e712ca2a47ddd2c4f695b16c65f077/numpy-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:85d9fb2d8cd998c84d13a79a09cc0c1091648e848e4e6249b0ccd7f6b487fa26", size = 18576702, upload-time = "2025-10-15T16:17:19.379Z" }, + { url = "https://files.pythonhosted.org/packages/c3/76/95650169b465ececa8cf4b2e8f6df255d4bf662775e797ade2025cc51ae6/numpy-2.3.4-cp314-cp314-win32.whl", hash = "sha256:e73d63fd04e3a9d6bc187f5455d81abfad05660b212c8804bf3b407e984cd2bc", size = 6337136, upload-time = "2025-10-15T16:17:22.886Z" }, + { url = "https://files.pythonhosted.org/packages/dc/89/a231a5c43ede5d6f77ba4a91e915a87dea4aeea76560ba4d2bf185c683f0/numpy-2.3.4-cp314-cp314-win_amd64.whl", hash = "sha256:3da3491cee49cf16157e70f607c03a217ea6647b1cea4819c4f48e53d49139b9", size = 12920542, upload-time = "2025-10-15T16:17:24.783Z" }, + { url = "https://files.pythonhosted.org/packages/0d/0c/ae9434a888f717c5ed2ff2393b3f344f0ff6f1c793519fa0c540461dc530/numpy-2.3.4-cp314-cp314-win_arm64.whl", hash = "sha256:6d9cd732068e8288dbe2717177320723ccec4fb064123f0caf9bbd90ab5be868", size = 10480213, upload-time = "2025-10-15T16:17:26.935Z" }, + { url = "https://files.pythonhosted.org/packages/83/4b/c4a5f0841f92536f6b9592694a5b5f68c9ab37b775ff342649eadf9055d3/numpy-2.3.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:22758999b256b595cf0b1d102b133bb61866ba5ceecf15f759623b64c020c9ec", size = 21052280, upload-time = "2025-10-15T16:17:29.638Z" }, + { url = "https://files.pythonhosted.org/packages/3e/80/90308845fc93b984d2cc96d83e2324ce8ad1fd6efea81b324cba4b673854/numpy-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:9cb177bc55b010b19798dc5497d540dea67fd13a8d9e882b2dae71de0cf09eb3", size = 14302930, upload-time = "2025-10-15T16:17:32.384Z" }, + { url = "https://files.pythonhosted.org/packages/3d/4e/07439f22f2a3b247cec4d63a713faae55e1141a36e77fb212881f7cda3fb/numpy-2.3.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:0f2bcc76f1e05e5ab58893407c63d90b2029908fa41f9f1cc51eecce936c3365", size = 5231504, upload-time = "2025-10-15T16:17:34.515Z" }, + { url = "https://files.pythonhosted.org/packages/ab/de/1e11f2547e2fe3d00482b19721855348b94ada8359aef5d40dd57bfae9df/numpy-2.3.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dc20bde86802df2ed8397a08d793da0ad7a5fd4ea3ac85d757bf5dd4ad7c252", size = 6739405, upload-time = "2025-10-15T16:17:36.128Z" }, + { url = "https://files.pythonhosted.org/packages/3b/40/8cd57393a26cebe2e923005db5134a946c62fa56a1087dc7c478f3e30837/numpy-2.3.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e199c087e2aa71c8f9ce1cb7a8e10677dc12457e7cc1be4798632da37c3e86e", size = 14354866, upload-time = "2025-10-15T16:17:38.884Z" }, + { url = "https://files.pythonhosted.org/packages/93/39/5b3510f023f96874ee6fea2e40dfa99313a00bf3ab779f3c92978f34aace/numpy-2.3.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85597b2d25ddf655495e2363fe044b0ae999b75bc4d630dc0d886484b03a5eb0", size = 16703296, upload-time = "2025-10-15T16:17:41.564Z" }, + { url = "https://files.pythonhosted.org/packages/41/0d/19bb163617c8045209c1996c4e427bccbc4bbff1e2c711f39203c8ddbb4a/numpy-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04a69abe45b49c5955923cf2c407843d1c85013b424ae8a560bba16c92fe44a0", size = 16136046, upload-time = "2025-10-15T16:17:43.901Z" }, + { url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f", size 
= 18652691, upload-time = "2025-10-15T16:17:46.247Z" }, + { url = "https://files.pythonhosted.org/packages/f8/73/f85056701dbbbb910c51d846c58d29fd46b30eecd2b6ba760fc8b8a1641b/numpy-2.3.4-cp314-cp314t-win32.whl", hash = "sha256:863e3b5f4d9915aaf1b8ec79ae560ad21f0b8d5e3adc31e73126491bb86dee1d", size = 6485782, upload-time = "2025-10-15T16:17:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/17/90/28fa6f9865181cb817c2471ee65678afa8a7e2a1fb16141473d5fa6bacc3/numpy-2.3.4-cp314-cp314t-win_amd64.whl", hash = "sha256:962064de37b9aef801d33bc579690f8bfe6c5e70e29b61783f60bcba838a14d6", size = 13113301, upload-time = "2025-10-15T16:17:50.938Z" }, + { url = "https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29", size = 10547532, upload-time = "2025-10-15T16:17:53.48Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b6/64898f51a86ec88ca1257a59c1d7fd077b60082a119affefcdf1dd0df8ca/numpy-2.3.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6e274603039f924c0fe5cb73438fa9246699c78a6df1bd3decef9ae592ae1c05", size = 21131552, upload-time = "2025-10-15T16:17:55.845Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4c/f135dc6ebe2b6a3c77f4e4838fa63d350f85c99462012306ada1bd4bc460/numpy-2.3.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d149aee5c72176d9ddbc6803aef9c0f6d2ceeea7626574fc68518da5476fa346", size = 14377796, upload-time = "2025-10-15T16:17:58.308Z" }, + { url = "https://files.pythonhosted.org/packages/d0/a4/f33f9c23fcc13dd8412fc8614559b5b797e0aba9d8e01dfa8bae10c84004/numpy-2.3.4-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:6d34ed9db9e6395bb6cd33286035f73a59b058169733a9db9f85e650b88df37e", size = 5306904, upload-time = "2025-10-15T16:18:00.596Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/af/c44097f25f834360f9fb960fa082863e0bad14a42f36527b2a121abdec56/numpy-2.3.4-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:fdebe771ca06bb8d6abce84e51dca9f7921fe6ad34a0c914541b063e9a68928b", size = 6819682, upload-time = "2025-10-15T16:18:02.32Z" }, + { url = "https://files.pythonhosted.org/packages/c5/8c/cd283b54c3c2b77e188f63e23039844f56b23bba1712318288c13fe86baf/numpy-2.3.4-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:957e92defe6c08211eb77902253b14fe5b480ebc5112bc741fd5e9cd0608f847", size = 14422300, upload-time = "2025-10-15T16:18:04.271Z" }, + { url = "https://files.pythonhosted.org/packages/b0/f0/8404db5098d92446b3e3695cf41c6f0ecb703d701cb0b7566ee2177f2eee/numpy-2.3.4-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13b9062e4f5c7ee5c7e5be96f29ba71bc5a37fed3d1d77c37390ae00724d296d", size = 16760806, upload-time = "2025-10-15T16:18:06.668Z" }, + { url = "https://files.pythonhosted.org/packages/95/8e/2844c3959ce9a63acc7c8e50881133d86666f0420bcde695e115ced0920f/numpy-2.3.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:81b3a59793523e552c4a96109dde028aa4448ae06ccac5a76ff6532a85558a7f", size = 12973130, upload-time = "2025-10-15T16:18:09.397Z" }, ] [[package]] @@ -1987,104 +1965,100 @@ wheels = [ [[package]] name = "pillow" -version = "11.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069, upload-time = "2025-07-01T09:16:30.666Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/5d/45a3553a253ac8763f3561371432a90bdbe6000fbdcf1397ffe502aa206c/pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = 
"sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860", size = 5316554, upload-time = "2025-07-01T09:13:39.342Z" }, - { url = "https://files.pythonhosted.org/packages/7c/c8/67c12ab069ef586a25a4a79ced553586748fad100c77c0ce59bb4983ac98/pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad", size = 4686548, upload-time = "2025-07-01T09:13:41.835Z" }, - { url = "https://files.pythonhosted.org/packages/2f/bd/6741ebd56263390b382ae4c5de02979af7f8bd9807346d068700dd6d5cf9/pillow-11.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7107195ddc914f656c7fc8e4a5e1c25f32e9236ea3ea860f257b0436011fddd0", size = 5859742, upload-time = "2025-07-03T13:09:47.439Z" }, - { url = "https://files.pythonhosted.org/packages/ca/0b/c412a9e27e1e6a829e6ab6c2dca52dd563efbedf4c9c6aa453d9a9b77359/pillow-11.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc3e831b563b3114baac7ec2ee86819eb03caa1a2cef0b481a5675b59c4fe23b", size = 7633087, upload-time = "2025-07-03T13:09:51.796Z" }, - { url = "https://files.pythonhosted.org/packages/59/9d/9b7076aaf30f5dd17e5e5589b2d2f5a5d7e30ff67a171eb686e4eecc2adf/pillow-11.3.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1f182ebd2303acf8c380a54f615ec883322593320a9b00438eb842c1f37ae50", size = 5963350, upload-time = "2025-07-01T09:13:43.865Z" }, - { url = "https://files.pythonhosted.org/packages/f0/16/1a6bf01fb622fb9cf5c91683823f073f053005c849b1f52ed613afcf8dae/pillow-11.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4445fa62e15936a028672fd48c4c11a66d641d2c05726c7ec1f8ba6a572036ae", size = 6631840, upload-time = "2025-07-01T09:13:46.161Z" }, - { url = "https://files.pythonhosted.org/packages/7b/e6/6ff7077077eb47fde78739e7d570bdcd7c10495666b6afcd23ab56b19a43/pillow-11.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:71f511f6b3b91dd543282477be45a033e4845a40278fa8dcdbfdb07109bf18f9", size = 6074005, upload-time = "2025-07-01T09:13:47.829Z" }, - { url = "https://files.pythonhosted.org/packages/c3/3a/b13f36832ea6d279a697231658199e0a03cd87ef12048016bdcc84131601/pillow-11.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:040a5b691b0713e1f6cbe222e0f4f74cd233421e105850ae3b3c0ceda520f42e", size = 6708372, upload-time = "2025-07-01T09:13:52.145Z" }, - { url = "https://files.pythonhosted.org/packages/6c/e4/61b2e1a7528740efbc70b3d581f33937e38e98ef3d50b05007267a55bcb2/pillow-11.3.0-cp310-cp310-win32.whl", hash = "sha256:89bd777bc6624fe4115e9fac3352c79ed60f3bb18651420635f26e643e3dd1f6", size = 6277090, upload-time = "2025-07-01T09:13:53.915Z" }, - { url = "https://files.pythonhosted.org/packages/a9/d3/60c781c83a785d6afbd6a326ed4d759d141de43aa7365725cbcd65ce5e54/pillow-11.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:19d2ff547c75b8e3ff46f4d9ef969a06c30ab2d4263a9e287733aa8b2429ce8f", size = 6985988, upload-time = "2025-07-01T09:13:55.699Z" }, - { url = "https://files.pythonhosted.org/packages/9f/28/4f4a0203165eefb3763939c6789ba31013a2e90adffb456610f30f613850/pillow-11.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:819931d25e57b513242859ce1876c58c59dc31587847bf74cfe06b2e0cb22d2f", size = 2422899, upload-time = "2025-07-01T09:13:57.497Z" }, - { url = "https://files.pythonhosted.org/packages/db/26/77f8ed17ca4ffd60e1dcd220a6ec6d71210ba398cfa33a13a1cd614c5613/pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722", size = 5316531, upload-time = "2025-07-01T09:13:59.203Z" }, - { url = "https://files.pythonhosted.org/packages/cb/39/ee475903197ce709322a17a866892efb560f57900d9af2e55f86db51b0a5/pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288", size = 4686560, upload-time = "2025-07-01T09:14:01.101Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/90/442068a160fd179938ba55ec8c97050a612426fae5ec0a764e345839f76d/pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d", size = 5870978, upload-time = "2025-07-03T13:09:55.638Z" }, - { url = "https://files.pythonhosted.org/packages/13/92/dcdd147ab02daf405387f0218dcf792dc6dd5b14d2573d40b4caeef01059/pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494", size = 7641168, upload-time = "2025-07-03T13:10:00.37Z" }, - { url = "https://files.pythonhosted.org/packages/6e/db/839d6ba7fd38b51af641aa904e2960e7a5644d60ec754c046b7d2aee00e5/pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58", size = 5973053, upload-time = "2025-07-01T09:14:04.491Z" }, - { url = "https://files.pythonhosted.org/packages/f2/2f/d7675ecae6c43e9f12aa8d58b6012683b20b6edfbdac7abcb4e6af7a3784/pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f", size = 6640273, upload-time = "2025-07-01T09:14:06.235Z" }, - { url = "https://files.pythonhosted.org/packages/45/ad/931694675ede172e15b2ff03c8144a0ddaea1d87adb72bb07655eaffb654/pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e", size = 6082043, upload-time = "2025-07-01T09:14:07.978Z" }, - { url = "https://files.pythonhosted.org/packages/3a/04/ba8f2b11fc80d2dd462d7abec16351b45ec99cbbaea4387648a44190351a/pillow-11.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:932c754c2d51ad2b2271fd01c3d121daaa35e27efae2a616f77bf164bc0b3e94", size = 6715516, upload-time = "2025-07-01T09:14:10.233Z" }, - { url = 
"https://files.pythonhosted.org/packages/48/59/8cd06d7f3944cc7d892e8533c56b0acb68399f640786313275faec1e3b6f/pillow-11.3.0-cp311-cp311-win32.whl", hash = "sha256:b4b8f3efc8d530a1544e5962bd6b403d5f7fe8b9e08227c6b255f98ad82b4ba0", size = 6274768, upload-time = "2025-07-01T09:14:11.921Z" }, - { url = "https://files.pythonhosted.org/packages/f1/cc/29c0f5d64ab8eae20f3232da8f8571660aa0ab4b8f1331da5c2f5f9a938e/pillow-11.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1a992e86b0dd7aeb1f053cd506508c0999d710a8f07b4c791c63843fc6a807ac", size = 6986055, upload-time = "2025-07-01T09:14:13.623Z" }, - { url = "https://files.pythonhosted.org/packages/c6/df/90bd886fabd544c25addd63e5ca6932c86f2b701d5da6c7839387a076b4a/pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd", size = 2423079, upload-time = "2025-07-01T09:14:15.268Z" }, - { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800, upload-time = "2025-07-01T09:14:17.648Z" }, - { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296, upload-time = "2025-07-01T09:14:19.828Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726, upload-time = "2025-07-03T13:10:04.448Z" }, - { url = 
"https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652, upload-time = "2025-07-03T13:10:10.391Z" }, - { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787, upload-time = "2025-07-01T09:14:21.63Z" }, - { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236, upload-time = "2025-07-01T09:14:23.321Z" }, - { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950, upload-time = "2025-07-01T09:14:25.237Z" }, - { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d", size = 6723358, upload-time = "2025-07-01T09:14:27.053Z" }, - { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149", size = 6275079, upload-time = "2025-07-01T09:14:30.104Z" }, - { url = 
"https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d", size = 6986324, upload-time = "2025-07-01T09:14:31.899Z" }, - { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542", size = 2423067, upload-time = "2025-07-01T09:14:33.709Z" }, - { url = "https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd", size = 2128328, upload-time = "2025-07-01T09:14:35.276Z" }, - { url = "https://files.pythonhosted.org/packages/4b/e8/100c3d114b1a0bf4042f27e0f87d2f25e857e838034e98ca98fe7b8c0a9c/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8", size = 2170652, upload-time = "2025-07-01T09:14:37.203Z" }, - { url = "https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f", size = 2227443, upload-time = "2025-07-01T09:14:39.344Z" }, - { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c", size = 5278474, upload-time = "2025-07-01T09:14:41.843Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd", size = 4686038, upload-time = "2025-07-01T09:14:44.008Z" }, - { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e", size = 5864407, upload-time = "2025-07-03T13:10:15.628Z" }, - { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1", size = 7639094, upload-time = "2025-07-03T13:10:21.857Z" }, - { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805", size = 5973503, upload-time = "2025-07-01T09:14:45.698Z" }, - { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8", size = 6642574, upload-time = "2025-07-01T09:14:47.415Z" }, - { url = "https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2", size = 6084060, upload-time = "2025-07-01T09:14:49.636Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/ea/502d938cbaeec836ac28a9b730193716f0114c41325db428e6b280513f09/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b", size = 6721407, upload-time = "2025-07-01T09:14:51.962Z" }, - { url = "https://files.pythonhosted.org/packages/45/9c/9c5e2a73f125f6cbc59cc7087c8f2d649a7ae453f83bd0362ff7c9e2aee2/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3", size = 6273841, upload-time = "2025-07-01T09:14:54.142Z" }, - { url = "https://files.pythonhosted.org/packages/23/85/397c73524e0cd212067e0c969aa245b01d50183439550d24d9f55781b776/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51", size = 6978450, upload-time = "2025-07-01T09:14:56.436Z" }, - { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580", size = 2423055, upload-time = "2025-07-01T09:14:58.072Z" }, - { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e", size = 5281110, upload-time = "2025-07-01T09:14:59.79Z" }, - { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d", size = 4689547, upload-time = "2025-07-01T09:15:01.648Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced", size = 5901554, upload-time = "2025-07-03T13:10:27.018Z" }, - { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c", size = 7669132, upload-time = "2025-07-03T13:10:33.01Z" }, - { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8", size = 6005001, upload-time = "2025-07-01T09:15:03.365Z" }, - { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59", size = 6668814, upload-time = "2025-07-01T09:15:05.655Z" }, - { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe", size = 6113124, upload-time = "2025-07-01T09:15:07.358Z" }, - { url = "https://files.pythonhosted.org/packages/cd/5a/6fec59b1dfb619234f7636d4157d11fb4e196caeee220232a8d2ec48488d/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c", size = 6747186, upload-time = "2025-07-01T09:15:09.317Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/6b/00187a044f98255225f172de653941e61da37104a9ea60e4f6887717e2b5/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788", size = 6277546, upload-time = "2025-07-01T09:15:11.311Z" }, - { url = "https://files.pythonhosted.org/packages/e8/5c/6caaba7e261c0d75bab23be79f1d06b5ad2a2ae49f028ccec801b0e853d6/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31", size = 6985102, upload-time = "2025-07-01T09:15:13.164Z" }, - { url = "https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e", size = 2424803, upload-time = "2025-07-01T09:15:15.695Z" }, - { url = "https://files.pythonhosted.org/packages/73/f4/04905af42837292ed86cb1b1dabe03dce1edc008ef14c473c5c7e1443c5d/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12", size = 5278520, upload-time = "2025-07-01T09:15:17.429Z" }, - { url = "https://files.pythonhosted.org/packages/41/b0/33d79e377a336247df6348a54e6d2a2b85d644ca202555e3faa0cf811ecc/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a", size = 4686116, upload-time = "2025-07-01T09:15:19.423Z" }, - { url = "https://files.pythonhosted.org/packages/49/2d/ed8bc0ab219ae8768f529597d9509d184fe8a6c4741a6864fea334d25f3f/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632", size = 5864597, upload-time = "2025-07-03T13:10:38.404Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/3d/b932bb4225c80b58dfadaca9d42d08d0b7064d2d1791b6a237f87f661834/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673", size = 7638246, upload-time = "2025-07-03T13:10:44.987Z" }, - { url = "https://files.pythonhosted.org/packages/09/b5/0487044b7c096f1b48f0d7ad416472c02e0e4bf6919541b111efd3cae690/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027", size = 5973336, upload-time = "2025-07-01T09:15:21.237Z" }, - { url = "https://files.pythonhosted.org/packages/a8/2d/524f9318f6cbfcc79fbc004801ea6b607ec3f843977652fdee4857a7568b/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77", size = 6642699, upload-time = "2025-07-01T09:15:23.186Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d2/a9a4f280c6aefedce1e8f615baaa5474e0701d86dd6f1dede66726462bbd/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874", size = 6083789, upload-time = "2025-07-01T09:15:25.1Z" }, - { url = "https://files.pythonhosted.org/packages/fe/54/86b0cd9dbb683a9d5e960b66c7379e821a19be4ac5810e2e5a715c09a0c0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a", size = 6720386, upload-time = "2025-07-01T09:15:27.378Z" }, - { url = "https://files.pythonhosted.org/packages/e7/95/88efcaf384c3588e24259c4203b909cbe3e3c2d887af9e938c2022c9dd48/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214", size = 6370911, upload-time = "2025-07-01T09:15:29.294Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/cc/934e5820850ec5eb107e7b1a72dd278140731c669f396110ebc326f2a503/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635", size = 7117383, upload-time = "2025-07-01T09:15:31.128Z" }, - { url = "https://files.pythonhosted.org/packages/d6/e9/9c0a616a71da2a5d163aa37405e8aced9a906d574b4a214bede134e731bc/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6", size = 2511385, upload-time = "2025-07-01T09:15:33.328Z" }, - { url = "https://files.pythonhosted.org/packages/1a/33/c88376898aff369658b225262cd4f2659b13e8178e7534df9e6e1fa289f6/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae", size = 5281129, upload-time = "2025-07-01T09:15:35.194Z" }, - { url = "https://files.pythonhosted.org/packages/1f/70/d376247fb36f1844b42910911c83a02d5544ebd2a8bad9efcc0f707ea774/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653", size = 4689580, upload-time = "2025-07-01T09:15:37.114Z" }, - { url = "https://files.pythonhosted.org/packages/eb/1c/537e930496149fbac69efd2fc4329035bbe2e5475b4165439e3be9cb183b/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6", size = 5902860, upload-time = "2025-07-03T13:10:50.248Z" }, - { url = "https://files.pythonhosted.org/packages/bd/57/80f53264954dcefeebcf9dae6e3eb1daea1b488f0be8b8fef12f79a3eb10/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36", size = 7670694, upload-time = "2025-07-03T13:10:56.432Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/ff/4727d3b71a8578b4587d9c276e90efad2d6fe0335fd76742a6da08132e8c/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b", size = 6005888, upload-time = "2025-07-01T09:15:39.436Z" }, - { url = "https://files.pythonhosted.org/packages/05/ae/716592277934f85d3be51d7256f3636672d7b1abfafdc42cf3f8cbd4b4c8/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477", size = 6670330, upload-time = "2025-07-01T09:15:41.269Z" }, - { url = "https://files.pythonhosted.org/packages/e7/bb/7fe6cddcc8827b01b1a9766f5fdeb7418680744f9082035bdbabecf1d57f/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50", size = 6114089, upload-time = "2025-07-01T09:15:43.13Z" }, - { url = "https://files.pythonhosted.org/packages/8b/f5/06bfaa444c8e80f1a8e4bff98da9c83b37b5be3b1deaa43d27a0db37ef84/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b", size = 6748206, upload-time = "2025-07-01T09:15:44.937Z" }, - { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12", size = 6377370, upload-time = "2025-07-01T09:15:46.673Z" }, - { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db", size = 7121500, upload-time = "2025-07-01T09:15:48.512Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, - { url = "https://files.pythonhosted.org/packages/6f/8b/209bd6b62ce8367f47e68a218bffac88888fdf2c9fcf1ecadc6c3ec1ebc7/pillow-11.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3cee80663f29e3843b68199b9d6f4f54bd1d4a6b59bdd91bceefc51238bcb967", size = 5270556, upload-time = "2025-07-01T09:16:09.961Z" }, - { url = "https://files.pythonhosted.org/packages/2e/e6/231a0b76070c2cfd9e260a7a5b504fb72da0a95279410fa7afd99d9751d6/pillow-11.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b5f56c3f344f2ccaf0dd875d3e180f631dc60a51b314295a3e681fe8cf851fbe", size = 4654625, upload-time = "2025-07-01T09:16:11.913Z" }, - { url = "https://files.pythonhosted.org/packages/13/f4/10cf94fda33cb12765f2397fc285fa6d8eb9c29de7f3185165b702fc7386/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e67d793d180c9df62f1f40aee3accca4829d3794c95098887edc18af4b8b780c", size = 4874207, upload-time = "2025-07-03T13:11:10.201Z" }, - { url = "https://files.pythonhosted.org/packages/72/c9/583821097dc691880c92892e8e2d41fe0a5a3d6021f4963371d2f6d57250/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d000f46e2917c705e9fb93a3606ee4a819d1e3aa7a9b442f6444f07e77cf5e25", size = 6583939, upload-time = "2025-07-03T13:11:15.68Z" }, - { url = "https://files.pythonhosted.org/packages/3b/8e/5c9d410f9217b12320efc7c413e72693f48468979a013ad17fd690397b9a/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:527b37216b6ac3a12d7838dc3bd75208ec57c1c6d11ef01902266a5a0c14fc27", size = 4957166, upload-time = "2025-07-01T09:16:13.74Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/bb/78347dbe13219991877ffb3a91bf09da8317fbfcd4b5f9140aeae020ad71/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be5463ac478b623b9dd3937afd7fb7ab3d79dd290a28e2b6df292dc75063eb8a", size = 5581482, upload-time = "2025-07-01T09:16:16.107Z" }, - { url = "https://files.pythonhosted.org/packages/d9/28/1000353d5e61498aaeaaf7f1e4b49ddb05f2c6575f9d4f9f914a3538b6e1/pillow-11.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8dc70ca24c110503e16918a658b869019126ecfe03109b754c402daff12b3d9f", size = 6984596, upload-time = "2025-07-01T09:16:18.07Z" }, - { url = "https://files.pythonhosted.org/packages/9e/e3/6fa84033758276fb31da12e5fb66ad747ae83b93c67af17f8c6ff4cc8f34/pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6", size = 5270566, upload-time = "2025-07-01T09:16:19.801Z" }, - { url = "https://files.pythonhosted.org/packages/5b/ee/e8d2e1ab4892970b561e1ba96cbd59c0d28cf66737fc44abb2aec3795a4e/pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438", size = 4654618, upload-time = "2025-07-01T09:16:21.818Z" }, - { url = "https://files.pythonhosted.org/packages/f2/6d/17f80f4e1f0761f02160fc433abd4109fa1548dcfdca46cfdadaf9efa565/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3", size = 4874248, upload-time = "2025-07-03T13:11:20.738Z" }, - { url = "https://files.pythonhosted.org/packages/de/5f/c22340acd61cef960130585bbe2120e2fd8434c214802f07e8c03596b17e/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c", size = 6583963, upload-time = "2025-07-03T13:11:26.283Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/5e/03966aedfbfcbb4d5f8aa042452d3361f325b963ebbadddac05b122e47dd/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361", size = 4957170, upload-time = "2025-07-01T09:16:23.762Z" }, - { url = "https://files.pythonhosted.org/packages/cc/2d/e082982aacc927fc2cab48e1e731bdb1643a1406acace8bed0900a61464e/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7", size = 5581505, upload-time = "2025-07-01T09:16:25.593Z" }, - { url = "https://files.pythonhosted.org/packages/34/e7/ae39f538fd6844e982063c3a5e4598b8ced43b9633baa3a85ef33af8c05c/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8", size = 6984598, upload-time = "2025-07-01T09:16:27.732Z" }, +version = "12.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/08/26e68b6b5da219c2a2cb7b563af008b53bb8e6b6fcb3fa40715fcdb2523a/pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b", size = 5289809, upload-time = "2025-10-15T18:21:27.791Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/4e58fb097fb74c7b4758a680aacd558810a417d1edaa7000142976ef9d2f/pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1", size = 4650606, upload-time = "2025-10-15T18:21:29.823Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/e0/1fa492aa9f77b3bc6d471c468e62bfea1823056bf7e5e4f1914d7ab2565e/pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363", size = 6221023, upload-time = "2025-10-15T18:21:31.415Z" }, + { url = "https://files.pythonhosted.org/packages/c1/09/4de7cd03e33734ccd0c876f0251401f1314e819cbfd89a0fcb6e77927cc6/pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca", size = 8024937, upload-time = "2025-10-15T18:21:33.453Z" }, + { url = "https://files.pythonhosted.org/packages/2e/69/0688e7c1390666592876d9d474f5e135abb4acb39dcb583c4dc5490f1aff/pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e", size = 6334139, upload-time = "2025-10-15T18:21:35.395Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1c/880921e98f525b9b44ce747ad1ea8f73fd7e992bafe3ca5e5644bf433dea/pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782", size = 7026074, upload-time = "2025-10-15T18:21:37.219Z" }, + { url = "https://files.pythonhosted.org/packages/28/03/96f718331b19b355610ef4ebdbbde3557c726513030665071fd025745671/pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10", size = 6448852, upload-time = "2025-10-15T18:21:39.168Z" }, + { url = "https://files.pythonhosted.org/packages/3a/a0/6a193b3f0cc9437b122978d2c5cbce59510ccf9a5b48825096ed7472da2f/pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa", size = 7117058, upload-time = "2025-10-15T18:21:40.997Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/c4/043192375eaa4463254e8e61f0e2ec9a846b983929a8d0a7122e0a6d6fff/pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275", size = 6295431, upload-time = "2025-10-15T18:21:42.518Z" }, + { url = "https://files.pythonhosted.org/packages/92/c6/c2f2fc7e56301c21827e689bb8b0b465f1b52878b57471a070678c0c33cd/pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d", size = 7000412, upload-time = "2025-10-15T18:21:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d2/5f675067ba82da7a1c238a73b32e3fd78d67f9d9f80fbadd33a40b9c0481/pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7", size = 2435903, upload-time = "2025-10-15T18:21:46.29Z" }, + { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" }, + { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" }, + { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" }, + { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" }, + { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" }, + { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" }, + { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" }, + { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, + { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, + { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" }, + { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" }, + { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" }, + { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" }, + { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" }, + { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" }, + { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" }, + { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" }, + { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" }, + { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" }, + { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" }, + { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" }, + { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" }, + { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" }, + { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" }, + { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" }, + { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" }, + { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" }, + { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" }, + { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" }, + { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" }, + { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" }, + { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" }, + { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" }, + { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" }, + { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" }, + { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" }, + { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" }, + { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" }, + { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" }, + { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" }, + { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" }, + { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" }, + { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" }, + { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" }, + { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" }, + { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" }, + { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" }, + { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, + { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" }, + { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" }, + { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" }, + { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, ] [[package]] @@ -2174,7 +2148,7 @@ requires-dist = [ { name = "backports-strenum", marker = "python_full_version < '3.11'", specifier = ">=1.3.0" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34.131" }, { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, - { name = "cocode", marker = "extra == 'dev'", editable = "../cocode" }, + { name = "cocode", marker = "extra == 'dev'", specifier = "==0.3.0" }, { name = "fal-client", marker = "extra == 'fal'", specifier = ">=0.4.1" }, { name = "filetype", specifier = ">=1.2.0" }, { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.2.1" }, @@ 
-3134,7 +3108,7 @@ version = "3.5.0.20251001" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ac/5b/d903d119f6159e0c9291c53db0e92ea33cb725331f75069d1b45b83b4405/types_networkx-3.5.0.20251001.tar.gz", hash = "sha256:8e3c5c491ba5870d75e175751d70ddeac81df43caf2a64bae161e181f5e8ea7a", size = 71748, upload-time = "2025-10-01T03:04:26.155Z" } wheels = [ From af386232f3aff04ffed287f292938b813e385d36 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 14:11:16 +0200 Subject: [PATCH 100/115] fix --- pipelex/core/stuffs/stuff_factory.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pipelex/core/stuffs/stuff_factory.py b/pipelex/core/stuffs/stuff_factory.py index d7e0f820a..c11ba9d3c 100644 --- a/pipelex/core/stuffs/stuff_factory.py +++ b/pipelex/core/stuffs/stuff_factory.py @@ -8,7 +8,6 @@ from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode -from pipelex.core.domains.domain import SpecialDomain from pipelex.core.stuffs.list_content import ListContent from pipelex.core.stuffs.stuff import Stuff from pipelex.core.stuffs.stuff_content import StuffContent @@ -226,12 +225,9 @@ def make_stuff_from_stuff_content_using_search_domains( if not concept_code: msg = "Stuff content data dict is badly formed: no concept code" raise StuffFactoryError(msg) - domain_and_concept_code = 
ConceptFactory.make_domain_and_concept_code_from_concept_string_or_code( - domain=SpecialDomain.NATIVE, concept_string_or_code=concept_code - ) content_value = stuff_content_dict["content"] - if NativeConceptCode.get_validated_native_concept_string(concept_string_or_code=domain_and_concept_code.concept_code): - concept = ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode(domain_and_concept_code.concept_code)) + if NativeConceptCode.get_validated_native_concept_string(concept_string_or_code=concept_code): + concept = ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode(concept_code)) content = StuffContentFactory.make_stuff_content_from_concept_with_fallback( concept=concept, value=content_value, From 7b03343e47bec2cdc61c34dc977aef2e3bfea417 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 15:17:44 +0200 Subject: [PATCH 101/115] fix library Manager --- pipelex/builder/builder.py | 2 +- pipelex/core/domains/domain_library.py | 2 +- .../core/domains/domain_library_abstract.py | 2 - pipelex/core/pipes/pipe_library.py | 1 - pipelex/hub.py | 1 - pipelex/libraries/library.py | 55 ++++-- pipelex/libraries/library_manager.py | 156 ++++-------------- pipelex/libraries/library_manager_abstract.py | 61 +------ pipelex/pipelex.py | 16 +- 9 files changed, 93 insertions(+), 203 deletions(-) diff --git a/pipelex/builder/builder.py b/pipelex/builder/builder.py index bca9d8647..4ee6f4bb7 100644 --- a/pipelex/builder/builder.py +++ b/pipelex/builder/builder.py @@ -248,7 +248,7 @@ async def validate_bundle_spec(pipelex_bundle_spec: PipelexBundleSpec): raise PipelexBundleError(message=pipe_spec_error.message, pipe_failures=pipe_failures) from pipe_spec_error try: - pipes = library_manager.load_from_blueprint(blueprint=pipelex_bundle_blueprint) + pipes = library_manager.get_library().load_from_blueprints(blueprints=[pipelex_bundle_blueprint]) dry_run_result = await dry_run_pipes(pipes=pipes, raise_on_failure=True) except 
StaticValidationError as static_validation_error: static_validation_error_data = StaticValidationErrorData( diff --git a/pipelex/core/domains/domain_library.py b/pipelex/core/domains/domain_library.py index 5996acc2b..828e793f2 100644 --- a/pipelex/core/domains/domain_library.py +++ b/pipelex/core/domains/domain_library.py @@ -51,4 +51,4 @@ def get_required_domain(self, domain: str) -> Domain: if not the_domain: msg = f"Domain '{domain}' not found. Check for typos and make sure it is declared in a pipeline library." raise DomainLibraryError(msg) - return the_domain \ No newline at end of file + return the_domain diff --git a/pipelex/core/domains/domain_library_abstract.py b/pipelex/core/domains/domain_library_abstract.py index 271d7ae79..4102b4605 100644 --- a/pipelex/core/domains/domain_library_abstract.py +++ b/pipelex/core/domains/domain_library_abstract.py @@ -7,12 +7,10 @@ class DomainLibraryAbstract(ABC): @abstractmethod def get_domain(self, domain: str) -> Domain | None: """Get a domain by code from this library.""" - pass @abstractmethod def get_required_domain(self, domain: str) -> Domain: """Get a domain by code from this library, raising an error if not found.""" - pass @abstractmethod def teardown(self) -> None: diff --git a/pipelex/core/pipes/pipe_library.py b/pipelex/core/pipes/pipe_library.py index f970b9490..4cfd0f7a4 100644 --- a/pipelex/core/pipes/pipe_library.py +++ b/pipelex/core/pipes/pipe_library.py @@ -86,7 +86,6 @@ def remove_pipes_by_codes(self, pipe_codes: list[str]) -> None: if pipe_code in self.root: del self.root[pipe_code] - @override def pretty_list_pipes(self) -> None: def _format_concept_code(concept_code: str | None, current_domain: str) -> str: diff --git a/pipelex/hub.py b/pipelex/hub.py index 807a7c21f..8946bd7cf 100644 --- a/pipelex/hub.py +++ b/pipelex/hub.py @@ -24,7 +24,6 @@ from pipelex.observer.observer_protocol import ObserverProtocol from pipelex.pipe_run.pipe_router_protocol import PipeRouterProtocol from 
pipelex.pipeline.activity.activity_manager_protocol import ActivityManagerProtocol -from pipelex.pipeline.pipeline import Pipeline from pipelex.pipeline.pipeline_manager_abstract import PipelineManagerAbstract from pipelex.pipeline.track.pipeline_tracker_protocol import PipelineTrackerProtocol from pipelex.plugins.plugin_manager import PluginManager diff --git a/pipelex/libraries/library.py b/pipelex/libraries/library.py index 51327e25e..b47b83f61 100644 --- a/pipelex/libraries/library.py +++ b/pipelex/libraries/library.py @@ -15,10 +15,10 @@ class Library(BaseModel): """A Library bundles together domain, concept, and pipe libraries for a specific context. - + This represents a complete set of Pipelex definitions (domains, concepts, pipes) that can be loaded and used together, typically for a single pipeline run. - + Each Library (except BASE) inherits native concepts and base pipes from the BASE library. """ @@ -29,14 +29,10 @@ class Library(BaseModel): @classmethod def make_empty(cls) -> "Library": """Create an empty library with initialized concept library (includes native concepts). - + This should only be used for the BASE library. 
""" - return cls( - domain_library=DomainLibrary.make_empty(), - concept_library=ConceptLibrary.make_empty(), - pipe_library=PipeLibrary.make_empty(), - ) + return cls.make_base() @classmethod def make_base(cls) -> "Library": @@ -48,16 +44,25 @@ def make_base(cls) -> "Library": # 2 - Pipe library, add the builder pipes pipe_library = PipeLibrary.make_empty() - + # 3 - Domain library, add the domains domain_library = DomainLibrary.make_empty() - + return cls( domain_library=domain_library, concept_library=concept_library, pipe_library=pipe_library, ) + def get_domain_library(self) -> DomainLibrary: + return self.domain_library + + def get_concept_library(self) -> ConceptLibrary: + return self.concept_library + + def get_pipe_library(self) -> PipeLibrary: + return self.pipe_library + def teardown(self) -> None: """Teardown all libraries in this bundle.""" self.pipe_library.teardown() @@ -72,35 +77,35 @@ def validate_with_libraries(self) -> None: def load_from_blueprints(self, blueprints: list[PipelexBundleBlueprint]) -> list[PipeAbstract]: """Load domains, concepts, and pipes from a list of blueprints. 
- + Args: blueprints: List of parsed PLX blueprints to load - + Returns: List of all pipes that were loaded """ all_pipes: list[PipeAbstract] = [] - + # Load all domains first all_domains: list[Domain] = [] for blueprint in blueprints: domain = self._load_domain_from_blueprint(blueprint) all_domains.append(domain) self.domain_library.add_domains(domains=all_domains) - + # Load all concepts second all_concepts: list[Concept] = [] for blueprint in blueprints: concepts = self._load_concepts_from_blueprint(blueprint) all_concepts.extend(concepts) self.concept_library.add_concepts(concepts=all_concepts) - + # Load all pipes third for blueprint in blueprints: pipes = self._load_pipes_from_blueprint(blueprint) all_pipes.extend(pipes) self.pipe_library.add_pipes(pipes=all_pipes) - + return all_pipes def _load_domain_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> Domain: @@ -146,3 +151,21 @@ def _load_pipes_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[ pipes.append(pipe) return pipes + def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> None: + if blueprint.pipe is not None: + self.pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) + + # Remove concepts (they may depend on domain) + if blueprint.concept is not None: + from pipelex.core.concepts.concept_factory import ConceptFactory + + concept_codes_to_remove = [ + ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) + for concept_code in blueprint.concept + ] + self.concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) + + self.domain_library.remove_domain_by_code(domain_code=blueprint.domain) + + def validate_library(self): + self.validate_with_libraries() diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 3a600ac18..0124b4124 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -6,26 +6,20 @@ 
from pipelex import log from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint -from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_library import ConceptLibrary -from pipelex.core.domains.domain import Domain from pipelex.core.domains.domain_library import DomainLibrary from pipelex.core.interpreter import PipelexInterpreter -from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.core.stuffs.structured_content import StructuredContent from pipelex.core.validation import report_validation_error from pipelex.exceptions import ( ConceptDefinitionError, ConceptLibraryError, - ConceptLoadingError, DomainDefinitionError, - DomainLoadingError, LibraryError, LibraryLoadingError, PipeDefinitionError, PipeLibraryError, - PipeLoadingError, ) from pipelex.libraries.library import Library from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract @@ -36,6 +30,7 @@ ) from pipelex.system.configuration.config_loader import config_manager from pipelex.system.registries.class_registry_utils import ClassRegistryUtils +from pipelex.system.registries.func_registry import func_registry from pipelex.system.registries.func_registry_utils import FuncRegistryUtils from pipelex.types import StrEnum @@ -55,11 +50,12 @@ def error_class(self) -> type[LibraryError]: class SpecialLibraryId(StrEnum): """Special library identifiers. - - BASE: The base/default library containing native concepts and builder pipes. - All new libraries inherit (copy) the contents of the BASE library when created. 
+ + UNTITLED: The untitled/default library """ - BASE = "base" + + UNTITLED = "untitled" + class LibraryManager(LibraryManagerAbstract): allowed_root_attributes: ClassVar[list[str]] = [ @@ -71,8 +67,8 @@ class LibraryManager(LibraryManagerAbstract): ] def __init__(self): - # BASE library is the fallback library for all others - self._libraries: dict[str, Library] = {SpecialLibraryId.BASE: Library.make_empty()} + # UNTITLED library is the fallback library for all others + self._libraries: dict[str, Library] = {SpecialLibraryId.UNTITLED: Library.make_empty()} ############################################################ # Manager lifecycle @@ -81,7 +77,7 @@ def __init__(self): @override def setup(self) -> None: self._libraries.clear() - self._libraries[SpecialLibraryId.BASE] = Library.make_empty() + self.create_library(library_id=SpecialLibraryId.UNTITLED) @override def teardown(self) -> None: @@ -94,87 +90,53 @@ def reset(self) -> None: self.teardown() self.setup() + @override + def create_library(self, library_id: str): + if library_id in self._libraries: + msg = f"Library '{library_id}' already exists" + raise LibraryError(msg) + self._libraries[library_id] = Library.make_empty() + @override def open_library(self, library_id: str) -> None: """Open a new library with the given library_id. - + The new library will inherit native concepts and base pipes from the BASE library. 
""" if library_id in self._libraries: msg = f"Library '{library_id}' already exists" raise LibraryError(msg) - - # Create a new library that inherits from BASE - base_library = Library.make_base_library() - self._libraries[library_id] = base_library - @override - def close_library(self, library_id: str) -> None: - """Close and cleanup a library with the given library_id.""" - if library_id not in self._libraries: - msg = f"Trying to close a library that does not exist: '{library_id}'" - raise LibraryError(msg) - self._libraries[library_id].teardown() - self._libraries.pop(library_id) + # Create a new library that inherits from UNTITLED + base_library = Library.make_base() + self.create_library(library_id=library_id) + self.set_library(library_id=library_id, library=base_library) ############################################################ # Public library accessors ############################################################ + @override + def set_library(self, library_id: str, library: Library) -> None: + if library_id not in self._libraries: + msg = f"Library '{library_id}' does not exist" + raise LibraryError(msg) + self._libraries[library_id] = library + @override def get_library(self, library_id: str | None = None) -> Library: """Get the Library object for a specific library_id.""" if library_id is None: - library_id = SpecialLibraryId.BASE + library_id = SpecialLibraryId.UNTITLED if library_id not in self._libraries: msg = f"Trying to get a library that does not exist: '{library_id}'" raise LibraryError(msg) return self._libraries[library_id] - @override - def get_domain_library(self, library_id: str | None = None) -> DomainLibrary: - """Get the domain library for a specific library_id.""" - return self.get_library(library_id).domain_library - - @override - def get_concept_library(self, library_id: str | None = None) -> ConceptLibrary: - """Get the concept library for a specific library_id.""" - return self.get_library(library_id).concept_library - - @override 
- def get_pipe_library(self, library_id: str | None = None) -> PipeLibrary: - """Get the pipe library for a specific library_id.""" - return self.get_library(library_id).pipe_library - - @override - def get_required_domain(self, domain: str, library_id: str | None = None) -> Domain: - """Get a required domain from the specified library.""" - return self.get_library(library_id).domain_library.get_required_domain(domain=domain) - - @override - def get_required_concept(self, concept_string: str, library_id: str | None = None) -> Concept: - """Get a required concept from the specified library.""" - return self.get_library(library_id).concept_library.get_required_concept(concept_string=concept_string) - - @override - def get_required_pipe(self, pipe_code: str, library_id: str | None = None) -> PipeAbstract: - """Get a required pipe from the specified library.""" - return self.get_library(library_id).pipe_library.get_required_pipe(pipe_code=pipe_code) - ############################################################ # Private methods ############################################################ - ############################################################ - # LibraryManagerAbstract - ############################################################ - - @override - def validate_libraries(self, library_id: str | None = None): - log.debug("LibraryManager validating libraries") - library = self.get_library(library_id) - library.validate_with_libraries() - def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: """Get all valid Pipelex PLX files from the given directories.""" all_plx_paths: list[Path] = [] @@ -209,46 +171,6 @@ def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: return all_plx_paths - @override - def load_from_blueprint(self, blueprint: PipelexBundleBlueprint, library_id: str | None = None) -> list[PipeAbstract]: - """Load a single blueprint into the specified library.""" - library = self.get_library(library_id) - - try: - 
return library.load_from_blueprints([blueprint]) - except DomainDefinitionError as exc: - msg = f"Could not load domain from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {exc}" - raise DomainLoadingError(message=msg, domain_code=exc.domain_code, description=exc.description, source=exc.source) from exc - except ConceptDefinitionError as exc: - msg = f"Could not load concepts from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {exc}" - raise ConceptLoadingError( - message=msg, concept_definition_error=exc, concept_code=exc.concept_code, description=exc.description, source=exc.source - ) from exc - except PipeDefinitionError as exc: - msg = f"Could not load pipes from PLX blueprint at '{blueprint.source}', domain code: '{blueprint.domain}': {exc}" - raise PipeLoadingError( - message=msg, pipe_definition_error=exc, pipe_code=exc.pipe_code or "", description=exc.description or "", source=exc.source - ) from exc - - @override - def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint, library_id: str | None = None) -> None: - library = self.get_library(library_id) - - if blueprint.pipe is not None: - library.pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) - - # Remove concepts (they may depend on domain) - if blueprint.concept is not None: - from pipelex.core.concepts.concept_factory import ConceptFactory - concept_codes_to_remove = [ - ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) - for concept_code in blueprint.concept - ] - library.concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) - - library.domain_library.remove_domain_by_code(domain_code=blueprint.domain) - - def _import_pipelex_modules_directly(self) -> None: """Import pipelex modules to register @pipe_func decorated functions. 
@@ -269,19 +191,13 @@ def load_libraries( library_file_paths: list[Path] | None = None, ) -> None: if library_id is None: - library_id = SpecialLibraryId.BASE + library_id = SpecialLibraryId.UNTITLED # Ensure libraries exist for this library_id if library_id not in self._libraries: - if library_id == SpecialLibraryId.BASE: - # Auto-setup for BASE if not already done - self.setup() - else: - msg = f"Library '{library_id}' does not exist. Call open_library() first." - raise LibraryError(msg) + msg = f"Trying to load a library that does not exist: '{library_id}'" + raise LibraryError(msg) - library = self.get_library(library_id) - # Collect directories to scan (user project directories) user_dirs: set[Path] = set() if library_dirs: @@ -334,8 +250,6 @@ def load_libraries( self._import_pipelex_modules_directly() # Verify critical functions were registered - from pipelex.system.registries.func_registry import func_registry # noqa: PLC0415 - intentional local import - critical_functions = ["create_concept_spec", "assemble_pipelex_bundle_spec"] for func_name in critical_functions: if func_registry.has_function(func_name): @@ -356,7 +270,9 @@ def load_libraries( ) # Auto-discover and register all StructuredContent classes from sys.modules - num_registered = ClassRegistryUtils.auto_register_all_subclasses(base_class=StructuredContent) + num_registered = ClassRegistryUtils.auto_register_all_subclasses( + base_class=StructuredContent, + ) log.debug(f"Auto-registered {num_registered} StructuredContent classes from loaded modules") # Parse all blueprints @@ -379,7 +295,7 @@ def load_libraries( # Load all blueprints into the library try: - library.load_from_blueprints(blueprints) + self.get_library(library_id=library_id).load_from_blueprints(blueprints=blueprints) except DomainDefinitionError as domain_def_error: msg = f"Could not load domains from blueprints: {domain_def_error}" raise LibraryLoadingError(msg) from domain_def_error diff --git 
a/pipelex/libraries/library_manager_abstract.py b/pipelex/libraries/library_manager_abstract.py index 2cdfa8307..8e417abd7 100644 --- a/pipelex/libraries/library_manager_abstract.py +++ b/pipelex/libraries/library_manager_abstract.py @@ -2,12 +2,8 @@ from pathlib import Path from typing import TYPE_CHECKING -from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint -from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract -from pipelex.core.domains.domain import Domain from pipelex.core.domains.domain_library_abstract import DomainLibraryAbstract -from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract if TYPE_CHECKING: @@ -28,53 +24,20 @@ def reset(self) -> None: pass @abstractmethod - def open_library(self, library_id: str) -> None: - """Open a new library with the given library_id.""" - pass - - @abstractmethod - def close_library(self, library_id: str) -> None: - """Close and cleanup a library with the given library_id.""" - pass - - @abstractmethod - def get_library(self, library_id: str | None = None) -> "Library": - """Get the Library object for a specific library_id.""" - pass - - @abstractmethod - def get_domain_library(self, library_id: str | None = None) -> DomainLibraryAbstract: - """Get the domain library for a specific library_id.""" - pass - - @abstractmethod - def get_concept_library(self, library_id: str | None = None) -> ConceptLibraryAbstract: - """Get the concept library for a specific library_id.""" - pass - - @abstractmethod - def get_pipe_library(self, library_id: str | None = None) -> PipeLibraryAbstract: - """Get the pipe library for a specific library_id.""" - pass + def create_library(self, library_id: str) -> None: + """Create a new library with the given library_id.""" @abstractmethod - def get_required_domain(self, domain: str, library_id: str | None = None) -> 
Domain: - """Get a required domain from the specified library.""" - pass + def set_library(self, library_id: str, library: "Library") -> None: + """Set the Library object for a specific library_id.""" @abstractmethod - def get_required_concept(self, concept_string: str, library_id: str | None = None) -> Concept: - """Get a required concept from the specified library.""" - pass - - @abstractmethod - def get_required_pipe(self, pipe_code: str, library_id: str | None = None) -> PipeAbstract: - """Get a required pipe from the specified library.""" - pass + def open_library(self, library_id: str) -> None: + """Open a new library with the given library_id.""" @abstractmethod - def validate_libraries(self, library_id: str | None = None) -> None: - pass + def get_library(self, library_id: str | None = None) -> "Library": + """Get the Library object for a specific library_id.""" @abstractmethod def load_libraries( @@ -84,11 +47,3 @@ def load_libraries( library_file_paths: list[Path] | None = None, ) -> None: pass - - @abstractmethod - def load_from_blueprint(self, blueprint: PipelexBundleBlueprint, library_id: str | None = None) -> list[PipeAbstract]: - pass - - @abstractmethod - def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint, library_id: str | None = None) -> None: - pass diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index 4e9a54e26..993c6dfdb 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -254,14 +254,14 @@ def setup_libraries(self): self.pipelex_hub.set_pipe_library(pipe_library=self.library_manager.get_pipe_library()) log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for {get_config().project_name}") - def validate_libraries(self): - try: - self.library_manager.validate_libraries() - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not validate libraries because of: {validation_error_msg}" - raise 
PipelexSetupError(msg) from validation_error - log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}") + # def validate_libraries(self): + # try: + # self.library_manager.validate_libraries() + # except ValidationError as validation_error: + # validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) + # msg = f"Could not validate libraries because of: {validation_error_msg}" + # raise PipelexSetupError(msg) from validation_error + # log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done for {get_config().project_name}") def teardown(self): # pipelex From 1c14c3c6556f7ed13d8ade60553b5ec3e0165c74 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 15:24:36 +0200 Subject: [PATCH 102/115] fix UT --- pipelex/core/stuffs/stuff_factory.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelex/core/stuffs/stuff_factory.py b/pipelex/core/stuffs/stuff_factory.py index c11ba9d3c..c495e7090 100644 --- a/pipelex/core/stuffs/stuff_factory.py +++ b/pipelex/core/stuffs/stuff_factory.py @@ -221,13 +221,13 @@ def make_stuff_from_stuff_content_using_search_domains( else: stuff_content_dict: dict[str, Any] = stuff_content_or_data try: - concept_code = stuff_content_dict.get("concept") or stuff_content_dict.get("concept_code") - if not concept_code: + concept_string = stuff_content_dict.get("concept") or stuff_content_dict.get("concept_code") + if not concept_string: msg = "Stuff content data dict is badly formed: no concept code" raise StuffFactoryError(msg) content_value = stuff_content_dict["content"] - if NativeConceptCode.get_validated_native_concept_string(concept_string_or_code=concept_code): - concept = ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode(concept_code)) + if NativeConceptCode.get_validated_native_concept_string(concept_string_or_code=concept_string): + concept = 
ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode(concept_string.split(".")[-1])) content = StuffContentFactory.make_stuff_content_from_concept_with_fallback( concept=concept, value=content_value, @@ -243,7 +243,7 @@ def make_stuff_from_stuff_content_using_search_domains( raise StuffFactoryError(msg) from exc concept_library = get_concept_library() - concept = concept_library.get_required_concept(concept_string=concept_code) + concept = concept_library.get_required_concept(concept_string=concept_string) if isinstance(content_value, StuffContent): return StuffFactory.make_stuff( From 0b5133b4e868ca6cdebb171db1b27ce2f82889df Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 16:18:50 +0200 Subject: [PATCH 103/115] fix libraries --- pipelex/hub.py | 2 +- pipelex/libraries/__init__.py | 3 +- pipelex/libraries/library.py | 75 +++++++++- pipelex/libraries/library_ids.py | 12 ++ pipelex/libraries/library_manager.py | 130 ++++-------------- pipelex/libraries/library_manager_abstract.py | 4 - pipelex/libraries/library_utils.py | 37 ++++- 7 files changed, 148 insertions(+), 115 deletions(-) create mode 100644 pipelex/libraries/library_ids.py diff --git a/pipelex/hub.py b/pipelex/hub.py index 8946bd7cf..94a78f8c4 100644 --- a/pipelex/hub.py +++ b/pipelex/hub.py @@ -19,7 +19,7 @@ from pipelex.core.domains.domain_library_abstract import DomainLibraryAbstract from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract -from pipelex.libraries.library_manager import SpecialLibraryId +from pipelex.libraries.library_ids import SpecialLibraryId from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from pipelex.observer.observer_protocol import ObserverProtocol from pipelex.pipe_run.pipe_router_protocol import PipeRouterProtocol diff --git a/pipelex/libraries/__init__.py b/pipelex/libraries/__init__.py index 3484bf75f..7f8c6dff2 100644 --- 
a/pipelex/libraries/__init__.py +++ b/pipelex/libraries/__init__.py @@ -1,5 +1,6 @@ from pipelex.libraries.library import Library -from pipelex.libraries.library_manager import LibraryManager, SpecialLibraryId +from pipelex.libraries.library_ids import SpecialLibraryId +from pipelex.libraries.library_manager import LibraryManager from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract __all__ = [ diff --git a/pipelex/libraries/library.py b/pipelex/libraries/library.py index b47b83f61..bb7f02cb3 100644 --- a/pipelex/libraries/library.py +++ b/pipelex/libraries/library.py @@ -1,4 +1,6 @@ -from pydantic import BaseModel, Field +from pathlib import Path + +from pydantic import BaseModel, Field, ValidationError from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint from pipelex.core.concepts.concept import Concept @@ -8,9 +10,17 @@ from pipelex.core.domains.domain_blueprint import DomainBlueprint from pipelex.core.domains.domain_factory import DomainFactory from pipelex.core.domains.domain_library import DomainLibrary +from pipelex.core.interpreter import PipelexInterpreter from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_factory import PipeFactory from pipelex.core.pipes.pipe_library import PipeLibrary +from pipelex.core.validation import report_validation_error +from pipelex.exceptions import ( + ConceptDefinitionError, + DomainDefinitionError, + LibraryLoadingError, + PipeDefinitionError, +) class Library(BaseModel): @@ -48,12 +58,22 @@ def make_base(cls) -> "Library": # 3 - Domain library, add the domains domain_library = DomainLibrary.make_empty() - return cls( + library = cls( domain_library=domain_library, concept_library=concept_library, pipe_library=pipe_library, ) + library.load_from_plx_files( + plx_file_paths=[ + Path("pipelex/builder/builder.plx"), + Path("pipelex/builder/pipe/pipe_design.plx"), + Path("pipelex/builder/concept/concept.plx"), + ] + ) + + return library + 
def get_domain_library(self) -> DomainLibrary: return self.domain_library @@ -169,3 +189,54 @@ def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> None: def validate_library(self): self.validate_with_libraries() + + ############################################################ + # Library loading from sources + ############################################################ + + def load_from_plx_files(self, plx_file_paths: list[Path]) -> None: + """Load library from a list of PLX file paths. + + This method: + 1. Parses blueprints from PLX files + 2. Loads blueprints into the library + + Note: Module imports and registry loading should be done by the LibraryManager + before calling this method. + + Args: + plx_file_paths: List of PLX file paths to load. + """ + blueprints: list[PipelexBundleBlueprint] = [] + for plx_file_path in plx_file_paths: + try: + blueprint = PipelexInterpreter(file_path=plx_file_path).make_pipelex_bundle_blueprint() + except FileNotFoundError as file_not_found_error: + msg = f"Could not find PLX blueprint at '{plx_file_path}'" + raise LibraryLoadingError(msg) from file_not_found_error + except PipeDefinitionError as pipe_def_error: + msg = f"Could not load PLX blueprint from '{plx_file_path}': {pipe_def_error}" + raise LibraryLoadingError(msg) from pipe_def_error + except ValidationError as validation_error: + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) + msg = f"Could not load PLX blueprint from '{plx_file_path}' because of: {validation_error_msg}" + raise LibraryLoadingError(msg) from validation_error + blueprint.source = str(plx_file_path) + blueprints.append(blueprint) + + # Load all blueprints into the library + try: + self.load_from_blueprints(blueprints=blueprints) + except DomainDefinitionError as domain_def_error: + msg = f"Could not load domains from blueprints: {domain_def_error}" + raise LibraryLoadingError(msg) from domain_def_error + except 
ConceptDefinitionError as concept_def_error: + msg = f"Could not load concepts from blueprints: {concept_def_error}" + raise LibraryLoadingError(msg) from concept_def_error + except PipeDefinitionError as pipe_def_error: + msg = f"Could not load pipes from blueprints: {pipe_def_error}" + raise LibraryLoadingError(msg) from pipe_def_error + except ValidationError as validation_error: + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) + msg = f"Could not load blueprints because of: {validation_error_msg}" + raise LibraryLoadingError(msg) from validation_error diff --git a/pipelex/libraries/library_ids.py b/pipelex/libraries/library_ids.py new file mode 100644 index 000000000..9355ae4f8 --- /dev/null +++ b/pipelex/libraries/library_ids.py @@ -0,0 +1,12 @@ +"""Library identifiers and enumerations.""" + +from pipelex.types import StrEnum + + +class SpecialLibraryId(StrEnum): + """Special library identifiers. + + UNTITLED: The untitled/default library + """ + + UNTITLED = "untitled" diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 0124b4124..1a78de754 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -1,31 +1,21 @@ from pathlib import Path from typing import ClassVar -from pydantic import ValidationError from typing_extensions import override from pipelex import log -from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint -from pipelex.core.concepts.concept_library import ConceptLibrary -from pipelex.core.domains.domain_library import DomainLibrary -from pipelex.core.interpreter import PipelexInterpreter -from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.core.stuffs.structured_content import StructuredContent -from pipelex.core.validation import report_validation_error from pipelex.exceptions import ( - ConceptDefinitionError, ConceptLibraryError, - DomainDefinitionError, LibraryError, - 
LibraryLoadingError, - PipeDefinitionError, PipeLibraryError, ) from pipelex.libraries.library import Library +from pipelex.libraries.library_ids import SpecialLibraryId from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from pipelex.libraries.library_utils import ( - find_plx_files_in_dir, get_pipelex_package_dir_for_imports, + get_pipelex_plx_files_from_dirs, get_pipelex_plx_files_from_package, ) from pipelex.system.configuration.config_loader import config_manager @@ -48,15 +38,6 @@ def error_class(self) -> type[LibraryError]: return PipeLibraryError -class SpecialLibraryId(StrEnum): - """Special library identifiers. - - UNTITLED: The untitled/default library - """ - - UNTITLED = "untitled" - - class LibraryManager(LibraryManagerAbstract): allowed_root_attributes: ClassVar[list[str]] = [ "domain", @@ -137,52 +118,6 @@ def get_library(self, library_id: str | None = None) -> Library: # Private methods ############################################################ - def _get_pipelex_plx_files_from_dirs(self, dirs: set[Path]) -> list[Path]: - """Get all valid Pipelex PLX files from the given directories.""" - all_plx_paths: list[Path] = [] - seen_files: set[str] = set() # Track by absolute path to avoid duplicates - - for dir_path in dirs: - if not dir_path.exists(): - log.debug(f"Directory does not exist, skipping: {dir_path}") - continue - - # Find all .plx files in the directory, excluding problematic directories - plx_files = find_plx_files_in_dir( - dir_path=str(dir_path), - pattern="*.plx", - is_recursive=True, - ) - - # Filter to only include valid Pipelex files - for plx_file in plx_files: - absolute_path = str(plx_file.resolve()) - - # Skip if already seen - if absolute_path in seen_files: - log.debug(f"Skipping duplicate PLX file: {plx_file}") - continue - - if PipelexInterpreter.is_pipelex_file(plx_file): - all_plx_paths.append(plx_file) - seen_files.add(absolute_path) - else: - log.debug(f"Skipping non-Pipelex PLX file: 
{plx_file}") - - return all_plx_paths - - def _import_pipelex_modules_directly(self) -> None: - """Import pipelex modules to register @pipe_func decorated functions. - - This ensures critical pipelex functions are registered regardless of how pipelex - is installed (wheel, source, relative path, etc.). - """ - import pipelex.builder # noqa: PLC0415 - intentional local import - - log.verbose("Registering @pipe_func functions from pipelex.builder") - functions_count = FuncRegistryUtils.register_pipe_funcs_from_package("pipelex.builder", pipelex.builder) - log.verbose(f"Registered {functions_count} @pipe_func functions from pipelex.builder") - @override def load_libraries( self, @@ -205,15 +140,15 @@ def load_libraries( else: user_dirs.add(Path(config_manager.local_root_dir)) + # Get PLX file paths valid_plx_paths: list[Path] if library_file_paths: valid_plx_paths = library_file_paths else: # Get PLX files from user directories - user_plx_paths: list[Path] = self._get_pipelex_plx_files_from_dirs(user_dirs) + user_plx_paths: list[Path] = get_pipelex_plx_files_from_dirs(user_dirs) - # Get PLX files from pipelex package using importlib.resources - # This works reliably in all installation modes (wheel, source, relative) + # Get PLX files from pipelex package pipelex_plx_paths: list[Path] = get_pipelex_plx_files_from_package() # Combine and deduplicate @@ -231,7 +166,7 @@ def load_libraries( valid_plx_paths.append(plx_path) seen_absolute_paths.add(absolute_path) - # Import modules to load them into sys.modules (but don't register classes yet) + # Import modules and register in global registries # Import from user directories for library_dir in user_dirs: # Only import files that contain StructuredContent subclasses (uses AST pre-check) @@ -250,6 +185,7 @@ def load_libraries( self._import_pipelex_modules_directly() # Verify critical functions were registered + # TODO: This should be a Unit test critical_functions = ["create_concept_spec", "assemble_pipelex_bundle_spec"] 
for func_name in critical_functions: if func_registry.has_function(func_name): @@ -275,37 +211,21 @@ def load_libraries( ) log.debug(f"Auto-registered {num_registered} StructuredContent classes from loaded modules") - # Parse all blueprints - blueprints: list[PipelexBundleBlueprint] = [] - for plx_file_path in valid_plx_paths: - try: - blueprint = PipelexInterpreter(file_path=plx_file_path).make_pipelex_bundle_blueprint() - except FileNotFoundError as file_not_found_error: - msg = f"Could not find PLX blueprint at '{plx_file_path}'" - raise LibraryLoadingError(msg) from file_not_found_error - except PipeDefinitionError as pipe_def_error: - msg = f"Could not load PLX blueprint from '{plx_file_path}': {pipe_def_error}" - raise LibraryLoadingError(msg) from pipe_def_error - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not load PLX blueprint from '{plx_file_path}' because of: {validation_error_msg}" - raise LibraryLoadingError(msg) from validation_error - blueprint.source = str(plx_file_path) - blueprints.append(blueprint) - - # Load all blueprints into the library - try: - self.get_library(library_id=library_id).load_from_blueprints(blueprints=blueprints) - except DomainDefinitionError as domain_def_error: - msg = f"Could not load domains from blueprints: {domain_def_error}" - raise LibraryLoadingError(msg) from domain_def_error - except ConceptDefinitionError as concept_def_error: - msg = f"Could not load concepts from blueprints: {concept_def_error}" - raise LibraryLoadingError(msg) from concept_def_error - except PipeDefinitionError as pipe_def_error: - msg = f"Could not load pipes from blueprints: {pipe_def_error}" - raise LibraryLoadingError(msg) from pipe_def_error - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not load blueprints 
because of: {validation_error_msg}" - raise LibraryLoadingError(msg) from validation_error + # Delegate to the Library instance to load blueprints + self.get_library(library_id=library_id).load_from_plx_files(plx_file_paths=valid_plx_paths) + + ############################################################ + # Private helper methods + ############################################################ + + def _import_pipelex_modules_directly(self) -> None: + """Import pipelex modules to register @pipe_func decorated functions. + + This ensures critical pipelex functions are registered regardless of how pipelex + is installed (wheel, source, relative path, etc.). + """ + import pipelex.builder # noqa: PLC0415 - intentional local import + + log.verbose("Registering @pipe_func functions from pipelex.builder") + functions_count = FuncRegistryUtils.register_pipe_funcs_from_package("pipelex.builder", pipelex.builder) + log.verbose(f"Registered {functions_count} @pipe_func functions from pipelex.builder") diff --git a/pipelex/libraries/library_manager_abstract.py b/pipelex/libraries/library_manager_abstract.py index 8e417abd7..ca965669e 100644 --- a/pipelex/libraries/library_manager_abstract.py +++ b/pipelex/libraries/library_manager_abstract.py @@ -2,10 +2,6 @@ from pathlib import Path from typing import TYPE_CHECKING -from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract -from pipelex.core.domains.domain_library_abstract import DomainLibraryAbstract -from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract - if TYPE_CHECKING: from pipelex.libraries.library import Library diff --git a/pipelex/libraries/library_utils.py b/pipelex/libraries/library_utils.py index 40c58a6c2..8eccbf0e9 100644 --- a/pipelex/libraries/library_utils.py +++ b/pipelex/libraries/library_utils.py @@ -1,5 +1,3 @@ -"""Utility functions for library management.""" - from importlib.abc import Traversable from importlib.resources import files from pathlib import Path @@ 
-89,3 +87,38 @@ def find_plx_files_in_dir(dir_path: str, pattern: str, is_recursive: bool) -> li filtered_files.append(file_path) return filtered_files + + +def get_pipelex_plx_files_from_dirs(dirs: set[Path]) -> list[Path]: + """Get all valid Pipelex PLX files from the given directories.""" + all_plx_paths: list[Path] = [] + seen_files: set[str] = set() # Track by absolute path to avoid duplicates + + for dir_path in dirs: + if not dir_path.exists(): + log.debug(f"Directory does not exist, skipping: {dir_path}") + continue + + # Find all .plx files in the directory, excluding problematic directories + plx_files = find_plx_files_in_dir( + dir_path=str(dir_path), + pattern="*.plx", + is_recursive=True, + ) + + # Filter to only include valid Pipelex files + for plx_file in plx_files: + absolute_path = str(plx_file.resolve()) + + # Skip if already seen + if absolute_path in seen_files: + log.debug(f"Skipping duplicate PLX file: {plx_file}") + continue + + if PipelexInterpreter.is_pipelex_file(plx_file): + all_plx_paths.append(plx_file) + seen_files.add(absolute_path) + else: + log.debug(f"Skipping non-Pipelex PLX file: {plx_file}") + + return all_plx_paths From c94a4355d88f635a4c00a53d0f176f331f82506e Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 16:48:21 +0200 Subject: [PATCH 104/115] fix --- pipelex/core/concepts/concept.py | 31 +++-- pipelex/tools/codegen/runner_generator.py | 25 ++-- .../concepts/test_concept_compact_memory.py | 44 +++++-- tests/unit/tools/test_runner_generator.py | 111 ++++++++++++++++++ 4 files changed, 186 insertions(+), 25 deletions(-) create mode 100644 tests/unit/tools/test_runner_generator.py diff --git a/pipelex/core/concepts/concept.py b/pipelex/core/concepts/concept.py index ae2d73ba0..f3ce38e1d 100644 --- a/pipelex/core/concepts/concept.py +++ b/pipelex/core/concepts/concept.py @@ -137,28 +137,45 @@ def get_compact_memory_example(self, var_name: str) -> dict[str, Any] | str | in # Generate the content based on 
structure content_example = self._generate_content_example_for_class(structure_class, var_name) - # For simple native concepts - return compact format - if self.structure_class_name == "TextContent": + # Check if this is actually a native concept (not just using a native structure class) + is_native = Concept.is_native_concept(self) + + # For simple native concepts ONLY - return compact format + if is_native and self.structure_class_name == "TextContent": return cast("str", content_example) # Just a string - elif self.structure_class_name == "ImageContent": + elif is_native and self.structure_class_name == "ImageContent": # Return dict with class instantiation info return { "_class": "ImageContent", "url": cast("str", content_example), } - elif self.structure_class_name == "PDFContent": + elif is_native and self.structure_class_name == "PDFContent": # Return dict with class instantiation info return { "_class": "PDFContent", "url": cast("str", content_example), } - elif self.structure_class_name == "NumberContent": + elif is_native and self.structure_class_name == "NumberContent": return cast("int", content_example) # Just a number - # For complex concepts, wrap with concept_code + # For refined or complex concepts, wrap with concept_code + # For Image/PDF content, wrap in the _class format + if self.structure_class_name == "ImageContent": + content_wrapped = { + "_class": "ImageContent", + "url": cast("str", content_example), + } + elif self.structure_class_name == "PDFContent": + content_wrapped = { + "_class": "PDFContent", + "url": cast("str", content_example), + } + else: + content_wrapped = content_example + return { "concept_code": self.concept_string, - "content": content_example, + "content": content_wrapped, } @classmethod diff --git a/pipelex/tools/codegen/runner_generator.py b/pipelex/tools/codegen/runner_generator.py index b4c07261a..610ff0390 100644 --- a/pipelex/tools/codegen/runner_generator.py +++ b/pipelex/tools/codegen/runner_generator.py @@ -6,7 
+6,7 @@ from pipelex.core.pipes.pipe_abstract import PipeAbstract -def _value_to_python_code(value: Any, indent_level: int = 0) -> str: +def value_to_python_code(value: Any, indent_level: int = 0) -> str: """Convert a value to Python code representation recursively. Args: @@ -25,6 +25,17 @@ def _value_to_python_code(value: Any, indent_level: int = 0) -> str: url = value.get("url", "your_url") # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType, reportUnknownVariableType] return f'{class_name}(url="{url}")' return str(value) # pyright: ignore[reportUnknownArgumentType] + elif isinstance(value, dict) and "concept_code" in value and "content" in value: + # Special handling for refined concepts with explicit concept_code + # Format: {"concept": "domain.ConceptCode", "content": ContentClass(...)} + concept_code = value["concept_code"] # pyright: ignore[reportUnknownVariableType] + content = value["content"] # pyright: ignore[reportUnknownVariableType] + + # Generate the content part + content_code = value_to_python_code(content, indent_level + 1) + + # Return the full format with concept and content + return f'{{\n{indent} "concept": "{concept_code}",\n{indent} "content": {content_code},\n{indent}}}' elif isinstance(value, str): # String value - add quotes return f'"{value}"' @@ -38,17 +49,17 @@ def _value_to_python_code(value: Any, indent_level: int = 0) -> str: # List - recursively convert items if not value: return "[]" - items: list[str] = [_value_to_python_code(item, indent_level + 1) for item in value] # pyright: ignore[reportUnknownVariableType] + items: list[str] = [value_to_python_code(item, indent_level + 1) for item in value] # pyright: ignore[reportUnknownVariableType] return "[" + ", ".join(items) + "]" elif isinstance(value, dict): # Dict - recursively convert with proper formatting if not value: return "{}" - lines: list[str] = [] + lines_dict: list[str] = [] for key, val in value.items(): # pyright: ignore[reportUnknownVariableType] - 
val_code = _value_to_python_code(val, indent_level + 1) - lines.append(f'{indent} "{key}": {val_code}') - return "{\n" + ",\n".join(lines) + f"\n{indent}}}" + val_code = value_to_python_code(val, indent_level + 1) + lines_dict.append(f'{indent} "{key}": {val_code}') + return "{\n" + ",\n".join(lines_dict) + f"\n{indent}}}" else: # Fallback - use repr return repr(value) @@ -59,7 +70,7 @@ def generate_compact_memory_entry(var_name: str, concept: Concept) -> str: example_value = concept.get_compact_memory_example(var_name) # Convert the example value to a Python code string - value_str = _value_to_python_code(example_value, indent_level=3) + value_str = value_to_python_code(example_value, indent_level=3) return f' "{var_name}": {value_str},' diff --git a/tests/unit/core/concepts/test_concept_compact_memory.py b/tests/unit/core/concepts/test_concept_compact_memory.py index a5edb15ad..c947e7b1b 100644 --- a/tests/unit/core/concepts/test_concept_compact_memory.py +++ b/tests/unit/core/concepts/test_concept_compact_memory.py @@ -2,7 +2,6 @@ from __future__ import annotations -from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode @@ -87,19 +86,42 @@ def test_get_compact_memory_example_custom_structured(self) -> None: def test_get_compact_memory_example_for_refined_text_concept(self) -> None: """Test compact memory example for a concept that refines Text.""" - # Create a concept that refines Text - blueprint = ConceptBlueprint( + # Create a concept that refines Text using direct make() to control domain + concept = ConceptFactory.make( + domain="test_domain", + concept_code="Question", description="A question", + structure_class_name="TextContent", refines="native.Text", ) - concept = ConceptFactory.make_from_blueprint( - domain="test_domain", - concept_code="Question", - blueprint=blueprint, + # Test - should return full format 
with concept_code since it's a refined (non-native) concept + # Even though it refines Text, it's not the native Text concept itself + result = concept.get_compact_memory_example("question") + assert isinstance(result, dict) + assert "concept_code" in result + assert result["concept_code"] == "test_domain.Question" + assert "content" in result + assert result["content"] == "question_text" # Content is still the simple text string + + def test_get_compact_memory_example_for_refined_image_concept(self) -> None: + """Test compact memory example for a concept that refines Image (non-native).""" + # Create a concept that refines Image using direct make() to control domain + concept = ConceptFactory.make( + domain="tables", + concept_code="TableScreenshot", + description="A screenshot of a table", + structure_class_name="ImageContent", + refines="native.Image", ) - # Test - should return a simple string since it uses TextContent - result = concept.get_compact_memory_example("question") - assert isinstance(result, str) - assert result == "question_text" + # Test - should return full format with concept_code since it's a refined (non-native) concept + result = concept.get_compact_memory_example("table_screenshot") + assert isinstance(result, dict) + assert "concept_code" in result + assert result["concept_code"] == "tables.TableScreenshot" + assert "content" in result + # Content should be wrapped in ImageContent format + assert isinstance(result["content"], dict) + assert result["content"]["_class"] == "ImageContent" + assert result["content"]["url"] == "table_screenshot_url" diff --git a/tests/unit/tools/test_runner_generator.py b/tests/unit/tools/test_runner_generator.py new file mode 100644 index 000000000..354656bcf --- /dev/null +++ b/tests/unit/tools/test_runner_generator.py @@ -0,0 +1,111 @@ +"""Unit tests for runner_generator module.""" + +from __future__ import annotations + +from pipelex.core.concepts.concept_factory import ConceptFactory +from 
pipelex.core.concepts.concept_native import NativeConceptCode +from pipelex.tools.codegen.runner_generator import generate_compact_memory_entry, value_to_python_code + + +class TestValueToPythonCode: + """Test value_to_python_code function.""" + + def test_simple_string(self) -> None: + """Test conversion of a simple string.""" + result = value_to_python_code("hello") + assert result == '"hello"' + + def test_simple_number(self) -> None: + """Test conversion of a number.""" + result = value_to_python_code(42) + assert result == "42" + + def test_simple_boolean(self) -> None: + """Test conversion of a boolean.""" + result = value_to_python_code(True) + assert result == "True" + + def test_dict_with_class_image_content(self) -> None: + """Test conversion of ImageContent dict.""" + value = {"_class": "ImageContent", "url": "test_url"} + result = value_to_python_code(value) + assert result == 'ImageContent(url="test_url")' + + def test_dict_with_class_pdf_content(self) -> None: + """Test conversion of PDFContent dict.""" + value = {"_class": "PDFContent", "url": "test_url"} + result = value_to_python_code(value) + assert result == 'PDFContent(url="test_url")' + + def test_dict_with_concept_code_and_content_simple(self) -> None: + """Test conversion of refined concept with simple content.""" + value = { + "concept_code": "test_domain.Question", + "content": "question_text", + } + result = value_to_python_code(value, indent_level=3) + expected = '{\n "concept": "test_domain.Question",\n "content": "question_text",\n }' + assert result == expected + + def test_dict_with_concept_code_and_content_image(self) -> None: + """Test conversion of refined Image concept.""" + value = { + "concept_code": "tables.TableScreenshot", + "content": {"_class": "ImageContent", "url": "table_screenshot_url"}, + } + result = value_to_python_code(value, indent_level=3) + expected = ( + "{\n" + ' "concept": "tables.TableScreenshot",\n' + ' "content": ImageContent(url="table_screenshot_url"),\n' 
+ " }" + ) + assert result == expected + + +class TestGenerateCompactMemoryEntry: + """Test generate_compact_memory_entry function.""" + + def test_generate_entry_for_native_text(self) -> None: + """Test generating entry for native Text concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.TEXT) + result = generate_compact_memory_entry("message", concept) + assert result == ' "message": "message_text",' + + def test_generate_entry_for_native_image(self) -> None: + """Test generating entry for native Image concept.""" + concept = ConceptFactory.make_native_concept(NativeConceptCode.IMAGE) + result = generate_compact_memory_entry("photo", concept) + assert result == ' "photo": ImageContent(url="photo_url"),' + + def test_generate_entry_for_refined_image(self) -> None: + """Test generating entry for a concept that refines Image.""" + concept = ConceptFactory.make( + domain="tables", + concept_code="TableScreenshot", + description="A screenshot of a table", + structure_class_name="ImageContent", + refines="native.Image", + ) + result = generate_compact_memory_entry("table_screenshot", concept) + + # Should generate the full format with concept and content + assert '"concept": "tables.TableScreenshot"' in result + assert 'ImageContent(url="table_screenshot_url")' in result + assert "table_screenshot" in result + + def test_generate_entry_for_refined_text(self) -> None: + """Test generating entry for a concept that refines Text.""" + concept = ConceptFactory.make( + domain="test_domain", + concept_code="Question", + description="A question", + structure_class_name="TextContent", + refines="native.Text", + ) + result = generate_compact_memory_entry("question", concept) + + # Should generate the full format with concept and content + assert '"concept": "test_domain.Question"' in result + assert '"question_text"' in result + assert "question" in result From fe7eabca91743cc11eb8d31c3c8eb8f3361ea488 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: 
Thu, 16 Oct 2025 16:59:00 +0200 Subject: [PATCH 105/115] fix cli --- pipelex/cli/commands/run_cmd.py | 84 +++++++-------------------------- 1 file changed, 17 insertions(+), 67 deletions(-) diff --git a/pipelex/cli/commands/run_cmd.py b/pipelex/cli/commands/run_cmd.py index 573ae825a..0aa49db5a 100644 --- a/pipelex/cli/commands/run_cmd.py +++ b/pipelex/cli/commands/run_cmd.py @@ -1,7 +1,6 @@ from __future__ import annotations import asyncio -import subprocess from typing import Annotated import typer @@ -20,8 +19,23 @@ run_app = typer.Typer(help="Run pipelines and generate runner files", no_args_is_help=True) -def do_generate_runner(pipe_code: str, output_path: str | None, execute: bool, lint: bool) -> None: - """Generate a Python runner file for the given pipe.""" +@run_app.command("prepare") +def prepare_runner_cmd( + pipe_code: Annotated[str, typer.Argument(help="The pipe code to prepare a runner for")], + output_path: Annotated[ + str | None, + typer.Option("--output_path", "-o", help="Path to save the generated Python file"), + ] = None, +) -> None: + """Prepare a Python runner file for a pipe. + + The generated file will include: + - All necessary imports + - Example input values based on the pipe's input types + + Native concept types (Text, Image, PDF, etc.) will be automatically handled. + Custom concept types will have their structure recursively generated. 
+ """ # Initialize Pipelex Pipelex.make() @@ -52,70 +66,6 @@ def do_generate_runner(pipe_code: str, output_path: str | None, execute: bool, l typer.echo(typer.style(f"❌ Error saving file: {e}", fg=typer.colors.RED)) raise typer.Exit(1) from e - # Lint the file if requested - if lint: - typer.echo("\n🔍 Running linter...") - result = subprocess.run( # noqa: S603 - ["ruff", "check", output_path], # noqa: S607 - check=False, - capture_output=True, - text=True, - ) - if result.returncode == 0: - typer.echo(typer.style("✅ Linting passed", fg=typer.colors.GREEN)) - else: - typer.echo(typer.style("⚠️ Linting found issues:", fg=typer.colors.YELLOW)) - typer.echo(result.stdout) - typer.echo(result.stderr) - - # Execute the file if requested (with warning) - if execute: - typer.echo("\n⚠️ Note: Execution may fail if input values need to be filled in") - typer.echo("🚀 Executing generated file...") - result = subprocess.run( # noqa: S603 - ["python", output_path], # noqa: S607 - check=False, - capture_output=True, - text=True, - ) - if result.returncode == 0: - typer.echo(typer.style("✅ Execution successful:", fg=typer.colors.GREEN)) - typer.echo(result.stdout) - else: - typer.echo(typer.style("❌ Execution failed:", fg=typer.colors.RED)) - typer.echo(result.stdout) - typer.echo(result.stderr) - - -@run_app.command("prepare") -def prepare_runner_cmd( - pipe_code: Annotated[str, typer.Argument(help="The pipe code to prepare a runner for")], - output: Annotated[ - str | None, - typer.Option("--output", "-o", help="Path to save the generated Python file"), - ] = None, - execute: Annotated[ - bool, - typer.Option("--execute", "-e", help="Execute the generated file after creation"), - ] = False, - lint: Annotated[ - bool, - typer.Option("--lint", "-l", help="Run linter on the generated file"), - ] = False, -) -> None: - """Prepare a Python runner file for a pipe. 
- - The generated file will include: - - All necessary imports - - Example input values based on the pipe's input types - - A function to run the pipeline - - Code to execute the pipeline - - Native concept types (Text, Image, PDF, etc.) will be automatically handled. - Custom concept types will have their structure recursively generated. - """ - do_generate_runner(pipe_code=pipe_code, output_path=output, execute=execute, lint=lint) - def run_cmd( target: Annotated[ From 788f76d2da2014bf98442374dbe6c93f61b779e7 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 17:00:42 +0200 Subject: [PATCH 106/115] update badge --- .badges/tests.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.badges/tests.json b/.badges/tests.json index f748d9646..e921a4aae 100644 --- a/.badges/tests.json +++ b/.badges/tests.json @@ -1,7 +1,7 @@ { "schemaVersion": 1, "label": "tests", - "message": "1253", + "message": "1272", "color": "blue", "cacheSeconds": 300 } \ No newline at end of file From 72a3ad69773fa2fbc9cf65a12f93bcb783204a73 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 17:19:42 +0200 Subject: [PATCH 107/115] fix cli --- pipelex/cli/_cli.py | 4 +-- pipelex/cli/commands/__init__.py | 4 +-- pipelex/cli/commands/build_cmd.py | 51 ++++++++++++++++++++++++++++- pipelex/cli/commands/run_cmd.py | 54 +------------------------------ 4 files changed, 55 insertions(+), 58 deletions(-) diff --git a/pipelex/cli/_cli.py b/pipelex/cli/_cli.py index f0ab7a041..ce224dfcd 100644 --- a/pipelex/cli/_cli.py +++ b/pipelex/cli/_cli.py @@ -3,7 +3,7 @@ from typer.core import TyperGroup from typing_extensions import override -from pipelex.cli.commands import init_app, run_app, show_app, validate_app +from pipelex.cli.commands import init_app, run_cmd, show_app, validate_app from pipelex.cli.commands.build_cmd import build_app from pipelex.cli.commands.kit_cmd import kit_app @@ -37,4 +37,4 @@ def main() -> None: app.add_typer(show_app, name="show", 
help="Show and list commands") app.add_typer(build_app, name="build", help="Build artifacts like pipeline blueprints") app.add_typer(kit_app, name="kit", help="Manage kit assets") -app.add_typer(run_app, name="run", help="Run pipelines and prepare runner files") +app.command(name="run", help="Execute a pipeline")(run_cmd) diff --git a/pipelex/cli/commands/__init__.py b/pipelex/cli/commands/__init__.py index bf8a79d19..e92b3228a 100644 --- a/pipelex/cli/commands/__init__.py +++ b/pipelex/cli/commands/__init__.py @@ -4,8 +4,8 @@ """ from pipelex.cli.commands.init_cmd import init_app -from pipelex.cli.commands.run_cmd import run_app +from pipelex.cli.commands.run_cmd import run_cmd from pipelex.cli.commands.show_cmd import show_app from pipelex.cli.commands.validate_cmd import validate_app -__all__ = ["init_app", "run_app", "show_app", "validate_app"] +__all__ = ["init_app", "run_cmd", "show_app", "validate_app"] diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index 32574e198..8a1f2f6a3 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -7,10 +7,11 @@ from pipelex import pretty_print from pipelex.builder.builder import PipelexBundleSpec from pipelex.builder.builder_loop import BuilderLoop -from pipelex.hub import get_report_delegate +from pipelex.hub import get_report_delegate, get_required_pipe from pipelex.language.plx_factory import PlxFactory from pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline +from pipelex.tools.codegen.runner_generator import generate_runner_code from pipelex.tools.misc.file_utils import ensure_directory_for_file_path, save_text_to_path from pipelex.tools.misc.json_utils import save_as_json_to_path @@ -181,3 +182,51 @@ async def run_pipeline(): typer.echo(typer.style(f"\n✅ Pipeline built in {end_time - start_time:.2f} seconds", fg=typer.colors.GREEN)) get_report_delegate().generate_report() + + +@build_app.command("prepare", 
help="Prepare a Python runner file for a pipe") +def prepare_runner_cmd( + pipe_code: Annotated[str, typer.Argument(help="The pipe code to prepare a runner for")], + output_path: Annotated[ + str | None, + typer.Option("--output", "-o", help="Path to save the generated Python file"), + ] = None, +) -> None: + """Prepare a Python runner file for a pipe. + + The generated file will include: + - All necessary imports + - Example input values based on the pipe's input types + + Native concept types (Text, Image, PDF, etc.) will be automatically handled. + Custom concept types will have their structure recursively generated. + """ + # Initialize Pipelex + Pipelex.make() + + # Get the pipe + try: + pipe = get_required_pipe(pipe_code=pipe_code) + except Exception as e: + typer.echo(typer.style(f"❌ Error: Could not find pipe '{pipe_code}': {e}", fg=typer.colors.RED)) + raise typer.Exit(1) from e + + # Generate the code + try: + runner_code = generate_runner_code(pipe) + except Exception as e: + typer.echo(typer.style(f"❌ Error generating runner code: {e}", fg=typer.colors.RED)) + raise typer.Exit(1) from e + + # Determine output path + if not output_path: + output_path = f"run_{pipe_code}.py" + + # Save the file + try: + ensure_directory_for_file_path(file_path=output_path) + save_text_to_path(text=runner_code, path=output_path) + typer.echo(typer.style(f"✅ Generated runner file: {output_path}", fg=typer.colors.GREEN)) + except Exception as e: + typer.echo(typer.style(f"❌ Error saving file: {e}", fg=typer.colors.RED)) + raise typer.Exit(1) from e diff --git a/pipelex/cli/commands/run_cmd.py b/pipelex/cli/commands/run_cmd.py index 0aa49db5a..9278ea1ce 100644 --- a/pipelex/cli/commands/run_cmd.py +++ b/pipelex/cli/commands/run_cmd.py @@ -9,63 +9,11 @@ from pipelex.builder.builder import load_pipe_from_bundle from pipelex.builder.builder_errors import PipelexBundleError from pipelex.exceptions import PipeInputError -from pipelex.hub import get_required_pipe from 
pipelex.pipelex import Pipelex from pipelex.pipeline.execute import execute_pipeline -from pipelex.tools.codegen.runner_generator import generate_runner_code -from pipelex.tools.misc.file_utils import ensure_directory_for_file_path, get_incremental_file_path, save_text_to_path +from pipelex.tools.misc.file_utils import get_incremental_file_path from pipelex.tools.misc.json_utils import JsonTypeError, load_json_dict_from_path, save_as_json_to_path -run_app = typer.Typer(help="Run pipelines and generate runner files", no_args_is_help=True) - - -@run_app.command("prepare") -def prepare_runner_cmd( - pipe_code: Annotated[str, typer.Argument(help="The pipe code to prepare a runner for")], - output_path: Annotated[ - str | None, - typer.Option("--output_path", "-o", help="Path to save the generated Python file"), - ] = None, -) -> None: - """Prepare a Python runner file for a pipe. - - The generated file will include: - - All necessary imports - - Example input values based on the pipe's input types - - Native concept types (Text, Image, PDF, etc.) will be automatically handled. - Custom concept types will have their structure recursively generated. 
- """ - # Initialize Pipelex - Pipelex.make() - - # Get the pipe - try: - pipe = get_required_pipe(pipe_code=pipe_code) - except Exception as e: - typer.echo(typer.style(f"❌ Error: Could not find pipe '{pipe_code}': {e}", fg=typer.colors.RED)) - raise typer.Exit(1) from e - - # Generate the code - try: - runner_code = generate_runner_code(pipe) - except Exception as e: - typer.echo(typer.style(f"❌ Error generating runner code: {e}", fg=typer.colors.RED)) - raise typer.Exit(1) from e - - # Determine output path - if not output_path: - output_path = f"run_{pipe_code}.py" - - # Save the file - try: - ensure_directory_for_file_path(file_path=output_path) - save_text_to_path(text=runner_code, path=output_path) - typer.echo(typer.style(f"✅ Generated runner file: {output_path}", fg=typer.colors.GREEN)) - except Exception as e: - typer.echo(typer.style(f"❌ Error saving file: {e}", fg=typer.colors.RED)) - raise typer.Exit(1) from e - def run_cmd( target: Annotated[ From 750621abdde99ca8ea30354ce748b4b2c368e51d Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 17:31:28 +0200 Subject: [PATCH 108/115] ok --- pipelex/core/concepts/concept_library.py | 3 ++- pipelex/libraries/library.py | 6 ------ pipelex/pipe_operators/llm/pipe_llm.py | 7 ++----- pipelex/pipelex.py | 15 +-------------- tests/integration/pipelex/test_fundamentals.py | 4 ---- 5 files changed, 5 insertions(+), 30 deletions(-) diff --git a/pipelex/core/concepts/concept_library.py b/pipelex/core/concepts/concept_library.py index 1db9518d8..1daa8fe61 100644 --- a/pipelex/core/concepts/concept_library.py +++ b/pipelex/core/concepts/concept_library.py @@ -29,7 +29,8 @@ def validate_with_libraries(self): @override def setup(self): - pass + all_native_concepts = ConceptFactory.make_all_native_concepts() + self.add_concepts(concepts=all_native_concepts) @override def teardown(self): diff --git a/pipelex/libraries/library.py b/pipelex/libraries/library.py index bb7f02cb3..42d5233bc 100644 --- 
a/pipelex/libraries/library.py +++ b/pipelex/libraries/library.py @@ -49,8 +49,6 @@ def make_base(cls) -> "Library": """Create the BASE library that contains native concepts and builder pipes.""" # 1 - Concept library, add the native concepts concept_library = ConceptLibrary.make_empty() - all_native_concepts = ConceptFactory.make_all_native_concepts() - concept_library.add_concepts(concepts=all_native_concepts) # 2 - Pipe library, add the builder pipes pipe_library = PipeLibrary.make_empty() @@ -136,8 +134,6 @@ def _load_domain_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> Doma code=blueprint.domain, description=blueprint.description or "", system_prompt=blueprint.system_prompt, - system_prompt_to_structure=blueprint.system_prompt_to_structure, - prompt_template_to_structure=blueprint.prompt_template_to_structure, ), ) @@ -177,8 +173,6 @@ def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> None: # Remove concepts (they may depend on domain) if blueprint.concept is not None: - from pipelex.core.concepts.concept_factory import ConceptFactory - concept_codes_to_remove = [ ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) for concept_code in blueprint.concept diff --git a/pipelex/pipe_operators/llm/pipe_llm.py b/pipelex/pipe_operators/llm/pipe_llm.py index cd739ae87..971f505f9 100644 --- a/pipelex/pipe_operators/llm/pipe_llm.py +++ b/pipelex/pipe_operators/llm/pipe_llm.py @@ -79,7 +79,6 @@ def validate_output_concept_consistency(self) -> Self: @override def validate_with_libraries(self, pipeline_run_id: str | None = None): - llm_config = get_config().cogt.llm_config self.validate_inputs() self.llm_prompt_spec.validate_with_libraries() if self.llm_choices: @@ -186,7 +185,6 @@ async def _run_operator_pipe( else: output_concept = get_required_concept( concept_string=ConceptFactory.make_concept_string_with_domain(domain=self.domain, concept_code=output_concept_code), - 
pipeline_run_id=job_metadata.pipeline_run_id, ) multiplicity_resolution = output_multiplicity_to_apply( @@ -294,7 +292,6 @@ async def _run_operator_pipe( output_structure_prompt = await PipeLLM.get_output_structure_prompt( concept_string=pipe_run_params.dynamic_output_concept_code or output_concept.concept_string, is_with_preliminary_text=is_with_preliminary_text, - pipeline_run_id=job_metadata.pipeline_run_id, ) llm_prompt_1_for_object = await self.llm_prompt_spec.make_llm_prompt( output_concept_string=output_concept.concept_string, @@ -431,8 +428,8 @@ async def _dry_run_operator_pipe( ) @staticmethod - async def get_output_structure_prompt(concept_string: str, is_with_preliminary_text: bool, pipeline_run_id: str | None = None) -> str | None: - concept = get_required_concept(concept_string=concept_string, pipeline_run_id=pipeline_run_id) + async def get_output_structure_prompt(concept_string: str, is_with_preliminary_text: bool) -> str | None: + concept = get_required_concept(concept_string=concept_string) output_class = get_class_registry().get_class(concept.structure_class_name) log.debug(f"get_output_structure_prompt for {concept_string} with {is_with_preliminary_text=}") log.debug(f"output_class: {output_class}") diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index e8c01a702..17d80bbcf 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -244,21 +244,8 @@ def setup( def setup_libraries(self): self.library_manager.setup() + print("jdqojsoqjio", self.library_manager) self.library_manager.load_libraries() - # Set the UNTITLED libraries in the hub for backward compatibility - self.pipelex_hub.set_domain_library(domain_library=self.library_manager.get_domain_library()) - self.pipelex_hub.set_concept_library(concept_library=self.library_manager.get_concept_library()) - self.pipelex_hub.set_pipe_library(pipe_library=self.library_manager.get_pipe_library()) - log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} setup libraries done for 
{get_config().project_name}") - - # def validate_libraries(self): - # try: - # self.library_manager.validate_libraries() - # except ValidationError as validation_error: - # validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - # msg = f"Could not validate libraries because of: {validation_error_msg}" - # raise PipelexSetupError(msg) from validation_error - # log.debug(f"{PACKAGE_NAME} version {PACKAGE_VERSION} validate libraries done") def teardown(self): # pipelex diff --git a/tests/integration/pipelex/test_fundamentals.py b/tests/integration/pipelex/test_fundamentals.py index fc9f5f1fc..482989bc7 100644 --- a/tests/integration/pipelex/test_fundamentals.py +++ b/tests/integration/pipelex/test_fundamentals.py @@ -3,7 +3,6 @@ from pipelex.config import get_config from pipelex.hub import get_pipes from pipelex.pipe_run.dry_run import dry_run_pipes -from pipelex.pipelex import Pipelex # We use gha_disabled here because those tests are called directly and explicitly by the tests-check.yml file before the rest of the tests. @@ -14,9 +13,6 @@ def test_boot(self): # Therefore this test will fail if Pipelex.make() fails. 
pass - def test_validate_libraries(self): - Pipelex.get_instance().validate_libraries() - @pytest.mark.asyncio(loop_scope="class") async def test_dry_run_all_pipes(self): results = await dry_run_pipes(pipes=get_pipes(), raise_on_failure=False) From 5ac55d938cce53b1d5f5c5ee6d314016dd40826b Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 17:35:48 +0200 Subject: [PATCH 109/115] move some code --- pipelex/builder/runner_code.py | 93 +++++++++++++++++++++++ pipelex/cli/commands/build_cmd.py | 2 +- pipelex/tools/codegen/runner_generator.py | 93 ----------------------- tests/unit/tools/test_runner_generator.py | 3 +- 4 files changed, 96 insertions(+), 95 deletions(-) create mode 100644 pipelex/builder/runner_code.py diff --git a/pipelex/builder/runner_code.py b/pipelex/builder/runner_code.py new file mode 100644 index 000000000..76157bb82 --- /dev/null +++ b/pipelex/builder/runner_code.py @@ -0,0 +1,93 @@ +from pipelex.core.concepts.concept import Concept +from pipelex.core.pipes.pipe_abstract import PipeAbstract +from pipelex.tools.codegen.runner_generator import value_to_python_code + + +def generate_compact_memory_entry(var_name: str, concept: Concept) -> str: + """Generate the compact_memory dictionary entry for a given input.""" + example_value = concept.get_compact_memory_example(var_name) + + # Convert the example value to a Python code string + value_str = value_to_python_code(example_value, indent_level=3) + + return f' "{var_name}": {value_str},' + + +def generate_runner_code(pipe: PipeAbstract) -> str: + """Generate the complete Python runner code for a pipe.""" + pipe_code = pipe.code + inputs = pipe.inputs + + # Determine which imports are needed based on input concepts + needs_pdf = False + needs_image = False + for input_req in inputs.root.values(): + concept = input_req.concept + if concept.structure_class_name == "PDFContent": + needs_pdf = True + elif concept.structure_class_name == "ImageContent": + needs_image = True + + # Build 
import section + import_lines = ["import asyncio", ""] + + # Add content class imports if needed + if needs_pdf: + import_lines.append("from pipelex.core.stuffs.pdf_content import PDFContent") + if needs_image: + import_lines.append("from pipelex.core.stuffs.image_content import ImageContent") + + import_lines.extend( + [ + "from pipelex.pipelex import Pipelex", + "from pipelex.pipeline.execute import execute_pipeline", + ] + ) + + # Build input_memory entries + if inputs.nb_inputs > 0: + input_memory_entries: list[str] = [] + for var_name, input_req in inputs.root.items(): + concept = input_req.concept + entry = generate_compact_memory_entry(var_name, concept) + input_memory_entries.append(entry) + input_memory_block = "\n".join(input_memory_entries) + else: + input_memory_block = " # No inputs required" + + # Build the main function + function_lines = [ + "", + "", + f"async def run_{pipe_code}():", + " return await execute_pipeline(", + f' pipe_code="{pipe_code}",', + ] + + if inputs.nb_inputs > 0: + function_lines.extend( + [ + " input_memory={", + input_memory_block, + " },", + ] + ) + + function_lines.extend( + [ + " )", + "", + "", + 'if __name__ == "__main__":', + " # Initialize Pipelex", + " Pipelex.make()", + "", + " # Run the pipeline", + f" result = asyncio.run(run_{pipe_code}())", + "", + ] + ) + + # Combine everything + code_lines = import_lines + function_lines + return "\n".join(code_lines) diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index 8a1f2f6a3..c2cc5f5cb 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -7,11 +7,11 @@ from pipelex import pretty_print from pipelex.builder.builder import PipelexBundleSpec from pipelex.builder.builder_loop import BuilderLoop +from pipelex.builder.runner_code import generate_runner_code from pipelex.hub import get_report_delegate, get_required_pipe from pipelex.language.plx_factory import PlxFactory from pipelex.pipelex import Pipelex 
from pipelex.pipeline.execute import execute_pipeline -from pipelex.tools.codegen.runner_generator import generate_runner_code from pipelex.tools.misc.file_utils import ensure_directory_for_file_path, save_text_to_path from pipelex.tools.misc.json_utils import save_as_json_to_path diff --git a/pipelex/tools/codegen/runner_generator.py b/pipelex/tools/codegen/runner_generator.py index 610ff0390..ddb112f68 100644 --- a/pipelex/tools/codegen/runner_generator.py +++ b/pipelex/tools/codegen/runner_generator.py @@ -2,9 +2,6 @@ from typing import Any -from pipelex.core.concepts.concept import Concept -from pipelex.core.pipes.pipe_abstract import PipeAbstract - def value_to_python_code(value: Any, indent_level: int = 0) -> str: """Convert a value to Python code representation recursively. @@ -63,93 +60,3 @@ def value_to_python_code(value: Any, indent_level: int = 0) -> str: else: # Fallback - use repr return repr(value) - - -def generate_compact_memory_entry(var_name: str, concept: Concept) -> str: - """Generate the compact_memory dictionary entry for a given input.""" - example_value = concept.get_compact_memory_example(var_name) - - # Convert the example value to a Python code string - value_str = value_to_python_code(example_value, indent_level=3) - - return f' "{var_name}": {value_str},' - - -def generate_runner_code(pipe: PipeAbstract) -> str: - """Generate the complete Python runner code for a pipe.""" - pipe_code = pipe.code - inputs = pipe.inputs - - # Determine which imports are needed based on input concepts - needs_pdf = False - needs_image = False - for input_req in inputs.root.values(): - concept = input_req.concept - if concept.structure_class_name == "PDFContent": - needs_pdf = True - elif concept.structure_class_name == "ImageContent": - needs_image = True - - # Build import section - import_lines = ["import asyncio", ""] - - # Add content class imports if needed - if needs_pdf: - import_lines.append("from pipelex.core.stuffs.pdf_content import PDFContent") 
- if needs_image: - import_lines.append("from pipelex.core.stuffs.image_content import ImageContent") - - import_lines.extend( - [ - "from pipelex.pipelex import Pipelex", - "from pipelex.pipeline.execute import execute_pipeline", - ] - ) - - # Build input_memory entries - if inputs.nb_inputs > 0: - input_memory_entries: list[str] = [] - for var_name, input_req in inputs.root.items(): - concept = input_req.concept - entry = generate_compact_memory_entry(var_name, concept) - input_memory_entries.append(entry) - input_memory_block = "\n".join(input_memory_entries) - else: - input_memory_block = " # No inputs required" - - # Build the main function - function_lines = [ - "", - "", - f"async def run_{pipe_code}():", - " return await execute_pipeline(", - f' pipe_code="{pipe_code}",', - ] - - if inputs.nb_inputs > 0: - function_lines.extend( - [ - " input_memory={", - input_memory_block, - " },", - ] - ) - - function_lines.extend( - [ - " )", - "", - "", - 'if __name__ == "__main__":', - " # Initialize Pipelex", - " Pipelex.make()", - "", - " # Run the pipeline", - f" result = asyncio.run(run_{pipe_code}())", - "", - ] - ) - - # Combine everything - code_lines = import_lines + function_lines - return "\n".join(code_lines) diff --git a/tests/unit/tools/test_runner_generator.py b/tests/unit/tools/test_runner_generator.py index 354656bcf..cd8937cf8 100644 --- a/tests/unit/tools/test_runner_generator.py +++ b/tests/unit/tools/test_runner_generator.py @@ -2,9 +2,10 @@ from __future__ import annotations +from pipelex.builder.runner_code import generate_compact_memory_entry from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode -from pipelex.tools.codegen.runner_generator import generate_compact_memory_entry, value_to_python_code +from pipelex.tools.codegen.runner_generator import value_to_python_code class TestValueToPythonCode: From 1f448bb6c5a8495e1e034e522d73694cd6f2b82f Mon Sep 17 00:00:00 2001 
From: thomashebrard Date: Thu, 16 Oct 2025 17:38:19 +0200 Subject: [PATCH 110/115] move around some files --- pipelex/builder/runner_code.py | 62 ++++++++++++++++++++++- pipelex/tools/codegen/__init__.py | 1 - pipelex/tools/codegen/runner_generator.py | 62 ----------------------- tests/unit/tools/test_runner_generator.py | 3 +- 4 files changed, 62 insertions(+), 66 deletions(-) delete mode 100644 pipelex/tools/codegen/__init__.py delete mode 100644 pipelex/tools/codegen/runner_generator.py diff --git a/pipelex/builder/runner_code.py b/pipelex/builder/runner_code.py index 76157bb82..4234f55aa 100644 --- a/pipelex/builder/runner_code.py +++ b/pipelex/builder/runner_code.py @@ -1,6 +1,66 @@ +from typing import Any + from pipelex.core.concepts.concept import Concept from pipelex.core.pipes.pipe_abstract import PipeAbstract -from pipelex.tools.codegen.runner_generator import value_to_python_code + + +def value_to_python_code(value: Any, indent_level: int = 0) -> str: + """Convert a value to Python code representation recursively. + + Args: + value: The value to convert (can be str, int, dict, list, etc.) 
+ indent_level: Current indentation level for nested dicts + + Returns: + String representation of Python code + """ + indent = " " * indent_level + + if isinstance(value, dict) and "_class" in value: + # Special handling for Content class instantiation (e.g., PDFContent, ImageContent) + class_name = value["_class"] # pyright: ignore[reportUnknownVariableType] + if class_name in {"PDFContent", "ImageContent"}: + url = value.get("url", "your_url") # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType, reportUnknownVariableType] + return f'{class_name}(url="{url}")' + return str(value) # pyright: ignore[reportUnknownArgumentType] + elif isinstance(value, dict) and "concept_code" in value and "content" in value: + # Special handling for refined concepts with explicit concept_code + # Format: {"concept": "domain.ConceptCode", "content": ContentClass(...)} + concept_code = value["concept_code"] # pyright: ignore[reportUnknownVariableType] + content = value["content"] # pyright: ignore[reportUnknownVariableType] + + # Generate the content part + content_code = value_to_python_code(content, indent_level + 1) + + # Return the full format with concept and content + return f'{{\n{indent} "concept": "{concept_code}",\n{indent} "content": {content_code},\n{indent}}}' + elif isinstance(value, str): + # String value - add quotes + return f'"{value}"' + elif isinstance(value, bool): + # Boolean - Python True/False + return str(value) + elif isinstance(value, (int, float)): + # Numeric value + return str(value) + elif isinstance(value, list): + # List - recursively convert items + if not value: + return "[]" + items: list[str] = [value_to_python_code(item, indent_level + 1) for item in value] # pyright: ignore[reportUnknownVariableType] + return "[" + ", ".join(items) + "]" + elif isinstance(value, dict): + # Dict - recursively convert with proper formatting + if not value: + return "{}" + lines_dict: list[str] = [] + for key, val in value.items(): # pyright: 
ignore[reportUnknownVariableType] + val_code = value_to_python_code(val, indent_level + 1) + lines_dict.append(f'{indent} "{key}": {val_code}') + return "{\n" + ",\n".join(lines_dict) + f"\n{indent}}}" + else: + # Fallback - use repr + return repr(value) def generate_compact_memory_entry(var_name: str, concept: Concept) -> str: diff --git a/pipelex/tools/codegen/__init__.py b/pipelex/tools/codegen/__init__.py deleted file mode 100644 index c7351a0c9..000000000 --- a/pipelex/tools/codegen/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Code generation utilities for Pipelex.""" diff --git a/pipelex/tools/codegen/runner_generator.py b/pipelex/tools/codegen/runner_generator.py deleted file mode 100644 index ddb112f68..000000000 --- a/pipelex/tools/codegen/runner_generator.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Generate Python runner code from pipe definitions and concepts.""" - -from typing import Any - - -def value_to_python_code(value: Any, indent_level: int = 0) -> str: - """Convert a value to Python code representation recursively. - - Args: - value: The value to convert (can be str, int, dict, list, etc.) 
- indent_level: Current indentation level for nested dicts - - Returns: - String representation of Python code - """ - indent = " " * indent_level - - if isinstance(value, dict) and "_class" in value: - # Special handling for Content class instantiation (e.g., PDFContent, ImageContent) - class_name = value["_class"] # pyright: ignore[reportUnknownVariableType] - if class_name in {"PDFContent", "ImageContent"}: - url = value.get("url", "your_url") # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType, reportUnknownVariableType] - return f'{class_name}(url="{url}")' - return str(value) # pyright: ignore[reportUnknownArgumentType] - elif isinstance(value, dict) and "concept_code" in value and "content" in value: - # Special handling for refined concepts with explicit concept_code - # Format: {"concept": "domain.ConceptCode", "content": ContentClass(...)} - concept_code = value["concept_code"] # pyright: ignore[reportUnknownVariableType] - content = value["content"] # pyright: ignore[reportUnknownVariableType] - - # Generate the content part - content_code = value_to_python_code(content, indent_level + 1) - - # Return the full format with concept and content - return f'{{\n{indent} "concept": "{concept_code}",\n{indent} "content": {content_code},\n{indent}}}' - elif isinstance(value, str): - # String value - add quotes - return f'"{value}"' - elif isinstance(value, bool): - # Boolean - Python True/False - return str(value) - elif isinstance(value, (int, float)): - # Numeric value - return str(value) - elif isinstance(value, list): - # List - recursively convert items - if not value: - return "[]" - items: list[str] = [value_to_python_code(item, indent_level + 1) for item in value] # pyright: ignore[reportUnknownVariableType] - return "[" + ", ".join(items) + "]" - elif isinstance(value, dict): - # Dict - recursively convert with proper formatting - if not value: - return "{}" - lines_dict: list[str] = [] - for key, val in value.items(): # pyright: 
ignore[reportUnknownVariableType] - val_code = value_to_python_code(val, indent_level + 1) - lines_dict.append(f'{indent} "{key}": {val_code}') - return "{\n" + ",\n".join(lines_dict) + f"\n{indent}}}" - else: - # Fallback - use repr - return repr(value) diff --git a/tests/unit/tools/test_runner_generator.py b/tests/unit/tools/test_runner_generator.py index cd8937cf8..129e8c863 100644 --- a/tests/unit/tools/test_runner_generator.py +++ b/tests/unit/tools/test_runner_generator.py @@ -2,10 +2,9 @@ from __future__ import annotations -from pipelex.builder.runner_code import generate_compact_memory_entry +from pipelex.builder.runner_code import generate_compact_memory_entry, value_to_python_code from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode -from pipelex.tools.codegen.runner_generator import value_to_python_code class TestValueToPythonCode: From ec3fc8c4fdc3f670f42c39f201e5daed085be735 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Thu, 16 Oct 2025 18:44:32 +0200 Subject: [PATCH 111/115] fix almost all make cc --- pipelex/cli/commands/validate_cmd.py | 6 +- pipelex/core/pipes/pipe_abstract.py | 4 +- pipelex/core/pipes/pipe_library.py | 2 +- pipelex/libraries/library.py | 208 +----------------- pipelex/libraries/library_factory.py | 21 ++ pipelex/libraries/library_ids.py | 2 - pipelex/libraries/library_manager.py | 175 ++++++++++++++- pipelex/libraries/library_manager_factory.py | 7 - .../pipe_operators/img_gen/pipe_img_gen.py | 6 +- pipelex/pipelex.py | 4 +- tests/integration/pipelex/test_libraries.py | 51 ++--- 11 files changed, 226 insertions(+), 260 deletions(-) create mode 100644 pipelex/libraries/library_factory.py delete mode 100644 pipelex/libraries/library_manager_factory.py diff --git a/pipelex/cli/commands/validate_cmd.py b/pipelex/cli/commands/validate_cmd.py index 6deb49c50..1d024d2a7 100644 --- a/pipelex/cli/commands/validate_cmd.py +++ 
b/pipelex/cli/commands/validate_cmd.py @@ -13,16 +13,14 @@ def do_validate_all_libraries_and_dry_run() -> None: """Validate libraries and dry-run all pipes.""" - pipelex_instance = Pipelex.make() - pipelex_instance.validate_libraries() + Pipelex.make() asyncio.run(dry_run_pipes(pipes=get_pipes(), raise_on_failure=True)) log.info("Setup sequence passed OK, config and pipelines are validated.") def do_dry_run_pipe(pipe_code: str) -> None: """Dry run a single pipe.""" - pipelex_instance = Pipelex.make() - pipelex_instance.validate_libraries() + Pipelex.make() asyncio.run( dry_run_pipe( diff --git a/pipelex/core/pipes/pipe_abstract.py b/pipelex/core/pipes/pipe_abstract.py index 3e42b5319..f93a60642 100644 --- a/pipelex/core/pipes/pipe_abstract.py +++ b/pipelex/core/pipes/pipe_abstract.py @@ -31,10 +31,10 @@ def validate_pipe_code_syntax(cls, code: str) -> str: return code @abstractmethod - def validate_output(self, pipeline_run_id: str | None = None): + def validate_output(self): """Validate the output for the pipe.""" - def validate_with_libraries(self, pipeline_run_id: str | None = None): + def validate_with_libraries(self): """Validate the pipe with the libraries, after the static validation""" @abstractmethod diff --git a/pipelex/core/pipes/pipe_library.py b/pipelex/core/pipes/pipe_library.py index 4cfd0f7a4..62839d2c9 100644 --- a/pipelex/core/pipes/pipe_library.py +++ b/pipelex/core/pipes/pipe_library.py @@ -20,7 +20,7 @@ class PipeLibrary(RootModel[PipeLibraryRoot], PipeLibraryAbstract): def validate_with_libraries(self, pipeline_run_id: str | None = None): concept_library = get_concept_library() for pipe in self.root.values(): - pipe.validate_output(pipeline_run_id=pipeline_run_id) + pipe.validate_output() try: for concept in pipe.concept_dependencies(): try: diff --git a/pipelex/libraries/library.py b/pipelex/libraries/library.py index 42d5233bc..92f93a0ef 100644 --- a/pipelex/libraries/library.py +++ b/pipelex/libraries/library.py @@ -1,26 +1,8 @@ -from 
pathlib import Path +from pydantic import BaseModel -from pydantic import BaseModel, Field, ValidationError - -from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint -from pipelex.core.concepts.concept import Concept -from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_library import ConceptLibrary -from pipelex.core.domains.domain import Domain -from pipelex.core.domains.domain_blueprint import DomainBlueprint -from pipelex.core.domains.domain_factory import DomainFactory from pipelex.core.domains.domain_library import DomainLibrary -from pipelex.core.interpreter import PipelexInterpreter -from pipelex.core.pipes.pipe_abstract import PipeAbstract -from pipelex.core.pipes.pipe_factory import PipeFactory from pipelex.core.pipes.pipe_library import PipeLibrary -from pipelex.core.validation import report_validation_error -from pipelex.exceptions import ( - ConceptDefinitionError, - DomainDefinitionError, - LibraryLoadingError, - PipeDefinitionError, -) class Library(BaseModel): @@ -32,45 +14,9 @@ class Library(BaseModel): Each Library (except BASE) inherits native concepts and base pipes from the BASE library. """ - domain_library: DomainLibrary = Field(default_factory=DomainLibrary.make_empty) - concept_library: ConceptLibrary = Field(default_factory=ConceptLibrary.make_empty) - pipe_library: PipeLibrary = Field(default_factory=PipeLibrary.make_empty) - - @classmethod - def make_empty(cls) -> "Library": - """Create an empty library with initialized concept library (includes native concepts). - - This should only be used for the BASE library. 
- """ - return cls.make_base() - - @classmethod - def make_base(cls) -> "Library": - """Create the BASE library that contains native concepts and builder pipes.""" - # 1 - Concept library, add the native concepts - concept_library = ConceptLibrary.make_empty() - - # 2 - Pipe library, add the builder pipes - pipe_library = PipeLibrary.make_empty() - - # 3 - Domain library, add the domains - domain_library = DomainLibrary.make_empty() - - library = cls( - domain_library=domain_library, - concept_library=concept_library, - pipe_library=pipe_library, - ) - - library.load_from_plx_files( - plx_file_paths=[ - Path("pipelex/builder/builder.plx"), - Path("pipelex/builder/pipe/pipe_design.plx"), - Path("pipelex/builder/concept/concept.plx"), - ] - ) - - return library + domain_library: DomainLibrary + concept_library: ConceptLibrary + pipe_library: PipeLibrary def get_domain_library(self) -> DomainLibrary: return self.domain_library @@ -82,155 +28,11 @@ def get_pipe_library(self) -> PipeLibrary: return self.pipe_library def teardown(self) -> None: - """Teardown all libraries in this bundle.""" self.pipe_library.teardown() self.concept_library.teardown() self.domain_library.teardown() - def validate_with_libraries(self) -> None: - """Validate all libraries in this bundle.""" + def validate_library(self) -> None: self.concept_library.validate_with_libraries() self.pipe_library.validate_with_libraries() self.domain_library.validate_with_libraries() - - def load_from_blueprints(self, blueprints: list[PipelexBundleBlueprint]) -> list[PipeAbstract]: - """Load domains, concepts, and pipes from a list of blueprints. 
- - Args: - blueprints: List of parsed PLX blueprints to load - - Returns: - List of all pipes that were loaded - """ - all_pipes: list[PipeAbstract] = [] - - # Load all domains first - all_domains: list[Domain] = [] - for blueprint in blueprints: - domain = self._load_domain_from_blueprint(blueprint) - all_domains.append(domain) - self.domain_library.add_domains(domains=all_domains) - - # Load all concepts second - all_concepts: list[Concept] = [] - for blueprint in blueprints: - concepts = self._load_concepts_from_blueprint(blueprint) - all_concepts.extend(concepts) - self.concept_library.add_concepts(concepts=all_concepts) - - # Load all pipes third - for blueprint in blueprints: - pipes = self._load_pipes_from_blueprint(blueprint) - all_pipes.extend(pipes) - self.pipe_library.add_pipes(pipes=all_pipes) - - return all_pipes - - def _load_domain_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> Domain: - """Load a domain from a blueprint.""" - return DomainFactory.make_from_blueprint( - blueprint=DomainBlueprint( - source=blueprint.source, - code=blueprint.domain, - description=blueprint.description or "", - system_prompt=blueprint.system_prompt, - ), - ) - - def _load_concepts_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[Concept]: - """Load concepts from a blueprint.""" - if blueprint.concept is None: - return [] - - concepts: list[Concept] = [] - for concept_code, concept_blueprint_or_description in blueprint.concept.items(): - concept = ConceptFactory.make_from_blueprint_or_description( - domain=blueprint.domain, - concept_code=concept_code, - concept_codes_from_the_same_domain=list(blueprint.concept.keys()), - concept_blueprint_or_description=concept_blueprint_or_description, - ) - concepts.append(concept) - return concepts - - def _load_pipes_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[PipeAbstract]: - """Load pipes from a blueprint.""" - pipes: list[PipeAbstract] = [] - if blueprint.pipe is not None: - for 
pipe_name, pipe_blueprint in blueprint.pipe.items(): - pipe = PipeFactory.make_from_blueprint( - domain=blueprint.domain, - pipe_code=pipe_name, - blueprint=pipe_blueprint, - concept_codes_from_the_same_domain=list(blueprint.concept.keys()) if blueprint.concept else None, - ) - pipes.append(pipe) - return pipes - - def remove_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> None: - if blueprint.pipe is not None: - self.pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) - - # Remove concepts (they may depend on domain) - if blueprint.concept is not None: - concept_codes_to_remove = [ - ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) - for concept_code in blueprint.concept - ] - self.concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) - - self.domain_library.remove_domain_by_code(domain_code=blueprint.domain) - - def validate_library(self): - self.validate_with_libraries() - - ############################################################ - # Library loading from sources - ############################################################ - - def load_from_plx_files(self, plx_file_paths: list[Path]) -> None: - """Load library from a list of PLX file paths. - - This method: - 1. Parses blueprints from PLX files - 2. Loads blueprints into the library - - Note: Module imports and registry loading should be done by the LibraryManager - before calling this method. - - Args: - plx_file_paths: List of PLX file paths to load. 
- """ - blueprints: list[PipelexBundleBlueprint] = [] - for plx_file_path in plx_file_paths: - try: - blueprint = PipelexInterpreter(file_path=plx_file_path).make_pipelex_bundle_blueprint() - except FileNotFoundError as file_not_found_error: - msg = f"Could not find PLX blueprint at '{plx_file_path}'" - raise LibraryLoadingError(msg) from file_not_found_error - except PipeDefinitionError as pipe_def_error: - msg = f"Could not load PLX blueprint from '{plx_file_path}': {pipe_def_error}" - raise LibraryLoadingError(msg) from pipe_def_error - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not load PLX blueprint from '{plx_file_path}' because of: {validation_error_msg}" - raise LibraryLoadingError(msg) from validation_error - blueprint.source = str(plx_file_path) - blueprints.append(blueprint) - - # Load all blueprints into the library - try: - self.load_from_blueprints(blueprints=blueprints) - except DomainDefinitionError as domain_def_error: - msg = f"Could not load domains from blueprints: {domain_def_error}" - raise LibraryLoadingError(msg) from domain_def_error - except ConceptDefinitionError as concept_def_error: - msg = f"Could not load concepts from blueprints: {concept_def_error}" - raise LibraryLoadingError(msg) from concept_def_error - except PipeDefinitionError as pipe_def_error: - msg = f"Could not load pipes from blueprints: {pipe_def_error}" - raise LibraryLoadingError(msg) from pipe_def_error - except ValidationError as validation_error: - validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) - msg = f"Could not load blueprints because of: {validation_error_msg}" - raise LibraryLoadingError(msg) from validation_error diff --git a/pipelex/libraries/library_factory.py b/pipelex/libraries/library_factory.py new file mode 100644 index 000000000..985ff327e --- /dev/null +++ 
b/pipelex/libraries/library_factory.py @@ -0,0 +1,21 @@ +from pydantic import BaseModel + +from pipelex.core.concepts.concept_library import ConceptLibrary +from pipelex.core.domains.domain_library import DomainLibrary +from pipelex.core.pipes.pipe_library import PipeLibrary +from pipelex.libraries.library import Library + + +class LibraryFactory(BaseModel): + @classmethod + def make_empty(cls) -> Library: + # 1 - Concept library, add the native concepts + concept_library = ConceptLibrary.make_empty() + + # 2 - Pipe library, add the builder pipes + pipe_library = PipeLibrary.make_empty() + + # 3 - Domain library, add the domains + domain_library = DomainLibrary.make_empty() + + return Library(domain_library=domain_library, concept_library=concept_library, pipe_library=pipe_library) diff --git a/pipelex/libraries/library_ids.py b/pipelex/libraries/library_ids.py index 9355ae4f8..2c74b28ed 100644 --- a/pipelex/libraries/library_ids.py +++ b/pipelex/libraries/library_ids.py @@ -1,5 +1,3 @@ -"""Library identifiers and enumerations.""" - from pipelex.types import StrEnum diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index fc94ca6eb..9f2ecd0c5 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -1,16 +1,32 @@ from pathlib import Path from typing import ClassVar +from pydantic import ValidationError from typing_extensions import override from pipelex import log +from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint +from pipelex.core.concepts.concept import Concept +from pipelex.core.concepts.concept_factory import ConceptFactory +from pipelex.core.domains.domain import Domain +from pipelex.core.domains.domain_blueprint import DomainBlueprint +from pipelex.core.domains.domain_factory import DomainFactory +from pipelex.core.interpreter import PipelexInterpreter +from pipelex.core.pipes.pipe_abstract import PipeAbstract +from pipelex.core.pipes.pipe_factory 
import PipeFactory from pipelex.core.stuffs.structured_content import StructuredContent +from pipelex.core.validation import report_validation_error from pipelex.exceptions import ( + ConceptDefinitionError, ConceptLibraryError, + DomainDefinitionError, LibraryError, + LibraryLoadingError, + PipeDefinitionError, PipeLibraryError, ) from pipelex.libraries.library import Library +from pipelex.libraries.library_factory import LibraryFactory from pipelex.libraries.library_ids import SpecialLibraryId from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract from pipelex.libraries.library_utils import ( @@ -47,7 +63,7 @@ class LibraryManager(LibraryManagerAbstract): def __init__(self): # UNTITLED library is the fallback library for all others - self._libraries: dict[str, Library] = {SpecialLibraryId.UNTITLED: Library.make_empty()} + self._libraries: dict[str, Library] = {} ############################################################ # Manager lifecycle @@ -56,7 +72,8 @@ def __init__(self): @override def setup(self) -> None: self._libraries.clear() - self.create_library(library_id=SpecialLibraryId.UNTITLED) + # Create and initialize UNTITLED library with base PLX files + self.open_library(library_id=SpecialLibraryId.UNTITLED) @override def teardown(self) -> None: @@ -74,7 +91,7 @@ def create_library(self, library_id: str): if library_id in self._libraries: msg = f"Library '{library_id}' already exists" raise LibraryError(msg) - self._libraries[library_id] = Library.make_empty() + self._libraries[library_id] = LibraryFactory.make_empty() @override def open_library(self, library_id: str) -> None: @@ -86,10 +103,16 @@ def open_library(self, library_id: str) -> None: msg = f"Library '{library_id}' already exists" raise LibraryError(msg) - # Create a new library that inherits from UNTITLED - base_library = Library.make_base() + # Create a new library that inherits from BASE self.create_library(library_id=library_id) - self.set_library(library_id=library_id, 
library=base_library) + + # Load base PLX files (builder pipes) into the new library + base_plx_paths = [ + Path("pipelex/builder/builder.plx"), + Path("pipelex/builder/pipe/pipe_design.plx"), + Path("pipelex/builder/concept/concept.plx"), + ] + self._load_plx_files_into_library(library_id=library_id, plx_file_paths=base_plx_paths) ############################################################ # Public library accessors @@ -209,13 +232,104 @@ def load_libraries( ) log.debug(f"Auto-registered {num_registered} StructuredContent classes from loaded modules") - # Delegate to the Library instance to load blueprints - self.get_library(library_id=library_id).load_from_plx_files(plx_file_paths=valid_plx_paths) + # Load PLX files into the specific library + self._load_plx_files_into_library(library_id=library_id, plx_file_paths=valid_plx_paths) ############################################################ # Private helper methods ############################################################ + def load_from_blueprints(self, library_id: str, blueprints: list[PipelexBundleBlueprint]) -> list[PipeAbstract]: + """Load domains, concepts, and pipes from a list of blueprints. 
+ + Args: + library_id: The ID of the library to load into + blueprints: List of parsed PLX blueprints to load + + Returns: + List of all pipes that were loaded + """ + library = self.get_library(library_id=library_id) + all_pipes: list[PipeAbstract] = [] + + # Load all domains first + all_domains: list[Domain] = [] + for blueprint in blueprints: + domain = self._load_domain_from_blueprint(blueprint) + all_domains.append(domain) + library.domain_library.add_domains(domains=all_domains) + + # Load all concepts second + all_concepts: list[Concept] = [] + for blueprint in blueprints: + concepts = self._load_concepts_from_blueprint(blueprint) + all_concepts.extend(concepts) + library.concept_library.add_concepts(concepts=all_concepts) + + # Load all pipes third + for blueprint in blueprints: + pipes = self._load_pipes_from_blueprint(blueprint) + all_pipes.extend(pipes) + library.pipe_library.add_pipes(pipes=all_pipes) + + return all_pipes + + def _load_domain_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> Domain: + """Load a domain from a blueprint.""" + return DomainFactory.make_from_blueprint( + blueprint=DomainBlueprint( + source=blueprint.source, + code=blueprint.domain, + description=blueprint.description or "", + system_prompt=blueprint.system_prompt, + ), + ) + + def _load_plx_files_into_library(self, library_id: str, plx_file_paths: list[Path]) -> None: + """Load PLX files into a specific library. + + This method: + 1. Parses blueprints from PLX files + 2. 
Loads blueprints into the specified library + + Args: + library_id: The ID of the library to load into + plx_file_paths: List of PLX file paths to load + """ + blueprints: list[PipelexBundleBlueprint] = [] + for plx_file_path in plx_file_paths: + try: + blueprint = PipelexInterpreter(file_path=plx_file_path).make_pipelex_bundle_blueprint() + except FileNotFoundError as file_not_found_error: + msg = f"Could not find PLX blueprint at '{plx_file_path}'" + raise LibraryLoadingError(msg) from file_not_found_error + except PipeDefinitionError as pipe_def_error: + msg = f"Could not load PLX blueprint from '{plx_file_path}': {pipe_def_error}" + raise LibraryLoadingError(msg) from pipe_def_error + except ValidationError as validation_error: + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) + msg = f"Could not load PLX blueprint from '{plx_file_path}' because of: {validation_error_msg}" + raise LibraryLoadingError(msg) from validation_error + blueprint.source = str(plx_file_path) + blueprints.append(blueprint) + + # Load all blueprints into the library + try: + self.load_from_blueprints(library_id=library_id, blueprints=blueprints) + except DomainDefinitionError as domain_def_error: + msg = f"Could not load domains from blueprints: {domain_def_error}" + raise LibraryLoadingError(msg) from domain_def_error + except ConceptDefinitionError as concept_def_error: + msg = f"Could not load concepts from blueprints: {concept_def_error}" + raise LibraryLoadingError(msg) from concept_def_error + except PipeDefinitionError as pipe_def_error: + msg = f"Could not load pipes from blueprints: {pipe_def_error}" + raise LibraryLoadingError(msg) from pipe_def_error + except ValidationError as validation_error: + validation_error_msg = report_validation_error(category="plx", validation_error=validation_error) + msg = f"Could not load blueprints because of: {validation_error_msg}" + raise LibraryLoadingError(msg) from validation_error + def 
_import_pipelex_modules_directly(self) -> None: """Import pipelex modules to register @pipe_func decorated functions. @@ -227,3 +341,48 @@ def _import_pipelex_modules_directly(self) -> None: log.verbose("Registering @pipe_func functions from pipelex.builder") functions_count = FuncRegistryUtils.register_pipe_funcs_from_package("pipelex.builder", pipelex.builder) log.verbose(f"Registered {functions_count} @pipe_func functions from pipelex.builder") + + def _load_concepts_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[Concept]: + """Load concepts from a blueprint.""" + if blueprint.concept is None: + return [] + + concepts: list[Concept] = [] + for concept_code, concept_blueprint_or_description in blueprint.concept.items(): + concept = ConceptFactory.make_from_blueprint_or_description( + domain=blueprint.domain, + concept_code=concept_code, + concept_codes_from_the_same_domain=list(blueprint.concept.keys()), + concept_blueprint_or_description=concept_blueprint_or_description, + ) + concepts.append(concept) + return concepts + + def _load_pipes_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[PipeAbstract]: + """Load pipes from a blueprint.""" + pipes: list[PipeAbstract] = [] + if blueprint.pipe is not None: + for pipe_name, pipe_blueprint in blueprint.pipe.items(): + pipe = PipeFactory.make_from_blueprint( + domain=blueprint.domain, + pipe_code=pipe_name, + blueprint=pipe_blueprint, + concept_codes_from_the_same_domain=list(blueprint.concept.keys()) if blueprint.concept else None, + ) + pipes.append(pipe) + return pipes + + def remove_from_blueprint(self, library_id: str, blueprint: PipelexBundleBlueprint) -> None: + library = self.get_library(library_id=library_id) + if blueprint.pipe is not None: + library.pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) + + # Remove concepts (they may depend on domain) + if blueprint.concept is not None: + concept_codes_to_remove = [ + 
ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) + for concept_code in blueprint.concept + ] + library.concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) + + library.domain_library.remove_domain_by_code(domain_code=blueprint.domain) diff --git a/pipelex/libraries/library_manager_factory.py b/pipelex/libraries/library_manager_factory.py deleted file mode 100644 index d1430821f..000000000 --- a/pipelex/libraries/library_manager_factory.py +++ /dev/null @@ -1,7 +0,0 @@ -from pipelex.libraries.library_manager import LibraryManager - - -class LibraryManagerFactory: - @classmethod - def make_empty(cls) -> LibraryManager: - return LibraryManager() diff --git a/pipelex/pipe_operators/img_gen/pipe_img_gen.py b/pipelex/pipe_operators/img_gen/pipe_img_gen.py index f9be6c8d8..0cd6b8d43 100644 --- a/pipelex/pipe_operators/img_gen/pipe_img_gen.py +++ b/pipelex/pipe_operators/img_gen/pipe_img_gen.py @@ -97,14 +97,14 @@ def validate_inputs(self) -> Self: return self @override - def validate_with_libraries(self, pipeline_run_id: str | None = None): + def validate_with_libraries(self): self._validate_inputs() if self.img_gen: check_img_gen_choice_with_deck(img_gen_choice=self.img_gen) @override - def validate_output(self, pipeline_run_id: str | None = None): - if not get_concept_library(pipeline_run_id=pipeline_run_id).is_compatible( + def validate_output(self): + if not get_concept_library().is_compatible( tested_concept=self.output, wanted_concept=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True, diff --git a/pipelex/pipelex.py b/pipelex/pipelex.py index 17d80bbcf..ba7e437c3 100644 --- a/pipelex/pipelex.py +++ b/pipelex/pipelex.py @@ -29,7 +29,7 @@ from pipelex.core.validation import report_validation_error from pipelex.exceptions import PipelexConfigError, PipelexSetupError from pipelex.hub import PipelexHub, set_pipelex_hub -from pipelex.libraries.library_manager_factory import 
LibraryManagerFactory +from pipelex.libraries.library_manager import LibraryManager from pipelex.observer.local_observer import LocalObserver from pipelex.observer.observer_protocol import ObserverProtocol from pipelex.pipe_run.pipe_router import PipeRouter @@ -115,7 +115,7 @@ def __init__( self.pipelex_hub.set_report_delegate(self.reporting_delegate) # pipelex libraries - self.library_manager = LibraryManagerFactory.make_empty() + self.library_manager = LibraryManager() self.pipelex_hub.set_library_manager(library_manager=self.library_manager) # pipelex pipeline diff --git a/tests/integration/pipelex/test_libraries.py b/tests/integration/pipelex/test_libraries.py index 560a49a36..a990da56f 100644 --- a/tests/integration/pipelex/test_libraries.py +++ b/tests/integration/pipelex/test_libraries.py @@ -1,15 +1,10 @@ -from pathlib import Path - -import pytest +# from pipelex.libraries.library_manager_factory import LibraryManagerFactory from rich import box from rich.console import Console from rich.table import Table -from pipelex import pretty_print from pipelex.core.concepts.concept_library import ConceptLibrary from pipelex.core.pipes.pipe_library import PipeLibrary -from pipelex.libraries.library_manager_factory import LibraryManagerFactory -from tests.integration.pipelex.test_data import LibraryTestCases def pretty_print_all_pipes( @@ -78,26 +73,26 @@ def pretty_print_all_concepts( console.print(table) -class TestLibraries: - @pytest.mark.parametrize(("known_concept", "known_pipe"), LibraryTestCases.KNOWN_CONCEPTS_AND_PIPES) - def test_load_combo_libraries( - self, - known_concept: str, - known_pipe: str, - ): - library_manager = LibraryManagerFactory.make_empty() - test_pipelines_dir = [Path(LibraryTestCases.TEST_PIPELINES_DIR_PATH)] - library_manager.load_libraries(library_dirs=test_pipelines_dir) - # Verify that libraries were loaded - concept_library = library_manager.get_concept_library() - pipe_library = library_manager.get_pipe_library() - assert 
len(concept_library.root) > 0, "No concepts were loaded" - assert len(pipe_library.root) > 0, "No pipes were loaded" +# class TestLibraries: +# @pytest.mark.parametrize(("known_concept", "known_pipe"), LibraryTestCases.KNOWN_CONCEPTS_AND_PIPES) +# def test_load_combo_libraries( +# self, +# known_concept: str, +# known_pipe: str, +# ): +# library_manager = LibraryManager() +# test_pipelines_dir = [Path(LibraryTestCases.TEST_PIPELINES_DIR_PATH)] +# library_manager.load_libraries(library_dirs=test_pipelines_dir) +# # Verify that libraries were loaded +# concept_library = library_manager.get_library(library_id=SpecialLibraryId.UNTITLED).concept_library +# pipe_library = library_manager.get_pipe_library() +# assert len(concept_library.root) > 0, "No concepts were loaded" +# assert len(pipe_library.root) > 0, "No pipes were loaded" - # Test individual concepts and pipes - assert concept_library.get_required_concept(concept_string=known_concept) is not None - pretty_print( - f"Concept: {known_concept} is correctly loaded as {concept_library.get_required_concept(concept_string=known_concept)}", - ) - assert pipe_library.get_optional_pipe(known_pipe) is not None - pretty_print(f"Pipe: {known_pipe} is correctly loaded as {pipe_library.get_optional_pipe(known_pipe)}") +# # Test individual concepts and pipes +# assert concept_library.get_required_concept(concept_string=known_concept) is not None +# pretty_print( +# f"Concept: {known_concept} is correctly loaded as {concept_library.get_required_concept(concept_string=known_concept)}", +# ) +# assert pipe_library.get_optional_pipe(known_pipe) is not None +# pretty_print(f"Pipe: {known_pipe} is correctly loaded as {pipe_library.get_optional_pipe(known_pipe)}") From 4cc39aef123d8b79e5ba9b2ab566a89efcaa1608 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Fri, 17 Oct 2025 12:05:50 +0200 Subject: [PATCH 112/115] ok --- pipelex/core/concepts/concept_library.py | 16 +++++----------- .../core/concepts/concept_library_abstract.py | 5 
----- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/pipelex/core/concepts/concept_library.py b/pipelex/core/concepts/concept_library.py index 1daa8fe61..a9c386c14 100644 --- a/pipelex/core/concepts/concept_library.py +++ b/pipelex/core/concepts/concept_library.py @@ -1,6 +1,5 @@ -from typing import Any - -from pydantic import Field, RootModel +from pydantic import Field, RootModel, model_validator +from pipelex.types import Self from typing_extensions import override from pipelex.core.concepts.concept import Concept @@ -12,7 +11,6 @@ from pipelex.core.stuffs.image_content import ImageContent from pipelex.exceptions import ConceptLibraryConceptNotFoundError, ConceptLibraryError from pipelex.hub import get_class_registry -from pipelex.types import Self ConceptLibraryRoot = dict[str, Concept] @@ -20,8 +18,8 @@ class ConceptLibrary(RootModel[ConceptLibraryRoot], ConceptLibraryAbstract): root: ConceptLibraryRoot = Field(default_factory=dict) - def validate_with_libraries(self): - """Validates that the each refine concept code in the refines array of each concept in the library exists in the library""" + @model_validator(mode="before") + def validation_static(self): for concept in self.root.values(): if concept.refines and concept.refines not in self.root: msg = f"Concept '{concept.code}' refines '{concept.refines}' but no concept with the code '{concept.refines}' exists" @@ -106,17 +104,13 @@ def get_native_concepts(self) -> list[Concept]: """Create all native concepts from the hardcoded data""" return [self.get_native_concept(native_concept=native_concept) for native_concept in NativeConceptCode.values_list()] - @override - def get_class(self, concept_code: str) -> type[Any] | None: - return get_class_registry().get_class(concept_code) - @override def is_image_concept(self, concept: Concept) -> bool: """Check if the concept is an image concept. 
It is an image concept if its structure class is a subclass of ImageContent or if it refines the native Image concept. """ - pydantic_model = self.get_class(concept_code=concept.structure_class_name) + pydantic_model = get_class_registry().get_class(concept.structure_class_name) is_image_class = bool(pydantic_model and issubclass(pydantic_model, ImageContent)) refines_image = self.is_compatible( tested_concept=concept, diff --git a/pipelex/core/concepts/concept_library_abstract.py b/pipelex/core/concepts/concept_library_abstract.py index 386e895a3..f08e7d34f 100644 --- a/pipelex/core/concepts/concept_library_abstract.py +++ b/pipelex/core/concepts/concept_library_abstract.py @@ -1,5 +1,4 @@ from abc import ABC, abstractmethod -from typing import Any from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_native import NativeConceptCode @@ -54,10 +53,6 @@ def is_image_concept(self, concept: Concept) -> bool: def search_for_concept_in_domains(self, concept_code: str, search_domains: list[str]) -> Concept | None: pass - @abstractmethod - def get_class(self, concept_code: str) -> type[Any] | None: - pass - @abstractmethod def get_native_concept(self, native_concept: NativeConceptCode) -> Concept: pass From 5b9450a33d63d7034b1cdddab975163ca953ce66 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Fri, 17 Oct 2025 14:16:17 +0200 Subject: [PATCH 113/115] fix --- pipelex/core/concepts/concept_library.py | 19 +-------- .../core/concepts/concept_library_abstract.py | 4 -- pipelex/core/pipes/pipe_library.py | 22 +--------- pipelex/core/pipes/pipe_library_abstract.py | 4 -- pipelex/libraries/library.py | 31 ++++++++++++-- .../llm/llm_prompt_blueprint.py | 3 -- pipelex/pipe_operators/llm/pipe_llm.py | 8 ++-- .../concept_library/test_concept_library.py | 41 +++++++++++++++---- 8 files changed, 67 insertions(+), 65 deletions(-) diff --git a/pipelex/core/concepts/concept_library.py b/pipelex/core/concepts/concept_library.py index a9c386c14..b783e0562 
100644 --- a/pipelex/core/concepts/concept_library.py +++ b/pipelex/core/concepts/concept_library.py @@ -1,5 +1,4 @@ from pydantic import Field, RootModel, model_validator -from pipelex.types import Self from typing_extensions import override from pipelex.core.concepts.concept import Concept @@ -8,9 +7,8 @@ from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract from pipelex.core.concepts.concept_native import NativeConceptCode from pipelex.core.domains.domain import SpecialDomain -from pipelex.core.stuffs.image_content import ImageContent from pipelex.exceptions import ConceptLibraryConceptNotFoundError, ConceptLibraryError -from pipelex.hub import get_class_registry +from pipelex.types import Self ConceptLibraryRoot = dict[str, Concept] @@ -104,21 +102,6 @@ def get_native_concepts(self) -> list[Concept]: """Create all native concepts from the hardcoded data""" return [self.get_native_concept(native_concept=native_concept) for native_concept in NativeConceptCode.values_list()] - @override - def is_image_concept(self, concept: Concept) -> bool: - """Check if the concept is an image concept. - It is an image concept if its structure class is a subclass of ImageContent - or if it refines the native Image concept. 
- """ - pydantic_model = get_class_registry().get_class(concept.structure_class_name) - is_image_class = bool(pydantic_model and issubclass(pydantic_model, ImageContent)) - refines_image = self.is_compatible( - tested_concept=concept, - wanted_concept=self.get_native_concept(native_concept=NativeConceptCode.IMAGE), - strict=True, - ) - return is_image_class or refines_image - @override def search_for_concept_in_domains(self, concept_code: str, search_domains: list[str]) -> Concept | None: ConceptBlueprint.validate_concept_code(concept_code=concept_code) diff --git a/pipelex/core/concepts/concept_library_abstract.py b/pipelex/core/concepts/concept_library_abstract.py index f08e7d34f..c32ae4c12 100644 --- a/pipelex/core/concepts/concept_library_abstract.py +++ b/pipelex/core/concepts/concept_library_abstract.py @@ -45,10 +45,6 @@ def reset(self) -> None: def teardown(self) -> None: pass - @abstractmethod - def is_image_concept(self, concept: Concept) -> bool: - pass - @abstractmethod def search_for_concept_in_domains(self, concept_code: str, search_domains: list[str]) -> Concept | None: pass diff --git a/pipelex/core/pipes/pipe_library.py b/pipelex/core/pipes/pipe_library.py index 62839d2c9..72e2f072b 100644 --- a/pipelex/core/pipes/pipe_library.py +++ b/pipelex/core/pipes/pipe_library.py @@ -8,33 +8,13 @@ from pipelex import pretty_print from pipelex.core.pipes.pipe_abstract import PipeAbstract from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract -from pipelex.exceptions import ConceptError, ConceptLibraryConceptNotFoundError, PipeLibraryError, PipeLibraryPipeNotFoundError -from pipelex.hub import get_concept_library +from pipelex.exceptions import PipeLibraryError, PipeLibraryPipeNotFoundError from pipelex.types import Self PipeLibraryRoot = dict[str, PipeAbstract] class PipeLibrary(RootModel[PipeLibraryRoot], PipeLibraryAbstract): - @override - def validate_with_libraries(self, pipeline_run_id: str | None = None): - concept_library = 
get_concept_library() - for pipe in self.root.values(): - pipe.validate_output() - try: - for concept in pipe.concept_dependencies(): - try: - concept_library.get_required_concept(concept_string=concept.concept_string) - except ConceptError as concept_error: - msg = f"Error validating pipe '{pipe.code}' dependency concept '{concept.concept_string}' because of: {concept_error}" - raise PipeLibraryError(msg) from concept_error - for pipe_code in pipe.pipe_dependencies(): - self.get_required_pipe(pipe_code=pipe_code) - pipe.validate_with_libraries() - except (ConceptLibraryConceptNotFoundError, PipeLibraryPipeNotFoundError) as not_found_error: - msg = f"Missing dependency for pipe '{pipe.code}': {not_found_error}" - raise PipeLibraryError(msg) from not_found_error - @override def teardown(self): self.root = {} diff --git a/pipelex/core/pipes/pipe_library_abstract.py b/pipelex/core/pipes/pipe_library_abstract.py index 4ea9aa2e8..12e035400 100644 --- a/pipelex/core/pipes/pipe_library_abstract.py +++ b/pipelex/core/pipes/pipe_library_abstract.py @@ -17,10 +17,6 @@ def reset(self) -> None: self.teardown() self.setup() - @abstractmethod - def validate_with_libraries(self, pipeline_run_id: str | None = None) -> None: - pass - @abstractmethod def get_required_pipe(self, pipe_code: str) -> PipeAbstract: pass diff --git a/pipelex/libraries/library.py b/pipelex/libraries/library.py index 92f93a0ef..2760c1c65 100644 --- a/pipelex/libraries/library.py +++ b/pipelex/libraries/library.py @@ -3,6 +3,12 @@ from pipelex.core.concepts.concept_library import ConceptLibrary from pipelex.core.domains.domain_library import DomainLibrary from pipelex.core.pipes.pipe_library import PipeLibrary +from pipelex.exceptions import ( + ConceptError, + ConceptLibraryConceptNotFoundError, + PipeLibraryError, + PipeLibraryPipeNotFoundError, +) class Library(BaseModel): @@ -33,6 +39,25 @@ def teardown(self) -> None: self.domain_library.teardown() def validate_library(self) -> None: - 
self.concept_library.validate_with_libraries() - self.pipe_library.validate_with_libraries() - self.domain_library.validate_with_libraries() + self.validate_pipe_library_with_libraries() + + def validate_pipe_library_with_libraries(self) -> None: + for pipe in self.pipe_library.root.values(): + try: + # Validate concept dependencies exit + for concept in pipe.concept_dependencies(): + try: + self.concept_library.get_required_concept(concept_string=concept.concept_string) + except ConceptError as concept_error: + msg = f"Error validating pipe '{pipe.code}' dependency concept '{concept.concept_string}' because of: {concept_error}" + raise PipeLibraryError(msg) from concept_error + + # Validate pipe dependencies exit + for pipe_code in pipe.pipe_dependencies(): + self.pipe_library.get_required_pipe(pipe_code=pipe_code) + + except (ConceptLibraryConceptNotFoundError, PipeLibraryPipeNotFoundError) as not_found_error: + msg = f"Missing dependency for pipe '{pipe.code}': {not_found_error}" + raise PipeLibraryError(msg) from not_found_error + for pipe in self.pipe_library.root.values(): + pipe.validate_with_libraries() diff --git a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py index 6d5a031ed..512427f1f 100644 --- a/pipelex/pipe_operators/llm/llm_prompt_blueprint.py +++ b/pipelex/pipe_operators/llm/llm_prompt_blueprint.py @@ -25,9 +25,6 @@ class LLMPromptBlueprint(BaseModel): prompt_blueprint: TemplateBlueprint | None = None user_images: list[str] | None = None - def validate_with_libraries(self): - pass - def required_variables(self) -> set[str]: required_variables: set[str] = set() if self.user_images: diff --git a/pipelex/pipe_operators/llm/pipe_llm.py b/pipelex/pipe_operators/llm/pipe_llm.py index 971f505f9..5d01e2b36 100644 --- a/pipelex/pipe_operators/llm/pipe_llm.py +++ b/pipelex/pipe_operators/llm/pipe_llm.py @@ -13,6 +13,7 @@ from pipelex.cogt.models.model_deck_check import check_llm_choice_with_deck from 
pipelex.cogt.templating.template_category import TemplateCategory from pipelex.config import StaticValidationReaction, get_config +from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode from pipelex.core.domains.domain import SpecialDomain @@ -78,9 +79,8 @@ def validate_output_concept_consistency(self) -> Self: return self @override - def validate_with_libraries(self, pipeline_run_id: str | None = None): + def validate_with_libraries(self): self.validate_inputs() - self.llm_prompt_spec.validate_with_libraries() if self.llm_choices: for llm_choice in self.llm_choices.list_choices(): check_llm_choice_with_deck(llm_choice=llm_choice) @@ -139,7 +139,9 @@ def validate_inputs(self): for input_name, requirement in self.needed_inputs().items: if input_name not in required_variables: explanation: str | None = None - if get_concept_library().is_image_concept(concept=requirement.concept): + if Concept.are_concept_compatible( + concept_1=requirement.concept, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True + ): # We have an exraneous image input, the user probably forgot to add it into the prompt template explanation = ( f"You have provided an image input named '{input_name}', but it is not referenced in the prompt template. 
" diff --git a/tests/unit/pipelex/core/concepts/concept_library/test_concept_library.py b/tests/unit/pipelex/core/concepts/concept_library/test_concept_library.py index 011b12c87..fc0841f75 100644 --- a/tests/unit/pipelex/core/concepts/concept_library/test_concept_library.py +++ b/tests/unit/pipelex/core/concepts/concept_library/test_concept_library.py @@ -1,12 +1,12 @@ +from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode -from pipelex.hub import get_concept_library +from pipelex.hub import get_native_concept class TestConceptLibrary: def test_is_image_concept(self): - concept_library = get_concept_library() native_image_concept = ConceptFactory.make_native_concept(native_concept_code=NativeConceptCode.IMAGE) concept_1 = ConceptFactory.make_from_blueprint( @@ -62,10 +62,33 @@ def test_is_image_concept(self): ), ) - assert concept_library.is_image_concept(concept=native_image_concept) is True - assert concept_library.is_image_concept(concept=concept_1) is True - assert concept_library.is_image_concept(concept=concept_2) is True - assert concept_library.is_image_concept(concept=concept_3) is True - assert concept_library.is_image_concept(concept=concept_4) is True - assert concept_library.is_image_concept(concept=concept_5) is False - assert concept_library.is_image_concept(concept=concept_6) is False + assert ( + Concept.are_concept_compatible( + concept_1=native_image_concept, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True + ) + is True + ) + assert ( + Concept.are_concept_compatible(concept_1=concept_1, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True) + is True + ) + assert ( + Concept.are_concept_compatible(concept_1=concept_2, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), 
strict=True) + is True + ) + assert ( + Concept.are_concept_compatible(concept_1=concept_3, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True) + is True + ) + assert ( + Concept.are_concept_compatible(concept_1=concept_4, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True) + is True + ) + assert ( + Concept.are_concept_compatible(concept_1=concept_5, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True) + is False + ) + assert ( + Concept.are_concept_compatible(concept_1=concept_6, concept_2=get_native_concept(native_concept=NativeConceptCode.IMAGE), strict=True) + is False + ) From 6f40bbfdbdbc509ac83442fbc08004f6155be6e2 Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Tue, 21 Oct 2025 09:18:16 -0700 Subject: [PATCH 114/115] ok --- pipelex/cli/commands/build_cmd.py | 1 - pipelex/core/concepts/concept_library.py | 2 +- pipelex/libraries/library_manager.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pipelex/cli/commands/build_cmd.py b/pipelex/cli/commands/build_cmd.py index f9e7b5c79..b8f7213a2 100644 --- a/pipelex/cli/commands/build_cmd.py +++ b/pipelex/cli/commands/build_cmd.py @@ -324,4 +324,3 @@ async def run_pipeline(): typer.secho(f"\n✅ Pipeline built in {end_time - start_time:.2f} seconds", fg=typer.colors.GREEN) get_report_delegate().generate_report() - diff --git a/pipelex/core/concepts/concept_library.py b/pipelex/core/concepts/concept_library.py index 51388ef70..b69e3aae6 100644 --- a/pipelex/core/concepts/concept_library.py +++ b/pipelex/core/concepts/concept_library.py @@ -144,4 +144,4 @@ def get_required_concept_from_concept_string_or_code(self, concept_string_or_cod if len(found_concepts) > 1: msg = f"Multiple concepts found for '{concept_string_or_code}': {found_concepts}. Please specify the domain." 
raise ConceptLibraryConceptNotFoundError(msg) - return found_concepts[0] \ No newline at end of file + return found_concepts[0] diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index 9f2ecd0c5..c90e22a2b 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -383,6 +383,6 @@ def remove_from_blueprint(self, library_id: str, blueprint: PipelexBundleBluepri ConceptFactory.make_concept_string_with_domain(domain=blueprint.domain, concept_code=concept_code) for concept_code in blueprint.concept ] - library.concept_library.remove_concepts_by_codes(concept_codes=concept_codes_to_remove) + library.concept_library.remove_concepts_by_concept_strings(concept_strings=concept_codes_to_remove) library.domain_library.remove_domain_by_code(domain_code=blueprint.domain) From 4bcb9dceb55d0e5c3cb3967e356629732f9881ac Mon Sep 17 00:00:00 2001 From: thomashebrard Date: Tue, 21 Oct 2025 10:58:26 -0700 Subject: [PATCH 115/115] make cc --- pipelex/builder/builder_validation.py | 5 +++-- pipelex/core/stuffs/stuff_factory.py | 2 +- pipelex/hub.py | 6 +++--- pipelex/libraries/concept/__init__.py | 0 .../concepts => libraries/concept}/concept_library.py | 2 +- .../concept}/concept_library_abstract.py | 0 pipelex/libraries/domain/__init__.py | 0 .../domains => libraries/domain}/domain_library.py | 2 +- .../domain}/domain_library_abstract.py | 0 pipelex/libraries/library.py | 6 +++--- pipelex/libraries/library_factory.py | 6 +++--- pipelex/libraries/library_manager.py | 8 ++++++-- pipelex/libraries/library_manager_abstract.py | 11 +++++++++++ pipelex/libraries/pipe/__init__.py | 0 .../{core/pipes => libraries/pipe}/pipe_library.py | 2 +- .../pipes => libraries/pipe}/pipe_library_abstract.py | 0 .../controller/pipe_batch/test_pipe_batch_simple.py | 3 ++- tests/integration/pipelex/test_libraries.py | 4 ++-- 18 files changed, 37 insertions(+), 20 deletions(-) create mode 100644 pipelex/libraries/concept/__init__.py 
rename pipelex/{core/concepts => libraries/concept}/concept_library.py (98%) rename pipelex/{core/concepts => libraries/concept}/concept_library_abstract.py (100%) create mode 100644 pipelex/libraries/domain/__init__.py rename pipelex/{core/domains => libraries/domain}/domain_library.py (95%) rename pipelex/{core/domains => libraries/domain}/domain_library_abstract.py (100%) create mode 100644 pipelex/libraries/pipe/__init__.py rename pipelex/{core/pipes => libraries/pipe}/pipe_library.py (98%) rename pipelex/{core/pipes => libraries/pipe}/pipe_library_abstract.py (100%) diff --git a/pipelex/builder/builder_validation.py b/pipelex/builder/builder_validation.py index 50f712491..96f5e02b2 100644 --- a/pipelex/builder/builder_validation.py +++ b/pipelex/builder/builder_validation.py @@ -21,6 +21,7 @@ StaticValidationError, ) from pipelex.hub import get_library_manager +from pipelex.libraries.library_ids import SpecialLibraryId from pipelex.pipe_run.dry_run import DryRunOutput, dry_run_pipes @@ -41,7 +42,7 @@ async def validate_bundle_spec(bundle_spec: PipelexBundleSpec): library_manager = get_library_manager() dry_run_result = await dry_run_bundle_blueprint(bundle_blueprint=bundle_blueprint) - library_manager.remove_from_blueprint(blueprint=bundle_blueprint) + library_manager.remove_from_blueprints(library_id=SpecialLibraryId.UNTITLED, blueprints=[bundle_blueprint]) dry_run_pipe_failures = extract_pipe_failures_from_dry_run_result(bundle_spec=bundle_spec, dry_run_result=dry_run_result) if dry_run_pipe_failures: @@ -103,7 +104,7 @@ def document_pipe_failures_from_dry_run_blueprint( async def dry_run_bundle_blueprint(bundle_blueprint: PipelexBundleBlueprint) -> dict[str, DryRunOutput]: library_manager = get_library_manager() try: - pipes = library_manager.load_from_blueprint(blueprint=bundle_blueprint) + pipes = library_manager.load_from_blueprints(library_id=SpecialLibraryId.UNTITLED, blueprints=[bundle_blueprint]) dry_run_result = await dry_run_pipes(pipes=pipes, 
raise_on_failure=True) except StaticValidationError as static_validation_error: static_validation_error_data = StaticValidationErrorData( diff --git a/pipelex/core/stuffs/stuff_factory.py b/pipelex/core/stuffs/stuff_factory.py index 85a9d2c90..aa773a763 100644 --- a/pipelex/core/stuffs/stuff_factory.py +++ b/pipelex/core/stuffs/stuff_factory.py @@ -7,7 +7,6 @@ from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.concepts.concept_factory import ConceptFactory -from pipelex.core.concepts.concept_library import ConceptLibraryConceptNotFoundError from pipelex.core.concepts.concept_native import NativeConceptCode from pipelex.core.stuffs.list_content import ListContent from pipelex.core.stuffs.structured_content import StructuredContent @@ -16,6 +15,7 @@ from pipelex.core.stuffs.text_content import TextContent from pipelex.exceptions import PipelexException from pipelex.hub import get_class_registry, get_concept_library, get_native_concept, get_required_concept +from pipelex.libraries.concept.concept_library import ConceptLibraryConceptNotFoundError from pipelex.tools.typing.pydantic_utils import format_pydantic_validation_error diff --git a/pipelex/hub.py b/pipelex/hub.py index 9d02fdf98..bd3c98ad4 100644 --- a/pipelex/hub.py +++ b/pipelex/hub.py @@ -13,14 +13,14 @@ from pipelex.cogt.models.model_deck import ModelDeck from pipelex.cogt.models.model_manager_abstract import ModelManagerAbstract from pipelex.core.concepts.concept import Concept -from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract from pipelex.core.concepts.concept_native import NativeConceptCode from pipelex.core.domains.domain import Domain -from pipelex.core.domains.domain_library_abstract import DomainLibraryAbstract from pipelex.core.pipes.pipe_abstract import PipeAbstract -from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract +from 
pipelex.libraries.concept.concept_library_abstract import ConceptLibraryAbstract +from pipelex.libraries.domain.domain_library_abstract import DomainLibraryAbstract from pipelex.libraries.library_ids import SpecialLibraryId from pipelex.libraries.library_manager_abstract import LibraryManagerAbstract +from pipelex.libraries.pipe.pipe_library_abstract import PipeLibraryAbstract from pipelex.observer.observer_protocol import ObserverProtocol from pipelex.pipe_run.pipe_router_protocol import PipeRouterProtocol from pipelex.pipeline.activity.activity_manager_protocol import ActivityManagerProtocol diff --git a/pipelex/libraries/concept/__init__.py b/pipelex/libraries/concept/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelex/core/concepts/concept_library.py b/pipelex/libraries/concept/concept_library.py similarity index 98% rename from pipelex/core/concepts/concept_library.py rename to pipelex/libraries/concept/concept_library.py index b69e3aae6..416c0bd58 100644 --- a/pipelex/core/concepts/concept_library.py +++ b/pipelex/libraries/concept/concept_library.py @@ -4,10 +4,10 @@ from pipelex.core.concepts.concept import Concept from pipelex.core.concepts.concept_blueprint import ConceptBlueprint from pipelex.core.concepts.concept_factory import ConceptFactory -from pipelex.core.concepts.concept_library_abstract import ConceptLibraryAbstract from pipelex.core.concepts.concept_native import NativeConceptCode from pipelex.core.domains.domain import SpecialDomain from pipelex.exceptions import ConceptLibraryConceptNotFoundError, ConceptLibraryError +from pipelex.libraries.concept.concept_library_abstract import ConceptLibraryAbstract from pipelex.types import Self ConceptLibraryRoot = dict[str, Concept] diff --git a/pipelex/core/concepts/concept_library_abstract.py b/pipelex/libraries/concept/concept_library_abstract.py similarity index 100% rename from pipelex/core/concepts/concept_library_abstract.py rename to 
pipelex/libraries/concept/concept_library_abstract.py diff --git a/pipelex/libraries/domain/__init__.py b/pipelex/libraries/domain/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelex/core/domains/domain_library.py b/pipelex/libraries/domain/domain_library.py similarity index 95% rename from pipelex/core/domains/domain_library.py rename to pipelex/libraries/domain/domain_library.py index 828e793f2..5a112214c 100644 --- a/pipelex/core/domains/domain_library.py +++ b/pipelex/libraries/domain/domain_library.py @@ -2,8 +2,8 @@ from typing_extensions import override from pipelex.core.domains.domain import Domain -from pipelex.core.domains.domain_library_abstract import DomainLibraryAbstract from pipelex.exceptions import DomainLibraryError +from pipelex.libraries.domain.domain_library_abstract import DomainLibraryAbstract from pipelex.types import Self DomainLibraryRoot = dict[str, Domain] diff --git a/pipelex/core/domains/domain_library_abstract.py b/pipelex/libraries/domain/domain_library_abstract.py similarity index 100% rename from pipelex/core/domains/domain_library_abstract.py rename to pipelex/libraries/domain/domain_library_abstract.py diff --git a/pipelex/libraries/library.py b/pipelex/libraries/library.py index 2760c1c65..c6ef37788 100644 --- a/pipelex/libraries/library.py +++ b/pipelex/libraries/library.py @@ -1,14 +1,14 @@ from pydantic import BaseModel -from pipelex.core.concepts.concept_library import ConceptLibrary -from pipelex.core.domains.domain_library import DomainLibrary -from pipelex.core.pipes.pipe_library import PipeLibrary from pipelex.exceptions import ( ConceptError, ConceptLibraryConceptNotFoundError, PipeLibraryError, PipeLibraryPipeNotFoundError, ) +from pipelex.libraries.concept.concept_library import ConceptLibrary +from pipelex.libraries.domain.domain_library import DomainLibrary +from pipelex.libraries.pipe.pipe_library import PipeLibrary class Library(BaseModel): diff --git 
a/pipelex/libraries/library_factory.py b/pipelex/libraries/library_factory.py index 985ff327e..e513bc063 100644 --- a/pipelex/libraries/library_factory.py +++ b/pipelex/libraries/library_factory.py @@ -1,9 +1,9 @@ from pydantic import BaseModel -from pipelex.core.concepts.concept_library import ConceptLibrary -from pipelex.core.domains.domain_library import DomainLibrary -from pipelex.core.pipes.pipe_library import PipeLibrary +from pipelex.libraries.concept.concept_library import ConceptLibrary +from pipelex.libraries.domain.domain_library import DomainLibrary from pipelex.libraries.library import Library +from pipelex.libraries.pipe.pipe_library import PipeLibrary class LibraryFactory(BaseModel): diff --git a/pipelex/libraries/library_manager.py b/pipelex/libraries/library_manager.py index c90e22a2b..70e689b3d 100644 --- a/pipelex/libraries/library_manager.py +++ b/pipelex/libraries/library_manager.py @@ -239,6 +239,7 @@ def load_libraries( # Private helper methods ############################################################ + @override def load_from_blueprints(self, library_id: str, blueprints: list[PipelexBundleBlueprint]) -> list[PipeAbstract]: """Load domains, concepts, and pipes from a list of blueprints. 
@@ -372,7 +373,7 @@ def _load_pipes_from_blueprint(self, blueprint: PipelexBundleBlueprint) -> list[ pipes.append(pipe) return pipes - def remove_from_blueprint(self, library_id: str, blueprint: PipelexBundleBlueprint) -> None: + def _remove_pipes_from_blueprint(self, library_id: str, blueprint: PipelexBundleBlueprint) -> None: library = self.get_library(library_id=library_id) if blueprint.pipe is not None: library.pipe_library.remove_pipes_by_codes(pipe_codes=list(blueprint.pipe.keys())) @@ -385,4 +386,7 @@ def remove_from_blueprint(self, library_id: str, blueprint: PipelexBundleBluepri ] library.concept_library.remove_concepts_by_concept_strings(concept_strings=concept_codes_to_remove) - library.domain_library.remove_domain_by_code(domain_code=blueprint.domain) + @override + def remove_from_blueprints(self, library_id: str, blueprints: list[PipelexBundleBlueprint]) -> None: + for blueprint in blueprints: + self._remove_pipes_from_blueprint(library_id=library_id, blueprint=blueprint) diff --git a/pipelex/libraries/library_manager_abstract.py b/pipelex/libraries/library_manager_abstract.py index ca965669e..190660f55 100644 --- a/pipelex/libraries/library_manager_abstract.py +++ b/pipelex/libraries/library_manager_abstract.py @@ -2,6 +2,9 @@ from pathlib import Path from typing import TYPE_CHECKING +from pipelex.core.bundles.pipelex_bundle_blueprint import PipelexBundleBlueprint +from pipelex.core.pipes.pipe_abstract import PipeAbstract + if TYPE_CHECKING: from pipelex.libraries.library import Library @@ -43,3 +46,11 @@ def load_libraries( library_file_paths: list[Path] | None = None, ) -> None: pass + + @abstractmethod + def load_from_blueprints(self, library_id: str, blueprints: list[PipelexBundleBlueprint]) -> list[PipeAbstract]: + pass + + @abstractmethod + def remove_from_blueprints(self, library_id: str, blueprints: list[PipelexBundleBlueprint]) -> None: + pass diff --git a/pipelex/libraries/pipe/__init__.py b/pipelex/libraries/pipe/__init__.py new file mode 
100644 index 000000000..e69de29bb diff --git a/pipelex/core/pipes/pipe_library.py b/pipelex/libraries/pipe/pipe_library.py similarity index 98% rename from pipelex/core/pipes/pipe_library.py rename to pipelex/libraries/pipe/pipe_library.py index 049150df2..ca6e78319 100644 --- a/pipelex/core/pipes/pipe_library.py +++ b/pipelex/libraries/pipe/pipe_library.py @@ -7,8 +7,8 @@ from pipelex import pretty_print from pipelex.core.pipes.pipe_abstract import PipeAbstract -from pipelex.core.pipes.pipe_library_abstract import PipeLibraryAbstract from pipelex.exceptions import PipeLibraryError, PipeLibraryPipeNotFoundError +from pipelex.libraries.pipe.pipe_library_abstract import PipeLibraryAbstract from pipelex.types import Self PipeLibraryRoot = dict[str, PipeAbstract] diff --git a/pipelex/core/pipes/pipe_library_abstract.py b/pipelex/libraries/pipe/pipe_library_abstract.py similarity index 100% rename from pipelex/core/pipes/pipe_library_abstract.py rename to pipelex/libraries/pipe/pipe_library_abstract.py diff --git a/tests/integration/pipelex/pipes/controller/pipe_batch/test_pipe_batch_simple.py b/tests/integration/pipelex/pipes/controller/pipe_batch/test_pipe_batch_simple.py index 137d430ec..054443cd0 100644 --- a/tests/integration/pipelex/pipes/controller/pipe_batch/test_pipe_batch_simple.py +++ b/tests/integration/pipelex/pipes/controller/pipe_batch/test_pipe_batch_simple.py @@ -6,7 +6,8 @@ from pytest import FixtureRequest from pipelex import pretty_print -from pipelex.core.concepts.concept_factory import ConceptBlueprint, ConceptFactory +from pipelex.core.concepts.concept_blueprint import ConceptBlueprint +from pipelex.core.concepts.concept_factory import ConceptFactory from pipelex.core.concepts.concept_native import NativeConceptCode from pipelex.core.memory.working_memory_factory import WorkingMemoryFactory from pipelex.core.pipes.input_requirement_blueprint import InputRequirementBlueprint diff --git a/tests/integration/pipelex/test_libraries.py 
b/tests/integration/pipelex/test_libraries.py index a990da56f..579203da7 100644 --- a/tests/integration/pipelex/test_libraries.py +++ b/tests/integration/pipelex/test_libraries.py @@ -3,8 +3,8 @@ from rich.console import Console from rich.table import Table -from pipelex.core.concepts.concept_library import ConceptLibrary -from pipelex.core.pipes.pipe_library import PipeLibrary +from pipelex.libraries.concept.concept_library import ConceptLibrary +from pipelex.libraries.pipe.pipe_library import PipeLibrary def pretty_print_all_pipes(