[Executor] Gen flow meta for eager flow (#2027)

# Description This pull request primarily adds a function that generates flow meta for eager flows. The flow meta is similar to tool meta, including entry, function name, inputs, outputs and source. The function will be called by the SDK, which saves the returned metadata to `flow.json`. Expected flow meta: ``` { "function": "my_flow", "entry": "flow_with_trace:my_flow", "inputs": { "text": { "type": "string" }, "models": { "type": "list" } }, "outputs": { "output": { "type": "string" } }, "source": "flow_with_trace.py" } ``` The most important changes are: New Functionality: * [`src/promptflow/promptflow/_core/tool_meta_generator.py`](diffhunk://#diff-5cd3c0319063421c9ff9210ca997e3f001e5d1e2c9f97b912f1a3f233f9ee5a6R103-R111): Added a new function `collect_flow_entry_in_module(m, entry)` to collect the flow entry from a module. This function looks up the module attribute named by the provided entry and returns it if it is a function; otherwise it raises a `PythonParsingError`. * [`src/promptflow/promptflow/_core/tool_meta_generator.py`](diffhunk://#diff-5cd3c0319063421c9ff9210ca997e3f001e5d1e2c9f97b912f1a3f233f9ee5a6R322-R340): Introduced another function `generate_flow_meta_dict_by_file(path: str, entry: str, source: str = None)`. This function generates a dictionary containing metadata about an eager flow, including its entry, function name, inputs, outputs, and source if provided. Unit Tests: * [`src/promptflow/tests/executor/unittests/_utils/test_generate_tool_meta_utils.py`](diffhunk://#diff-589487a3ccd77dbcc1241d9ee5d0678946185e6f4fea345c88d90e26dbc53631R38-R46): Added a new helper function `cd_and_run_generate_flow_meta(working_dir, source_path, entry, source=None)` to test the generation of flow metadata. 
* [`src/promptflow/tests/executor/unittests/_utils/test_generate_tool_meta_utils.py`](diffhunk://#diff-589487a3ccd77dbcc1241d9ee5d0678946185e6f4fea345c88d90e26dbc53631R100-R113): Included a new test case `test_generate_flow_meta(self, flow_dir, entry_path, entry)` to validate the functionality of the `generate_flow_meta_dict_by_file` function. Test Configurations: * [`src/promptflow/tests/test_configs/eager_flows/dummy_flow_with_trace/flow_with_trace.meta.json`](diffhunk://#diff-ba4a9d95000d35b60cd5133ecf22b71e457730888f2cbf6fd63f3cec96b691f1R1-R17): Added a new JSON file to the test configurations. This file contains metadata for the `my_flow` function from the `flow_with_trace.py` file. # All Promptflow Contribution checklist: - [x] **The pull request does not introduce [breaking changes].** - [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [x] Title of the pull request is clear and informative. - [x] There are a small number of commits, each of which have an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [x] Pull request includes test coverage for the included changes. --------- Co-authored-by: Lina Tang <linatang@microsoft.com>
microsoft · Feb 22, 2024 · 4b6c6b1 · 4b6c6b1
1 parent c98ea71
commit 4b6c6b1
Show file tree

Hide file tree

Showing 5 changed files with 102 additions and 7 deletions.
diff --git a/src/promptflow/promptflow/_core/tool_meta_generator.py b/src/promptflow/promptflow/_core/tool_meta_generator.py
@@ -100,6 +100,18 @@ def collect_tool_functions_in_module(m):
     return tools
 
 
+def collect_flow_entry_in_module(m, entry):
+    """Look up the flow entry function named by ``entry`` in module ``m``.
+
+    :param m: Module object to search.
+    :param entry: Entry string; only the text after the last ``:`` is used as the
+        attribute name, e.g. ``"flow_with_trace:my_flow"`` resolves to ``my_flow``.
+    :return: The module attribute of that name when it is a ``types.FunctionType``.
+    :raises PythonParsingError: If the attribute is missing or is not a plain function.
+    """
+    # ``entry`` is rebound to the bare function name, so the error below reports that name only.
+    entry = entry.split(":")[-1]
+    func = getattr(m, entry, None)
+    if isinstance(func, types.FunctionType):
+        return func
+    raise PythonParsingError(
+        message_format="Failed to collect flow entry '{entry}' in module '{module}'.",
+        entry=entry,
+        module=m.__name__,
+    )
+
+
 def collect_tool_methods_in_module(m):
     tools = []
     for _, obj in inspect.getmembers(m):
@@ -120,7 +132,9 @@ def collect_tool_methods_with_init_inputs_in_module(m):
     return tools
 
 
-def _parse_tool_from_function(f, initialize_inputs=None, gen_custom_type_conn=False, skip_prompt_template=False):
+def _parse_tool_from_function(
+    f, initialize_inputs=None, gen_custom_type_conn=False, skip_prompt_template=False, include_outputs=False
+):
     try:
         tool_type = getattr(f, "__type", None) or ToolType.PYTHON
     except Exception as e:
@@ -132,7 +146,7 @@ def _parse_tool_from_function(f, initialize_inputs=None, gen_custom_type_conn=Fa
     if hasattr(f, "__original_function"):
         f = f.__original_function
     try:
-        inputs, _, _, enable_kwargs = function_to_interface(
+        inputs, outputs, _, enable_kwargs = function_to_interface(
             f,
             initialize_inputs=initialize_inputs,
             gen_custom_type_conn=gen_custom_type_conn,
@@ -153,6 +167,7 @@ def _parse_tool_from_function(f, initialize_inputs=None, gen_custom_type_conn=Fa
         name=tool_name or f.__qualname__,
         description=description or inspect.getdoc(f),
         inputs=inputs,
+        outputs=outputs if include_outputs else None,
         type=tool_type,
         class_name=class_name,
         function=f.__name__,
@@ -310,6 +325,29 @@ def generate_tool_meta_dict_by_file(path: str, tool_type: ToolType):
         )
 
 
+def generate_flow_meta_dict_by_file(path: str, entry: str, source: str = None):
+    """Build the flow meta dictionary for an eager flow entry function.
+
+    The module at ``path`` is loaded, the entry function is resolved from it, and its
+    interface is parsed to fill in the meta.
+
+    :param path: Path of the Python file that defines the entry function.
+    :param entry: Entry string such as ``"flow_with_trace:my_flow"``; stored unchanged
+        under the ``"entry"`` key.
+    :param source: Optional value stored under the ``"source"`` key; the key is omitted
+        when this is falsy.
+    :return: A dict with ``"entry"`` and ``"function"``, plus ``"source"``, ``"inputs"``
+        and ``"outputs"`` when they are non-empty. Each input/output maps its name to
+        ``{"type": <first declared type value>}``.
+    :raises PythonParsingError: If the entry function cannot be found in the module.
+        NOTE(review): module loading and interface parsing may raise other errors that
+        are not visible from this function -- confirm against the helpers.
+    """
+    m = load_python_module_from_file(Path(path))
+    f = collect_flow_entry_in_module(m, entry)
+    # Since the flow meta is generated from the entry function, we leverage the function
+    # _parse_tool_from_function to parse the interface of the entry function to get the inputs and outputs.
+    tool = _parse_tool_from_function(f, include_outputs=True)
+
+    flow_meta = {"entry": entry, "function": f.__name__}
+    if source:
+        flow_meta["source"] = source
+    if tool.inputs:
+        flow_meta["inputs"] = {}
+        for k, v in tool.inputs.items():
+            # Multiple types per input are not supported, so only the first one is recorded.
+            flow_meta["inputs"][k] = {"type": v.type[0].value}
+    if tool.outputs:
+        flow_meta["outputs"] = {}
+        for k, v in tool.outputs.items():
+            # Multiple types per output are not supported, so only the first one is recorded.
+            flow_meta["outputs"][k] = {"type": v.type[0].value}
+    return flow_meta
+
+
 class ToolValidationError(UserErrorException):
     """Base exception raised when failed to validate tool."""
 

diff --git a/src/promptflow/promptflow/_utils/tool_utils.py b/src/promptflow/promptflow/_utils/tool_utils.py
@@ -15,7 +15,15 @@
 from promptflow._utils.utils import is_json_serializable
 from promptflow.exceptions import ErrorTarget, UserErrorException
 
-from ..contracts.tool import ConnectionType, InputDefinition, Tool, ToolFuncCallScenario, ToolType, ValueType
+from ..contracts.tool import (
+    ConnectionType,
+    InputDefinition,
+    OutputDefinition,
+    Tool,
+    ToolFuncCallScenario,
+    ToolType,
+    ValueType
+)
 from ..contracts.types import PromptTemplate
 
 module_logger = logging.getLogger(__name__)
@@ -142,8 +150,15 @@ def function_to_interface(
         input_defs[k] = input_def
         if is_connection:
             connection_types.append(input_def.type)
-    outputs = {}
-    # Note: We don't have output definition now
+    # Resolve output to definition
+    typ = resolve_annotation(sign.return_annotation)
+    if typ is inspect.Signature.empty:
+        output_type = [ValueType.OBJECT]
+    else:
+        # If the output annotation is a union type, then it should be a list.
+        output_type = [ValueType.from_type(t) for t in typ] if isinstance(typ, list) else [ValueType.from_type(typ)]
+    outputs = {"output": OutputDefinition(type=output_type)}
+
     return input_defs, outputs, connection_types, enable_kwargs
 
 

diff --git a/src/promptflow/tests/executor/unittests/_utils/test_generate_tool_meta_utils.py b/src/promptflow/tests/executor/unittests/_utils/test_generate_tool_meta_utils.py
@@ -13,13 +13,15 @@
     NoToolDefined,
     PythonLoadError,
     PythonParsingError,
+    generate_flow_meta_dict_by_file,
     generate_prompt_meta,
     generate_python_meta,
     generate_tool_meta_dict_by_file,
 )
+from promptflow._utils.context_utils import _change_working_dir
 from promptflow._utils.exception_utils import ExceptionPresenter
 
-from ...utils import FLOW_ROOT, load_json
+from ...utils import EAGER_FLOW_ROOT, FLOW_ROOT, load_json
 
 TEST_ROOT = Path(__file__).parent.parent.parent.parent
 TOOLS_ROOT = TEST_ROOT / "test_configs/wrong_tools"
@@ -34,6 +36,14 @@ def cd_and_run(working_dir, source_path, tool_type):
         return f"({e.__class__.__name__}) {e}"
 
 
+def cd_and_run_generate_flow_meta(working_dir, source_path, entry, source=None):
+    """Call ``generate_flow_meta_dict_by_file`` inside the ``_change_working_dir(working_dir)`` context.
+
+    :param working_dir: Directory passed to ``_change_working_dir``
+        (presumably becomes the current working directory -- confirm against that helper).
+    :param source_path: Path of the flow's Python file, forwarded as ``path``.
+    :param entry: Entry string, forwarded unchanged.
+    :param source: Optional ``source`` value, forwarded unchanged.
+    :return: The generated flow meta dict, or, if any exception is raised, a string of
+        the form ``"(<ExceptionClassName>) <message>"`` so tests can assert on failures.
+    """
+    with _change_working_dir(working_dir):
+        try:
+            return generate_flow_meta_dict_by_file(source_path, entry, source)
+        except Exception as e:
+            # Deliberately broad: the failure is returned as text instead of propagating.
+            return f"({e.__class__.__name__}) {e}"
+
+
 def cd_and_run_with_read_text_error(working_dir, source_path, tool_type):
     def mock_read_text_error(self: Path, *args, **kwargs):
         raise Exception("Mock read text error.")
@@ -87,6 +97,20 @@ def test_generate_tool_meta_dict_by_file(self, flow_dir, tool_path, tool_type):
             expected_dict["type"] = "llm"  # We use prompt as default for jinja2
         assert meta_dict == expected_dict
 
+    @pytest.mark.parametrize(
+        "flow_dir, entry_path, entry",
+        [
+            ("dummy_flow_with_trace", "flow_with_trace.py", "flow_with_trace:my_flow"),
+        ]
+    )
+    def test_generate_flow_meta(self, flow_dir, entry_path, entry):
+        """Check that the generated flow meta equals the checked-in ``.meta.json`` file.
+
+        The expected file sits next to the entry file and has the same name with the
+        suffix replaced by ``.meta.json``.
+        """
+        wd = str((EAGER_FLOW_ROOT / flow_dir).resolve())
+        meta_dict = cd_and_run_generate_flow_meta(wd, entry_path, entry, source=entry_path)
+        # On failure the helper returns an error string, so it can be appended to the message.
+        assert isinstance(meta_dict, dict), "Call cd_and_run_generate_flow_meta failed:\n" + meta_dict
+        target_file = (Path(wd) / entry_path).with_suffix(".meta.json")
+        expected_dict = load_json(target_file)
+        assert meta_dict == expected_dict
+
     @pytest.mark.parametrize(
         "flow_dir, tool_path, tool_type, func, msg_pattern",
         [

diff --git a/...promptflow/tests/test_configs/eager_flows/dummy_flow_with_trace/flow_with_trace.meta.json b/...promptflow/tests/test_configs/eager_flows/dummy_flow_with_trace/flow_with_trace.meta.json
@@ -0,0 +1,18 @@
+{
+    "function": "my_flow",
+    "entry": "flow_with_trace:my_flow",
+    "inputs": {
+        "text": {
+            "type": "string"
+        },
+        "models": {
+            "type": "list"
+        }
+    },
+    "outputs": {
+        "output": {
+            "type": "string"
+        }
+    },
+    "source": "flow_with_trace.py"
+}
diff --git a/src/promptflow/tests/test_configs/eager_flows/dummy_flow_with_trace/flow_with_trace.py b/src/promptflow/tests/test_configs/eager_flows/dummy_flow_with_trace/flow_with_trace.py
@@ -14,7 +14,7 @@ async def dummy_llm(prompt: str, model: str, wait_seconds: int):
     return prompt
 
 
-async def my_flow(text: str, models: list = []):
+async def my_flow(text: str, models: list = []) -> str:
     tasks = []
     for i, model in enumerate(models):
         tasks.append(asyncio.create_task(dummy_llm(text, model, i + 1)))