From 8a7561fda40784a597750b2b620fa1e0d4d89880 Mon Sep 17 00:00:00 2001
From: BabyCNM <86091026+BabyCNM@users.noreply.github.com>
Date: Fri, 4 Oct 2024 20:36:55 -0700
Subject: [PATCH 1/2] Multimodal: <img x.jpg> will only detect filename +
 ignore HTML syntax

---
 autogen/agentchat/contrib/img_utils.py |  2 +-
 autogen/agentchat/utils.py             | 27 ++++++++++----
 test/agentchat/test_agentchat_utils.py | 50 +++++++++++++++++++++-----
 3 files changed, 63 insertions(+), 16 deletions(-)
diff --git a/autogen/agentchat/contrib/img_utils.py b/autogen/agentchat/contrib/img_utils.py
index b71f69e2105..5f9a2e55b46 100644
--- a/autogen/agentchat/contrib/img_utils.py
+++ b/autogen/agentchat/contrib/img_utils.py
@@ -187,7 +187,7 @@ def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dic
     image_count = 0
 
     # Find all image tags
-    for parsed_tag in utils.parse_tags_from_content("img", prompt):
+    for parsed_tag in utils.parse_tags_from_content("img", prompt, strict_filepath_match=True):
         image_location = parsed_tag["attr"]["src"]
         try:
             if img_format == "pil":
diff --git a/autogen/agentchat/utils.py b/autogen/agentchat/utils.py
index 4bdb01e7736..1e58486e996 100644
--- a/autogen/agentchat/utils.py
+++ b/autogen/agentchat/utils.py
@@ -102,7 +102,9 @@ def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Dict[str, An
     }
 
 
-def parse_tags_from_content(tag: str, content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Dict[str, str]]]:
+def parse_tags_from_content(
+    tag: str, content: Union[str, List[Dict[str, Any]]], strict_filepath_match=False
+) -> List[Dict[str, Dict[str, str]]]:
     """Parses HTML style tags from message contents.
 
     The parsing is done by looking for patterns in the text that match the format of HTML tags. The tag to be parsed is
@@ -119,6 +121,11 @@ def parse_tags_from_content(tag: str, content: Union[str, List[Dict[str, Any]]])
         tag (str): The HTML style tag to be parsed.
         content (Union[str, List[Dict[str, Any]]]): The message content to parse. Can be a string or a list of content
             items.
+        strict_filepath_match (bool, optional): If False (default), the parser will match all characters between the tag opening
+            and closing, including quotation marks and empty spaces. If True, the parser will stop at the first quotation mark
+            it encounters. Use True when dealing with nested quotes or complex attribute values. If there are
+            lots of HTML tag in the content, it is recommended to set this to True, because then it will only match
+            the only one tag, and will ignore HTML tags in the content.
 
     Returns:
         List[Dict[str, str]]: A list of dictionaries, where each dictionary represents a parsed tag. Each dictionary
@@ -130,20 +137,26 @@ def parse_tags_from_content(tag: str, content: Union[str, List[Dict[str, Any]]])
     """
     results = []
     if isinstance(content, str):
-        results.extend(_parse_tags_from_text(tag, content))
+        results.extend(_parse_tags_from_text(tag, content, strict_filepath_match))
     # Handles case for multimodal messages.
     elif isinstance(content, list):
         for item in content:
             if item.get("type") == "text":
-                results.extend(_parse_tags_from_text(tag, item["text"]))
+                results.extend(_parse_tags_from_text(tag, item["text"], strict_filepath_match))
     else:
         raise ValueError(f"content must be str or list, but got {type(content)}")
 
     return results
 
 
-def _parse_tags_from_text(tag: str, text: str) -> List[Dict[str, str]]:
-    pattern = re.compile(f"<{tag} (.*?)>")
+def _parse_tags_from_text(
+    tag: str, text: str, strict_filepath_match: bool = False
+) -> List[Dict[str, Union[str, Dict[str, str], re.Match]]]:
+    # In strict mode the tag content must be a single token without quotes, spaces, or '>'; otherwise it is not a match.
+    if strict_filepath_match:
+        pattern = re.compile(f"<{tag} ([^\"' >]+)>")
+    else:
+        pattern = re.compile(f"<{tag} (.*?)>")
 
     results = []
     for match in re.finditer(pattern, text):
@@ -154,12 +167,12 @@ def _parse_tags_from_text(tag: str, text: str) -> List[Dict[str, str]]:
     return results
 
 
-def _parse_attributes_from_tags(tag_content: str):
+def _parse_attributes_from_tags(tag_content: str) -> Dict[str, str]:
     pattern = r"([^ ]+)"
     attrs = re.findall(pattern, tag_content)
     reconstructed_attrs = _reconstruct_attributes(attrs)
 
-    def _append_src_value(content, value):
+    def _append_src_value(content: Dict[str, str], value: str) -> None:
         if "src" in content:
             content["src"] += f" {value}"
         else:
diff --git a/test/agentchat/test_agentchat_utils.py b/test/agentchat/test_agentchat_utils.py
index cfdc609d59c..1fe6ac31656 100644
--- a/test/agentchat/test_agentchat_utils.py
+++ b/test/agentchat/test_agentchat_utils.py
@@ -13,18 +13,22 @@
 TAG_PARSING_TESTS = [
     {
         "message": "Hello agent, can you take a look at this image <img http://example.com/image.png>",
+        "strict_filepath_match": False,
         "expected": [{"tag": "img", "attr": {"src": "http://example.com/image.png"}}],
     },
     {
         "message": "Can you transcribe this audio? <audio http://example.com/au=dio.mp3>",
+        "strict_filepath_match": False,
         "expected": [{"tag": "audio", "attr": {"src": "http://example.com/au=dio.mp3"}}],
     },
     {
         "message": "Can you describe what's in this image <img url='http://example.com/=image.png'>",
+        "strict_filepath_match": False,
         "expected": [{"tag": "img", "attr": {"url": "http://example.com/=image.png"}}],
     },
     {
         "message": "Can you describe what's in this image <img http://example.com/image.png> and transcribe this audio? <audio http://example.com/audio.mp3>",
+        "strict_filepath_match": False,
         "expected": [
             {"tag": "img", "attr": {"src": "http://example.com/image.png"}},
             {"tag": "audio", "attr": {"src": "http://example.com/audio.mp3"}},
@@ -32,10 +36,12 @@
     },
     {
         "message": "Can you generate this audio? <audio text='Hello I'm a robot' prompt='whisper'>",
+        "strict_filepath_match": False,
         "expected": [{"tag": "audio", "attr": {"text": "Hello I'm a robot", "prompt": "whisper"}}],
     },
     {
         "message": "Can you describe what's in this image <img http://example.com/image.png width='100'> and this image <img http://hello.com/image=.png>?",
+        "strict_filepath_match": False,
         "expected": [
             {"tag": "img", "attr": {"src": "http://example.com/image.png", "width": "100"}},
             {"tag": "img", "attr": {"src": "http://hello.com/image=.png"}},
@@ -43,8 +49,34 @@
     },
     {
         "message": "Text with no tags",
+        "strict_filepath_match": False,
         "expected": [],
     },
+    {
+        "message": 'Can you generate this audio? <audio text="Hello I\'m a robot" prompt="whisper">',
+        "strict_filepath_match": False,
+        "expected": [{"tag": "audio", "attr": {"text": "Hello I'm a robot", "prompt": "whisper"}}],
+    },
+    {
+        "message": 'Can you generate this audio? <audio text="Hello I\'m a robot" prompt="whisper">',
+        "strict_filepath_match": True,
+        "expected": [],
+    },
+    {
+        "message": 'Complex nested quotes <img src="http://example.com/image.png" alt="A "quoted" description">',
+        "strict_filepath_match": False,
+        "expected": [{"tag": "img", "attr": {"src": "http://example.com/image.png", "alt": 'A "quoted" description'}}],
+    },
+    {
+        "message": 'Complex nested quotes <img src="http://example.com/image.png" alt="A "quoted" description">',
+        "strict_filepath_match": True,
+        "expected": [],
+    },
+    {
+        "message": "Complex nested quotes <img http://example.com/image.png>",
+        "strict_filepath_match": True,
+        "expected": [{"tag": "img", "attr": {"src": "http://example.com/image.png"}}],
+    },
 ]
 
 
@@ -54,31 +86,33 @@ def _delete_unused_keys(d: Dict) -> None:
 
 
 @pytest.mark.parametrize("test_case", TAG_PARSING_TESTS)
-def test_tag_parsing(test_case: Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]) -> None:
-    """Test the tag_parsing function."""
+def test_tag_parsing(test_case: Dict[str, Union[str, bool, List[Dict[str, Union[str, Dict[str, str]]]]]]) -> None:
+    """Test tag parsing both with and without strict_filepath_match."""
     message = test_case["message"]
+    strict_filepath_match = test_case["strict_filepath_match"]
     expected = test_case["expected"]
-    tags = ["img", "audio", "random"]
+    tags = ["img", "audio"]
 
     result = []
     for tag in tags:
-        parsed_tags = agentchat.utils.parse_tags_from_content(tag, message)
+        parsed_tags = agentchat.utils.parse_tags_from_content(tag, message, strict_filepath_match=strict_filepath_match)
         for item in parsed_tags:
             _delete_unused_keys(item)
-
         result.extend(parsed_tags)
+    print(result, expected, strict_filepath_match)
     assert result == expected
 
     result = []
     for tag in tags:
         content = [{"type": "text", "text": message}]
-        parsed_tags = agentchat.utils.parse_tags_from_content(tag, content)
+        parsed_tags = agentchat.utils.parse_tags_from_content(tag, content, strict_filepath_match=strict_filepath_match)
         for item in parsed_tags:
             _delete_unused_keys(item)
-
         result.extend(parsed_tags)
+    print(result, expected)
     assert result == expected
 
 
 if __name__ == "__main__":
-    test_tag_parsing(TAG_PARSING_TESTS[0])
+    for test_case in TAG_PARSING_TESTS:
+        test_tag_parsing(test_case)

From c775fe65cdd8f67bd5de3e48732f7bd35a204041 Mon Sep 17 00:00:00 2001
From: BabyCNM <86091026+BabyCNM@users.noreply.github.com>
Date: Fri, 4 Oct 2024 20:41:54 -0700
Subject: [PATCH 2/2] Add a few test cases for strict_filepath_match

---
 autogen/agentchat/utils.py             | 10 ++++------
 test/agentchat/test_agentchat_utils.py | 10 ++++++++++
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/autogen/agentchat/utils.py b/autogen/agentchat/utils.py
index 1e58486e996..c2e3b748e94 100644
--- a/autogen/agentchat/utils.py
+++ b/autogen/agentchat/utils.py
@@ -122,14 +122,12 @@ def parse_tags_from_content(
         content (Union[str, List[Dict[str, Any]]]): The message content to parse. Can be a string or a list of content
             items.
         strict_filepath_match (bool, optional): If False (default), the parser will match all characters between the tag opening
-            and closing, including quotation marks and empty spaces. If True, the parser will stop at the first quotation mark
-            it encounters. Use True when dealing with nested quotes or complex attribute values. If there are
-            lots of HTML tag in the content, it is recommended to set this to True, because then it will only match
-            the only one tag, and will ignore HTML tags in the content.
+            and closing, including quotation marks and spaces. If True, the parser will only match simple tag contents
+            without spaces or quotes. This is useful for parsing filenames or URLs and ignoring complex HTML-like structures.
 
     Returns:
-        List[Dict[str, str]]: A list of dictionaries, where each dictionary represents a parsed tag. Each dictionary
-            contains three key-value pairs: 'type' which is the tag, 'attr' which is a dictionary of the parsed attributes,
+        List[Dict[str, Union[str, Dict[str, str], re.Match]]]: A list of dictionaries, where each dictionary represents a parsed tag. Each dictionary
+            contains three key-value pairs: 'tag' which is the tag name, 'attr' which is a dictionary of the parsed attributes,
             and 'match' which is a regular expression match object.
 
     Raises:
diff --git a/test/agentchat/test_agentchat_utils.py b/test/agentchat/test_agentchat_utils.py
index 1fe6ac31656..7281d9c9fef 100644
--- a/test/agentchat/test_agentchat_utils.py
+++ b/test/agentchat/test_agentchat_utils.py
@@ -77,6 +77,16 @@
         "strict_filepath_match": True,
         "expected": [{"tag": "img", "attr": {"src": "http://example.com/image.png"}}],
     },
+    {
+        "message": 'Can you generate this audio? <audio text="Hello I\'m a robot" prompt="whisper">',
+        "strict_filepath_match": True,
+        "expected": [],  # Empty because strict_filepath_match=True doesn't match tags with spaces or quotes
+    },
+    {
+        "message": 'Complex nested quotes <img src="http://example.com/image.png" alt="A "quoted" description">',
+        "strict_filepath_match": True,
+        "expected": [],  # Empty because strict_filepath_match=True doesn't match tags with spaces or quotes
+    },
 ]