microsoft · robzsaunders · Oct 24, 2023 · Oct 24, 2023 · Oct 24, 2023 · Oct 26, 2023
diff --git a/autogen/agentchat/contrib/math_user_proxy_agent.py b/autogen/agentchat/contrib/math_user_proxy_agent.py
@@ -289,7 +289,7 @@ def _generate_math_reply(
         message = message.get("content", "")
         code_blocks = extract_code(message)
 
-        if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN:
+        if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN or code_blocks == []:
             # no code block is found, lang should be `UNKNOWN``
             return True, self._default_auto_reply
         is_success, all_success = True, True

diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
@@ -630,7 +630,7 @@ def generate_code_execution_reply(
             if not message["content"]:
                 continue
             code_blocks = extract_code(message["content"])
-            if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN:
+            if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN or code_blocks == []:
                 continue
 
             # found code blocks, execute code and push "last_n_messages" back

diff --git a/autogen/code_utils.py b/autogen/code_utils.py
@@ -46,7 +46,7 @@ def infer_lang(code):
 
 
 def extract_code(
-    text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = False
+    text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = True
 ) -> List[Tuple[str, str]]:
     """Extract code from a text.
 
@@ -62,28 +62,47 @@ def extract_code(
           If there is no code block in the input text, the language would be "unknown".
           If there is code block but the language is not specified, the language would be "".
     """
+
     if not detect_single_line_code:
+        # Some models output CRLF (\r\n) instead of just \n. Normalize the line endings so the regex below matches.
+        text = re.sub(r'\r\n', '\n', text)
+
         match = re.findall(pattern, text, flags=re.DOTALL)
         return match if match else [(UNKNOWN, text)]
 
+    # First extract detected code blocks to be processed.
+    # ```\w[^`]+``` : Matches codeblocks that start with ```language
+
+    sterilization_pattern = re.compile(r"```\w[^`]+```")
+    sterilized_blocks = sterilization_pattern.findall(text)
+
     # Extract both multi-line and single-line code block, separated by the | operator
     # `{3}(\w+)?\s*([\s\S]*?)`{3}: Matches multi-line code blocks.
     #    The (\w+)? matches the language, where the ? indicates it is optional.
     # `([^`]+)`: Matches inline code.
+
     code_pattern = re.compile(r"`{3}(\w+)?\s*([\s\S]*?)`{3}|`([^`]+)`")
-    code_blocks = code_pattern.findall(text)
+    code_blocks = []
+
+    # Only sterilized blocks with properly appended language prefixes are processed.
+    # TODO: Ensure all supported language inferences are in this list
+    lang_check = ["python", "python3", "bash", "powershell", "sh", "pip", "shell", "ps1"]
+    for each_block in sterilized_blocks:
+       for prefix in lang_check:
+            if each_block.startswith("```"+prefix):
+                code_blocks.append(code_pattern.findall(each_block))
 
     # Extract the individual code blocks and languages from the matched groups
     extracted = []
-    for lang, group1, group2 in code_blocks:
-        if group1:
-            extracted.append((lang.strip(), group1.strip()))
-        elif group2:
-            extracted.append(("", group2.strip()))
+    for every in code_blocks:
+        for lang, group1, group2 in every:
+            if group1:
+                extracted.append((lang.strip(), group1.strip()))
+            elif group2:
+                extracted.append(("", group2.strip()))
 
     return extracted
 
-
 # _FIND_CODE_SYS_MSG = [
 #     {
 #         "role": "system",