diff --git a/autogen/agentchat/contrib/math_user_proxy_agent.py b/autogen/agentchat/contrib/math_user_proxy_agent.py index 7a15e80ec744..7f628a363573 100644 --- a/autogen/agentchat/contrib/math_user_proxy_agent.py +++ b/autogen/agentchat/contrib/math_user_proxy_agent.py @@ -289,7 +289,7 @@ def _generate_math_reply( message = message.get("content", "") code_blocks = extract_code(message) - if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN: + if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN or code_blocks == []: # no code block is found, lang should be `UNKNOWN`` return True, self._default_auto_reply is_success, all_success = True, True diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py index 3a0e19598813..e339eb304924 100644 --- a/autogen/agentchat/conversable_agent.py +++ b/autogen/agentchat/conversable_agent.py @@ -630,7 +630,7 @@ def generate_code_execution_reply( if not message["content"]: continue code_blocks = extract_code(message["content"]) - if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN: + if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN or code_blocks == []: continue # found code blocks, execute code and push "last_n_messages" back diff --git a/autogen/code_utils.py b/autogen/code_utils.py index caaf09072850..9eb91408ea1f 100644 --- a/autogen/code_utils.py +++ b/autogen/code_utils.py @@ -46,7 +46,7 @@ def infer_lang(code): def extract_code( - text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = False + text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = True ) -> List[Tuple[str, str]]: """Extract code from a text. @@ -62,28 +62,47 @@ def extract_code( If there is no code block in the input text, the language would be "unknown". If there is code block but the language is not specified, the language would be "". """ + if not detect_single_line_code: + # Some models output CRLF \r\n instead of just \n. 
Cleaning it up to work with this regex. + text = re.sub(r'\r\n', '\n', text) + match = re.findall(pattern, text, flags=re.DOTALL) return match if match else [(UNKNOWN, text)] + # First extract detected code blocks to be processed. + # ```\w[^`]+``` : Matches codeblocks that start with ```language + + sterilization_pattern = re.compile(r"```\w[^`]+```") + sterilized_blocks = sterilization_pattern.findall(text) + # Extract both multi-line and single-line code block, separated by the | operator # `{3}(\w+)?\s*([\s\S]*?)`{3}: Matches multi-line code blocks. # The (\w+)? matches the language, where the ? indicates it is optional. # `([^`]+)`: Matches inline code. + code_pattern = re.compile(r"`{3}(\w+)?\s*([\s\S]*?)`{3}|`([^`]+)`") - code_blocks = code_pattern.findall(text) + code_blocks = [] + + # Only sterilized blocks with properly appended language prefixes are processed. + # TODO: Ensure all supported language inferences are in this list + lang_check = ["python", "python3", "bash", "powershell", "sh", "pip", "shell", "ps1"] + for each_block in sterilized_blocks: + for prefix in lang_check: + if each_block.startswith("```"+prefix): + code_blocks.append(code_pattern.findall(each_block)) # Extract the individual code blocks and languages from the matched groups extracted = [] - for lang, group1, group2 in code_blocks: - if group1: - extracted.append((lang.strip(), group1.strip())) - elif group2: - extracted.append(("", group2.strip())) + for every in code_blocks: + for lang, group1, group2 in every: + if group1: + extracted.append((lang.strip(), group1.strip())) + elif group2: + extracted.append(("", group2.strip())) return extracted - # _FIND_CODE_SYS_MSG = [ # { # "role": "system",