Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion autogen/agentchat/contrib/math_user_proxy_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def _generate_math_reply(
message = message.get("content", "")
code_blocks = extract_code(message)

if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN:
if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN or code_blocks == []:
# no code block is found, lang should be `UNKNOWN`
return True, self._default_auto_reply
is_success, all_success = True, True
Expand Down
2 changes: 1 addition & 1 deletion autogen/agentchat/conversable_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ def generate_code_execution_reply(
if not message["content"]:
continue
code_blocks = extract_code(message["content"])
if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN:
if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN or code_blocks == []:
continue

# found code blocks, execute code and push "last_n_messages" back
Expand Down
35 changes: 27 additions & 8 deletions autogen/code_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def infer_lang(code):


def extract_code(
text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = False
text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = True
) -> List[Tuple[str, str]]:
"""Extract code from a text.

Expand All @@ -62,28 +62,47 @@ def extract_code(
If there is no code block in the input text, the language would be "unknown".
If there is code block but the language is not specified, the language would be "".
"""

if not detect_single_line_code:
# Some models output CRLF (\r\n) instead of just \n. Normalize it so the text works with this regex.
text = re.sub(r'\r\n', '\n', text)

match = re.findall(pattern, text, flags=re.DOTALL)
return match if match else [(UNKNOWN, text)]

# First extract detected code blocks to be processed.
# ```\w[^`]+``` : Matches codeblocks that start with ```language

sterilization_pattern = re.compile(r"```\w[^`]+```")
sterilized_blocks = sterilization_pattern.findall(text)

# Extract both multi-line and single-line code blocks, separated by the | operator
# `{3}(\w+)?\s*([\s\S]*?)`{3}: Matches multi-line code blocks.
# The (\w+)? matches the language, where the ? indicates it is optional.
# `([^`]+)`: Matches inline code.

code_pattern = re.compile(r"`{3}(\w+)?\s*([\s\S]*?)`{3}|`([^`]+)`")
code_blocks = code_pattern.findall(text)
code_blocks = []

# Only sterilized blocks with properly appended language prefixes are processed.
# TODO: Ensure all supported language inferences are in this list
lang_check = ["python", "python3", "bash", "powershell", "sh", "pip", "shell", "ps1"]
for each_block in sterilized_blocks:
for prefix in lang_check:
if each_block.startswith("```"+prefix):
code_blocks.append(code_pattern.findall(each_block))

# Extract the individual code blocks and languages from the matched groups
extracted = []
for lang, group1, group2 in code_blocks:
if group1:
extracted.append((lang.strip(), group1.strip()))
elif group2:
extracted.append(("", group2.strip()))
for every in code_blocks:
for lang, group1, group2 in every:
if group1:
extracted.append((lang.strip(), group1.strip()))
elif group2:
extracted.append(("", group2.strip()))

return extracted


# _FIND_CODE_SYS_MSG = [
# {
# "role": "system",
Expand Down