Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚡️ Speed up method CrewAgentParser._clean_action by 4275x #2382

Conversation

misrasaurabh1
Copy link
Contributor

📄 427,565% (4,275.65x) speedup for CrewAgentParser._clean_action in src/crewai/agents/parser.py

⏱️ Runtime : 919 milliseconds 215 microseconds (best of 581 runs)

📝 Explanation and details

The regex operation is equivalent to the strip operation. Regex substitution will be much slower than simple string operations in Python.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 80 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests Details
import re
from typing import Any

# imports
import pytest  # used for our unit tests
from crewai.agents.parser import CrewAgentParser

# unit tests

@pytest.fixture
def parser():
    """Fixture to create a CrewAgentParser instance."""
    return CrewAgentParser(agent=None)

def test_basic_functionality(parser):
    """Test basic functionality of _clean_action."""
    codeflash_output = parser._clean_action("*Action*")
    codeflash_output = parser._clean_action("**Action**")
    codeflash_output = parser._clean_action("***Action***")
    codeflash_output = parser._clean_action("   *Action*   ")
    codeflash_output = parser._clean_action("  **Action**  ")
    codeflash_output = parser._clean_action(" ***Action*** ")

def test_no_asterisks(parser):
    """Test input without asterisks."""
    codeflash_output = parser._clean_action("Action")
    codeflash_output = parser._clean_action(" Action ")
    codeflash_output = parser._clean_action("  Action  ")

def test_only_asterisks(parser):
    """Test input consisting solely of asterisks."""
    codeflash_output = parser._clean_action("*")
    codeflash_output = parser._clean_action("**")
    codeflash_output = parser._clean_action("***")
    codeflash_output = parser._clean_action("   ***   ")

def test_mixed_content(parser):
    """Test input with asterisks and other characters."""
    codeflash_output = parser._clean_action("*Action* More Text*")
    codeflash_output = parser._clean_action("**Action** More Text**")
    codeflash_output = parser._clean_action("***Action*** More Text***")
    codeflash_output = parser._clean_action("A*ction")
    codeflash_output = parser._clean_action("Ac**tion")
    codeflash_output = parser._clean_action("Act***ion")

def test_edge_cases(parser):
    """Test edge cases."""
    codeflash_output = parser._clean_action("")
    codeflash_output = parser._clean_action(" ")
    codeflash_output = parser._clean_action("   ")
    codeflash_output = parser._clean_action("*Action!@#$%^&*()_+*")
    codeflash_output = parser._clean_action("**1234567890**")

def test_large_scale(parser):
    """Test large scale inputs."""
    long_string_with_asterisks = "*" * 10000 + "Action" + "*" * 10000
    codeflash_output = parser._clean_action(long_string_with_asterisks)
    long_string_without_asterisks = " " * 10000 + "Action" + " " * 10000
    codeflash_output = parser._clean_action(long_string_without_asterisks)
    mixed_long_string = "*" * 5000 + "Action" + " " * 5000 + "More Text" + "*" * 5000
    codeflash_output = parser._clean_action(mixed_long_string)

def test_complex_patterns(parser):
    """Test complex patterns."""
    codeflash_output = parser._clean_action("*A*B*C*D*")
    codeflash_output = parser._clean_action("**A**B**C**D**")
    codeflash_output = parser._clean_action("* Action *")
    codeflash_output = parser._clean_action("** Action **")
    codeflash_output = parser._clean_action("*** Action ***")

def test_unicode_and_non_ascii(parser):
    """Test Unicode and non-ASCII characters."""
    codeflash_output = parser._clean_action("*Äction*")
    codeflash_output = parser._clean_action("**Äction**")
    codeflash_output = parser._clean_action("***Äction***")
    codeflash_output = parser._clean_action("*你好*")
    codeflash_output = parser._clean_action("**你好**")
    codeflash_output = parser._clean_action("***你好***")

def test_multiple_lines(parser):
    """Test multiline strings."""
    codeflash_output = parser._clean_action("*Action*\n*Another Action*")
    codeflash_output = parser._clean_action("**Action**\n**Another Action**")
    codeflash_output = parser._clean_action("***Action***\n***Another Action***")

def test_mixed_whitespace_types(parser):
    """Test mixed whitespace types."""
    codeflash_output = parser._clean_action("\t*Action*\t")
    codeflash_output = parser._clean_action("\n**Action**\n")
    codeflash_output = parser._clean_action("\t\n***Action***\n\t")
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

import re
from typing import Any

# imports
import pytest  # used for our unit tests
from crewai.agents.parser import CrewAgentParser

# unit tests

def test_clean_action_removes_leading_and_trailing_asterisks_and_whitespace():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("  ***action***  ")
    codeflash_output = parser._clean_action("*action*")
    codeflash_output = parser._clean_action("  * action *  ")

def test_clean_action_removes_leading_asterisks_and_whitespace():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("  ***action")
    codeflash_output = parser._clean_action("*action")
    codeflash_output = parser._clean_action("  * action")

def test_clean_action_removes_trailing_asterisks_and_whitespace():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("action***  ")
    codeflash_output = parser._clean_action("action*")
    codeflash_output = parser._clean_action("action *  ")

def test_clean_action_handles_empty_string():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("")

def test_clean_action_handles_only_asterisks_and_whitespace():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("***")
    codeflash_output = parser._clean_action("  ***  ")
    codeflash_output = parser._clean_action("* * *")

def test_clean_action_handles_no_asterisks_but_whitespace():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("  action  ")
    codeflash_output = parser._clean_action(" action ")

def test_clean_action_handles_no_asterisks_no_whitespace():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("action")

def test_clean_action_handles_asterisks_in_middle():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("ac*tion")
    codeflash_output = parser._clean_action("ac***tion")

def test_clean_action_handles_multiple_groups_of_asterisks():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("  ***action***another***  ")
    codeflash_output = parser._clean_action("*action*another*")

def test_clean_action_handles_mixed_whitespace_characters():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("\t***action***\n")
    codeflash_output = parser._clean_action("\n***action***\t")
    codeflash_output = parser._clean_action("\t* action *\n")

def test_clean_action_handles_large_strings():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("  " + "*" * 1000 + "action" + "*" * 1000 + "  ")
    codeflash_output = parser._clean_action("*" * 10000 + "action" + "*" * 10000)
    codeflash_output = parser._clean_action("  " + "*" * 5000 + "action" + "*" * 5000 + "  ")

def test_clean_action_handles_special_characters():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("***@ction***")
    codeflash_output = parser._clean_action("***a!c@t#i$o%n^***")
    codeflash_output = parser._clean_action("  ***a!c@t#i$o%n^***  ")

def test_clean_action_handles_unicode_characters():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("***动作***")
    codeflash_output = parser._clean_action("  ***動作***  ")
    codeflash_output = parser._clean_action("***açtïøñ***")

def test_clean_action_is_deterministic():
    parser = CrewAgentParser(agent=None)
    codeflash_output = parser._clean_action("***action***")
    codeflash_output = parser._clean_action("  ***action***  ")
    codeflash_output = parser._clean_action("***action***")
    codeflash_output = parser._clean_action("  ***action***  ")
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

Codeflash

@joaomdmoura
Copy link
Collaborator

Disclaimer: This review was made by a crew of AI Agents.

Code Review Comment for PR #2382

Overview

The changes made in this pull request optimize the _clean_action method in src/crewai/agents/parser.py, aiming for enhanced performance in string cleaning operations. The transition from regular expressions to string methods significantly impacts both performance and readability.

Improvements

  1. Performance Optimization:
    The new implementation replaces a regex pattern match with simple string operations, leading to a claimed performance increase of over 427,000%. This is noteworthy and should be validated through performance benchmarks.

    Original Code:

    def _clean_action(self, text: str) -> str:
        return re.sub(r"^\s*\*+\s*|\s*\*+\s*$", "", text).strip()

    Revised Code:

    def _clean_action(self, text: str) -> str:
        return text.strip().strip("*").strip()
  2. Readability:
    The switch from regex to built-in string methods not only simplifies the logic but also enhances code readability.

Functionality Considerations

While the new approach retains the overall functionality of cleaning strings by removing leading/trailing whitespace and asterisks, it's essential to carefully evaluate potential edge cases due to differences in behavior, especially with multiple asterisks.

Edge Case Analysis

  • The original regex efficiently handled leading and trailing asterisks in a single function call. Conversely, the new method may fall short in scenarios where multiple asterisks are present at both ends, necessitating further .strip("*") calls. This could inadvertently lead to performance degradation in edge cases with extensive asterisk usage.

Recommendations

  1. Input Validation:
    Enhance robustness by validating input types to ensure the function handles non-string inputs gracefully.

    if not isinstance(text, str):
        raise TypeError("Input must be a string")
  2. Handling Multiple Asterisks:
    To maintain consistency in behavior, consider implementing a loop to manage consecutive asterisks effectively.

    cleaned = text.strip()
    while cleaned.startswith("*") or cleaned.endswith("*"):
        cleaned = cleaned.strip("*").strip()
    return cleaned
  3. Add Unit Tests:
    Successful implementation should be backed by comprehensive unit tests that cover various scenarios and edge cases:

    def test_clean_action():
        parser = CrewAgentParser()
        assert parser._clean_action("  ***test***  ") == "test"
        assert parser._clean_action("*  test  *") == "test"
        assert parser._clean_action("test") == "test"
        assert parser._clean_action("*** test with * in middle ***") == "test with * in middle"
  4. Documentation & Performance Benchmarking:
    Update docstrings to reflect the performance improvements and consider adding a performance benchmark test to substantiate the claimed optimization.

Conclusion

The changes in this PR introduce substantial improvements in terms of performance and code maintainability. However, to ensure reliability, it is crucial to implement the suggested recommendations, particularly around robustness and comprehensive testing. Once these enhancements are incorporated, I would advocate for merging this PR.

@bhancockio bhancockio merged commit d3a09c3 into crewAIInc:main Mar 21, 2025
4 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants