diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..acace7c --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +.PHONY: format-code check-code + +format-code: + black -l 100 ai_functions.py test_ai_functions.py + isort ai_functions.py test_ai_functions.py + +check-code: + flake8 --max-line-length=100 ai_functions.py test_ai_functions.py + diff --git a/README.md b/README.md index 686db4b..f05c548 100644 --- a/README.md +++ b/README.md @@ -69,14 +69,14 @@ print(result) # Output: 12 The table below shows the success rate of the AI functions with different GPT models: -| Description | GPT-4 Result | GPT-3.5-turbo Result | Reason | -|---------------------------|--------------|----------------------|--------| -| Generate fake people | PASSED | FAILED | Incorrect response format | -| Generate Random Password | PASSED | PASSED | N/A | -| Calculate area of triangle| FAILED | FAILED | Incorrect float value (GPT-4), Incorrect response format (GPT-3.5-turbo) | -| Calculate the nth prime number | PASSED | PASSED | N/A | -| Encrypt text | PASSED | PASSED | N/A | -| Find missing numbers | PASSED | PASSED | N/A | +| Description | GPT-4 Result | GPT-3.5-turbo Result | Reason | +| ------------------------------ | ------------ | -------------------- | ------------------------------------------------------------------------ | +| Generate fake people | PASSED | FAILED | Incorrect response format | +| Generate Random Password | PASSED | PASSED | N/A | +| Calculate area of triangle | FAILED | FAILED | Incorrect float value (GPT-4), Incorrect response format (GPT-3.5-turbo) | +| Calculate the nth prime number | PASSED | PASSED | N/A | +| Encrypt text | PASSED | PASSED | N/A | +| Find missing numbers | PASSED | PASSED | N/A | It's important to note that AI Functions are not suited for certain tasks, particularly those involving mathematical calculations and precision. As observed in the case of calculating the area of a triangle and finding the nth prime number, GPT models can struggle with providing accurate results. The limitations of GPT models in such cases are mainly due to their inherent inability to perform precise arithmetic and the ambiguity in understanding user inputs. @@ -94,6 +94,16 @@ python test_ai_functions.py The test script will output the results of each test case and provide a success rate. +### Code Formatting + +1. Install black `pip install black` and run it with `black -l 100 ai_functions.py test_ai_functions.py` +2. Install isort `pip install isort` and run it with `isort ai_functions.py test_ai_functions.py` +3. Install flake8 `pip install flake8` and run it with `flake8 --max-line-length=100 ai_functions.py test_ai_functions.py` + +Alternatively: +1. you can run `make format-code` to apply `black` and `isort` +2. you can run `make check-code` to apply `flake8` + ## Contributing Contributions are welcome! If you would like to add more test cases or improve the existing code, please feel free to submit a pull request. diff --git a/ai_functions.py b/ai_functions.py index a0d53a3..9415e27 100644 --- a/ai_functions.py +++ b/ai_functions.py @@ -1,14 +1,19 @@ import openai -def ai_function(function, args, description, model = "gpt-4"): + +def ai_function(function, args, description, model="gpt-4"): # parse args to comma separated string args = ", ".join(args) - messages = [{"role": "system", "content": f"You are now the following python function: ```# {description}\n{function}```\n\nOnly respond with your `return` value. Do not include any other explanatory text in your response."},{"role": "user", "content": args}] + messages = [ + { + "role": "system", + "content": "You are now the following python function: " + + f"```# {description}\n{function}```\n\nOnly respond with your " + + "`return` value. Do not include any other explanatory text in your response.", + }, + {"role": "user", "content": args}, + ] - response = openai.ChatCompletion.create( - model=model, - messages=messages, - temperature=0 - ) + response = openai.ChatCompletion.create(model=model, messages=messages, temperature=0) return response.choices[0].message["content"] diff --git a/test_ai_function.py b/test_ai_functions.py similarity index 91% rename from test_ai_function.py rename to test_ai_functions.py index bb2e19d..c211e64 100644 --- a/test_ai_function.py +++ b/test_ai_functions.py @@ -1,14 +1,17 @@ import ast import json import time -import ai_functions -import pytest + import openai +import pytest + +import ai_functions import keys # Initialize the OpenAI API client openai.api_key = keys.OPENAI_API_KEY + # Run all tests, print the results, and return the number of failed tests def run_tests(model): test_functions = [test_1, test_2, test_3, test_4, test_5, test_6] @@ -18,8 +21,8 @@ def run_tests(model): "Calculate area of triangle", "Calculate the nth prime number", "Encrypt text", - "Find missing numbers" -] + "Find missing numbers", + ] failed_tests = [] i = 0 @@ -41,11 +44,12 @@ def run_tests(model): # Print the number of failed tests print(f"Success Rate: {len(test_functions) - len(failed_tests)}/{len(test_functions)}") + # Ai function test 1 def test_1(model): function_string = "def fake_people(n: int) -> list[dict]:" args = ["4"] - description_string = """Generates n examples of fake data representing people, + description_string = """Generates n examples of fake data representing people, each with a name and an age.""" result_string = ai_functions.ai_function(function_string, args, description_string, model) @@ -59,10 +63,10 @@ def test_1(model): print("Testing if result can be parsed as a list of dictionaries...") # Parse the result as a list of dictionaries result = json.loads(result_string) - except Exception as e: + except Exception: # If the result can't be parsed as a list of dictionaries, the test fails assert False - + # Assert the length of the result is equal to the number of people requested print("Testing if the length of the result is equal to the number of people requested...") if result: @@ -70,11 +74,13 @@ def test_1(model): else: assert False + # Ai function test 2 def test_2(model): function_string = "def random_password_generator(length: int, special_chars: bool) -> str:" args = ["12", "True"] - description_string = """Generates a random password of given length with or without special characters.""" + description_string = """Generates a random password of given length with or + without special characters.""" result_string = ai_functions.ai_function(function_string, args, description_string, model) @@ -84,6 +90,7 @@ def test_2(model): print("Testing if the length of the result is equal to the length requested...") assert len(result_string) == int(args[0]) + # Ai function test 3 def test_3(model): function_string = "def calculate_area_of_triangle(base: float, height: float) -> float:" @@ -103,9 +110,13 @@ def test_3(model): # Assert the result is equal to the expected area of the triangle expected_area = (float(args[0]) * float(args[1])) / 2 - print("Testing if the result is equal to the expected area of the triangle, which is: " + str(expected_area)) + print( + "Testing if the result is equal to the expected area of the triangle, which is: " + + str(expected_area) + ) assert float(result_string) == pytest.approx(expected_area) + # Ai function test 4 def test_4(model): function_string = "def get_nth_prime_number(n: int) -> int:" @@ -126,14 +137,19 @@ def test_4(model): # Assert the result is equal to the expected nth prime number expected_prime_number = 29 - print("Testing if the result is equal to the expected nth prime number, which is: " + str(expected_prime_number)) + print( + "Testing if the result is equal to the expected nth prime number, which is: " + + str(expected_prime_number) + ) assert int(result_string) == expected_prime_number + # Ai function test 5 def test_5(model): function_string = "def encrypt_text(text: str, key: str) -> str:" args = ["'Hello, World!'", "'abc123'"] - description_string = """Encrypts the given text using a simple character substitution based on the provided key.""" + description_string = """Encrypts the given text using a simple character + substitution based on the provided key.""" result_string = ai_functions.ai_function(function_string, args, description_string, model) @@ -143,6 +159,7 @@ def test_5(model): print("Testing if the result has the same length as the input text...") assert len(result_string) == len(args[0]) + # Ai function test 6 def test_6(model): function_string = "def find_missing_numbers_in_list(numbers: list[int]) -> list[int]:" @@ -167,5 +184,6 @@ def test_6(model): print("Testing if the result list contains the expected missing numbers...") assert result_list == expected_missing_numbers + run_tests("gpt-4") -run_tests("gpt-3.5-turbo") \ No newline at end of file +run_tests("gpt-3.5-turbo")