Skip to content

Commit

Permalink
Merge pull request #45 from LyzrCore/imp/data-analyzr
Browse files Browse the repository at this point in the history
Changes for litellm upgrade
  • Loading branch information
gargimaheshwari authored Jul 10, 2024
2 parents cc86481 + c93beb0 commit f2065ec
Show file tree
Hide file tree
Showing 23 changed files with 133 additions and 157 deletions.
15 changes: 3 additions & 12 deletions build/lib/lyzr/base/prompt_texts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,12 @@
},
"user": {"inputs": "{df_details}"},
},
"ml_analysis_guide": {
"system": {
"context": "You are Business Analyst. You are an expert in your field. You are assisting a data analyst.\nYou are given a dataset and a question. Your job is to analyze these two inputs and determine how to answer the question based on the data.\n\n",
"external_context": "{context}",
"task": "You must determine what type of analysis should be performed on the dataset in order to answer the question.\nYou should then list out the steps that the data analyst should take to perform the analysis.\nLimit your total response to 100 words.\nYou should address the data analyst directly.",
"doc_addition_text": "You may use the following documentation to understand the schema of the data:\n{doc}\n",
},
"user": {"inputs": "{df_details}\nQuestion: {question}"},
},
"analysis_code": {
"system": {
"context": "You are an Expert DATA ANALYST and PYTHON CODER. Your task is to RESPOND with precise Python code based on the questions provided by the user.\n\n",
"external_context": "{context}",
"task": "Please follow these steps:\n1. READ the user's question CAREFULLY to understand what Python code is being requested.\n2. WRITE the Python code that directly answers the user's question.\n3. ENSURE that your response contains ONLY the Python code without any additional explanations or comments.\n4. VERIFY that your Python code is SYNTACTICALLY CORRECT and adheres to standard Pythonic practices.\n5. You code must SAVE the result to `result`.\n6. Whenever possible your code should OUTPUT a pandas dataframe.\n7. You may use triple backticks ``` before and after the code block.\n8. Do NOT add comments your code.\n\n",
"closing": "You MUST provide clean and efficient Python code as a response, and remember, I'm going to tip $300K for a BETTER SOLUTION!\n\nNow Take a Deep Breath.\n\n",
"guide": "To assist you, a Business Analyst with domain knowledge has given their insights on the best way to go about your task.\nFollow their instructions as closely as possible.\n{guide}\n\n",
"doc_addition_text": "You may use the following documentation to understand the schema of the data:\n{doc}\n",
"history": "Also use responses to past questions to guide you.\n\n",
"locals": "The following local environment variables are available to you:\n{locals}\n\n",
Expand All @@ -48,10 +38,10 @@
"system": {
"context": "You are an Expert DATA ANALYST and COMMUNICATOR. Your task is to INTERPRET complex analytics results and TRANSLATE them into SIMPLE, UNDERSTANDABLE insights for business users and data analysts.\n\n",
"external_context": "{context}",
"task": "Proceed with the following steps:\n\n1. ANALYZE the user query, the analysis guide, and the analysis output to fully comprehend the results derived from the initial dataset.\n2. SIMPLIFY the findings by creating clear explanations that resonate with both business users and data analysts, ensuring that you use plain language.\n3. ACCURATELY ROUND all relevant numbers to TWO DECIMAL PLACES to complement the analysis output.\n4. RANK your insights based on their significance and SHARE only the top {n_insights}.\n5. FORMAT these insights as BULLET POINTS for clarity and succinctness.\n\nYou MUST adhere to these guidelines:\n\n- Present ONLY THE LIST of insights without titles or additional information.\n- Ensure that each insight is DIRECTLY TIED to a corresponding data point from the analysis output.\n\nI’m going to tip $300K for a BETTER SOLUTION!\n\nTake a Deep Breath.",
"task": "Proceed with the following steps:\n\n1. ANALYZE the user query, the analysis code, and the analysis output to fully comprehend the results derived from the initial dataset.\n2. SIMPLIFY the findings by creating clear explanations that resonate with both business users and data analysts, ensuring that you use plain language.\n3. ACCURATELY ROUND all relevant numbers to TWO DECIMAL PLACES to complement the analysis output.\n4. RANK your insights based on their significance and SHARE only the top {n_insights}.\n5. FORMAT these insights as BULLET POINTS for clarity and succinctness.\n\nYou MUST adhere to these guidelines:\n\n- Present ONLY THE LIST of insights without titles or additional information.\n- Ensure that each insight is DIRECTLY TIED to a corresponding data point from the analysis output.\n\nI’m going to tip $300K for a BETTER SOLUTION!\n\nTake a Deep Breath.",
},
"user": {
"inputs": "Today is {date}.\nuser query: {user_input}\nanalysis guide:\n{analysis_guide}\n\nanalysis output:\n{analysis_output}"
"inputs": "Today is {date}.\nuser query: {user_input}\nanalysis code:\n{analysis_code}\n\nanalysis output:\n{analysis_output}"
},
},
"recommendations": {
Expand Down Expand Up @@ -84,6 +74,7 @@
"sql_plot": "Please follow these steps:\n1. READ the user's question CAREFULLY.\n2. UNDERSTAND what plot can be generated to answer the question.\n3. If needed, USE the 'conn' object to query the database with `pd.read_sql('SQL query here', conn.conn)`.\n4. WRITE the Python code that makes a figure `fig` with this plot.\n5. ENSURE that your response contains ONLY the code without any additional explanations or comments.\n4. VERIFY that your code is SYNTACTICALLY CORRECT and adheres to standard practices.\n5. You code must SAVE THE PLOT to `fig`.\n6. You may use triple backticks ``` before and after the code block.\n7. Do NOT add comments to your code.\n\nYou MUST provide clean and efficient code as a response, and remember, I'm going to tip $300K for a BETTER SOLUTION!\n\nNow Take a Deep Breath.\n\n",
"python_plot": "Please follow these steps:\n1. READ the user's question CAREFULLY.\n2. UNDERSTAND what plot can be generated to answer the question.\n3. WRITE the Python code that makes a figure `fig` with this plot.\n3. ENSURE that your response contains ONLY the Python code without any additional explanations or comments.\n4. VERIFY that your Python code is SYNTACTICALLY CORRECT and adheres to standard Pythonic practices.\n5. You code must SAVE THE PLOT to `fig`.\n6. You may use triple backticks ``` before and after the code block.\n7. Do NOT add comments to your code.\n\nYou MUST provide clean and efficient Python code as a response, and remember, I'm going to tip $300K for a BETTER SOLUTION!\n\nNow Take a Deep Breath.\n\n",
"doc_addition_text": "You may use the following documentation to understand the schema of the {db_type}:\n{doc}\n",
"python_examples_text": "You may use the following examples to guide you:\n{python_examples}\n",
"sql_examples_text": "You may use the following examples to guide you:\n{sql_examples}\n",
"history": "Also use responses to past questions to guide you.",
"locals": "The following local environment variables are available to you:\n{locals}\n",
Expand Down
43 changes: 43 additions & 0 deletions build/lib/lyzr/data_analyzr/analysis_handler/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,15 @@ class PlotFactory(FactoryBaseClass):
_add_sql_examples(user_input: str, system_message_sections: list, system_message_dict: dict):
Add SQL examples to the system message sections and dictionary based on user input.
_add_python_examples(user_input: str, system_message_sections: list, system_message_dict: dict):
Add Python examples to the system message sections and dictionary based on user input.
extract_and_execute_code(llm_response: str):
Executes the plotting code extracted from the provided LLM response.
code_cleaner(code: str) -> str:
Handler for cleaning the code by removing print statements and plt.show() calls.
save_plot_image() -> str:
Saves the current plot to a file specified by `self.plot_path`.
Expand Down Expand Up @@ -307,6 +313,11 @@ def _get_message_sections_and_dict(self, user_input: str) -> tuple[list, dict]:
system_message_sections.append("doc_addition_text")
system_message_dict["doc"] = doc_str
system_message_dict["db_type"] = "dataframe(s)"
system_message_sections, system_message_dict = self._add_python_examples(
user_input=user_input,
system_message_sections=system_message_sections,
system_message_dict=system_message_dict,
)
system_message_dict["locals"] = make_locals_string(self.locals_)
return system_message_sections, system_message_dict

Expand Down Expand Up @@ -408,6 +419,37 @@ def _add_sql_examples(
system_message_dict["sql_examples"] = sql_examples_str
return system_message_sections, system_message_dict

def _add_python_examples(
    self, user_input: str, system_message_sections: list, system_message_dict: dict
) -> tuple[list, dict]:
    """
    Add Python examples to the system message sections and dictionary based on user input.

    Args:
        user_input (str): The input provided by the user.
        system_message_sections (list): A list of sections in the system message.
        system_message_dict (dict): A dictionary containing the system message content.

    Returns:
        tuple: The same `system_message_sections` and `system_message_dict` objects,
            updated in place with Python examples if any were returned.

    Procedure:
        - Query the vector store for Python code examples related to the user's input.
        - If the vector store returns at least one item, append the
          "python_examples_text" section to the system message sections.
        - Format every usable example (not None, and containing both a "question"
          and a "python_code" key) into a single string.
        - Store the formatted string in the system message dictionary under the
          key "python_examples".
        - Return the updated system message sections and dictionary.
    """
    python_examples = self.vector_store.get_related_python_code(user_input)
    if len(python_examples) > 0:
        system_message_sections.append("python_examples_text")
        # Entries that are None or lack either key are skipped; if every entry is
        # skipped the section is still added, with an empty examples string.
        system_message_dict["python_examples"] = "".join(
            f"Question: {example['question']}\nAnalysis Code:\n{example['python_code']}\n\n"
            for example in python_examples
            if example is not None
            and "question" in example
            and "python_code" in example
        )
    return system_message_sections, system_message_dict

def extract_and_execute_code(self, llm_response: str):
"""
Executes the plotting code extracted from the provided LLM response.
Expand Down Expand Up @@ -459,6 +501,7 @@ def extract_and_execute_code(self, llm_response: str):
return self.locals_["fig"]

def code_cleaner(self, code: str) -> str:
    """
    Clean the extracted plotting code before it is executed.

    Args:
        code (str): The code extracted from the LLM response.

    Returns:
        str: Whatever `remove_print_and_plt_show` returns for `code`.
    """
    cleaned_code = remove_print_and_plt_show(code)
    return cleaned_code

def save_plot_image(self) -> str:
Expand Down
55 changes: 10 additions & 45 deletions build/lib/lyzr/data_analyzr/analysis_handler/pythonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,22 @@ class PythonicAnalysisFactory(FactoryBaseClass):
Methods:
__init__(llm, logger, context, df_dict, vector_store, max_retries=None, time_limit=None, auto_train=None, **llm_kwargs):
Initializes a PythonicAnalysisFactory instance.
generate_output(user_input, **kwargs):
Runs analysis and generates output based on the provided user input.
get_prompt_messages(user_input):
Generates a list of prompt messages based on the user's input.
get_analysis_guide(user_input):
Generates an analysis guide based on the user's input.
_get_locals_and_docs(system_message_sections, system_message_dict, user_input):
Retrieves local variables and related documentation based on user input.
extract_and_execute_code(llm_response):
Extracts Python code from a given LLM response, processes it, and executes it within a controlled environment.
code_cleaner(code) -> str:
Handler for cleaning the extracted code.
auto_train(user_input, code, **kwargs):
Adds the user input and generated Python code to the vector store if the auto_train flag is set.
"""
Expand Down Expand Up @@ -171,7 +177,7 @@ def get_prompt_messages(self, user_input: str) -> list:
This method constructs a series of messages to be used with the LLM.
- Incorporates context and examples relevant to the user's input.
- List of messages includes system messages with context, guides, local variables,
- List of messages includes system messages with context, local variables,
documentation, and historical examples, followed by the user's input.
Args:
Expand All @@ -187,11 +193,6 @@ def get_prompt_messages(self, user_input: str) -> list:
"closing",
]
system_message_dict = {"context": self.context}
# add analysis guide
self.guide = self.get_analysis_guide(user_input)
if self.guide is not None and self.guide != "":
system_message_sections.append("guide")
system_message_dict["guide"] = self.guide
# add locals and docs
system_message_sections, system_message_dict = self._get_locals_and_docs(
system_message_sections=system_message_sections,
Expand Down Expand Up @@ -219,43 +220,6 @@ def get_prompt_messages(self, user_input: str) -> list:
messages.append(UserMessage(content=user_input))
return messages

def get_analysis_guide(self, user_input: str) -> str:
    """
    Ask the LLM for an analysis guide relevant to the user's input.

    Args:
        user_input (str): The user's question; used here only to look up
            related documentation in the vector store.

    Returns:
        str: The stripped text content of the LLM's response message.

    Procedure:
        - Start from the "context", "external_context" and "task" prompt sections.
        - Fetch related documentation from the vector store; when any is returned,
          add the "doc_addition_text" section and pass the items, one per line,
          as the "doc" format value.
        - Build the "ml_analysis_guide" system message and send it to the LLM as
          the only message.
    """
    format_values = {"context": self.context}
    sections = ["context", "external_context", "task"]
    related_docs = self.vector_store.get_related_documentation(user_input)
    if len(related_docs) > 0:
        sections.append("doc_addition_text")
        format_values["doc"] = "".join(f"{doc_item}\n" for doc_item in related_docs)
    system_message = LyzrPromptFactory("ml_analysis_guide", "system").get_message(
        use_sections=sections,
        **format_values,
    )
    response = self.llm.run(messages=[system_message])
    return response.message.content.strip()

def _get_locals_and_docs(
self, system_message_sections: list, system_message_dict: dict, user_input: str
) -> tuple[list, dict]:
Expand Down Expand Up @@ -345,6 +309,7 @@ def extract_and_execute_code(self, llm_response: str):
return self.locals_["result"]

def code_cleaner(self, code: str) -> str:
    """
    Clean the extracted analysis code before it is executed.

    Args:
        code (str): The code extracted from the LLM response.

    Returns:
        str: Whatever `remove_print_and_plt_show` returns for `code`.
    """
    cleaned_code = remove_print_and_plt_show(code)
    return cleaned_code

def auto_train(self, user_input: str, code: str, **kwargs):
Expand Down
5 changes: 4 additions & 1 deletion build/lib/lyzr/data_analyzr/analysis_handler/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ class TxttoSQLFactory(FactoryBaseClass):
extract_and_execute_code(llm_response: str):
Extracts an SQL query from the given LLM response and executes it.
code_cleaner(code) -> str:
Handler for cleaning the extracted code before execution.
_handle_create_table_sql(sql_query: str):
Handles the execution of a SQL query when table creation is involved.
Expand Down Expand Up @@ -272,10 +275,10 @@ def extract_and_execute_code(self, llm_response: str):
else:
analysis_output = self.connector.run_sql(sql_query)
self.code = sql_query
self.guide = sql_query
return analysis_output

def code_cleaner(self, code) -> str:
    """
    Cleaning hook for the extracted code; this implementation applies no changes.

    Args:
        code: The code extracted from the LLM response.

    Returns:
        The `code` argument, unchanged.
    """
    unchanged_code = code
    return unchanged_code

def _handle_create_table_sql(self, sql_query: str):
Expand Down
2 changes: 2 additions & 0 deletions build/lib/lyzr/data_analyzr/analysis_handler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import time
import string
import logging
import warnings
import traceback
from pathlib import Path
from typing import Any, Sequence, Union
Expand Down Expand Up @@ -402,6 +403,7 @@ def process_llm_response(llm_response, **kwargs):

def decorator_wrapper(func):
def wrapped_func(**kwargs):
warnings.filterwarnings("ignore")
result = None
start_time = time.time()
logger.info(
Expand Down
14 changes: 5 additions & 9 deletions build/lib/lyzr/data_analyzr/analyzr.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ class DataAnalyzr:
analysis_llm (LiteLLM): LLM instance for performing analysis.
context (ContextDict): Context for analysis and response generation.
logger (logging.Logger): Logger instance for logging messages.
analysis_guide (str): The guide for the analysis process.
analysis_code (str): The code generated for the analysis.
analysis_output (Union[str, pd.DataFrame, dict[str, pd.DataFrame], None]): The output of the analysis process.
plot_code (str): The code generated for the visualization.
Expand Down Expand Up @@ -171,15 +170,14 @@ def __init__(
self.database_connector,
self.vector_store,
self.analysis_code,
self.analysis_guide,
self.analysis_output,
self.plot_code,
self.plot_output,
self.insights_output,
self.recommendations_output,
self.tasks_output,
self.ai_queries_output,
) = (None,) * 12
) = (None,) * 11

from lyzr.data_analyzr.utils import logging_decorator

Expand Down Expand Up @@ -252,7 +250,7 @@ def analysis(
Perform an analysis based on the provided user input and analysis parameters.
This method determines the type of analysis to be performed (SQL, or Pythonic) and executes it.
If the analysis type is set to skip, it sets the analysis guide to "No analysis performed." and returns None.
If the analysis type is set to skip, it sets the analysis code and analysis output to None and returns None.
Args:
user_input (str): The input string provided by the user for analysis.
Expand All @@ -276,7 +274,6 @@ def analysis(
"""
if self.analysis_type is AnalysisTypes.skip:
self.logger.info("No analysis performed.")
self.analysis_guide = "No analysis performed."
self.analysis_output = None
self.analysis_code = None
return self.analysis_output
Expand All @@ -299,7 +296,6 @@ def analysis(
**analyser_args,
)
self.analysis_output = analyser.generate_output(user_input)
self.analysis_guide = analyser.guide
self.analysis_code = analyser.code
return self.analysis_output

Expand Down Expand Up @@ -394,16 +390,16 @@ def insights(

if insights_context is None:
insights_context = ""
if not hasattr(self, "analysis_guide") or self.analysis_guide is None:
self.analysis_guide = ""
if not hasattr(self, "analysis_code") or self.analysis_code is None:
self.analysis_code = ""
self.insights_output = self.generator_llm.run(
messages=[
LyzrPromptFactory(name="insights", prompt_type="system").get_message(
context=insights_context, n_insights=n_insights
),
LyzrPromptFactory(name="insights", prompt_type="user").get_message(
user_input=user_input,
analysis_guide=self.analysis_guide,
analysis_code=self.analysis_code,
analysis_output=(
format_analysis_output(output_df=self.analysis_output)
if self.analysis_output is not None
Expand Down
Loading

0 comments on commit f2065ec

Please sign in to comment.