Skip to content

Commit

Permalink
fix: staticmethod LyzrLLMFactory and recommendations prompt
Browse files Browse the repository at this point in the history
  • Loading branch information
gargimaheshwari committed Apr 18, 2024
1 parent 5847686 commit 9fd078b
Show file tree
Hide file tree
Showing 15 changed files with 36 additions and 34 deletions.
2 changes: 1 addition & 1 deletion build/lib/lyzr/base/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def describe_dataset(
raise ValueError("Please provide a valid pandas DataFrame.")

if model is None:
model = LyzrLLMFactory(
model = LyzrLLMFactory.from_defaults(
api_key=api_key,
api_type=model_type,
model=model_name,
Expand Down
4 changes: 2 additions & 2 deletions build/lib/lyzr/base/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@


class LyzrLLMFactory:

def from_defaults(self, model: str = DEFAULT_LLM, **kwargs) -> LLM:
@staticmethod
def from_defaults(model: str = DEFAULT_LLM, **kwargs) -> LLM:
# model_type -> api_type
# model_name -> model
# model_prompts -> Sequence[ChatMessage]
Expand Down
4 changes: 2 additions & 2 deletions build/lib/lyzr/base/prompt_texts.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@
"inputs": "The user asked the following question: {user_input}\nGenerate recommendations that enhance the user's question or are related to it."
}
},
"analysis_guide": {
"ml_analysis_guide": {
"system": {
"context": "You are Business Analyst. You are an expert in your field. You are assisting a data analyst.\nYou are given a dataset and a question. Your job is to analyze these two inputs and determine how to answer the question based on the data.\n\n",
"external_context": "{context}",
"task": "You must determine what type of analysis should be performed on the dataset in order to answer the question.\nYou should then list out the steps that the data analyst should take to perform the analysis.\nLimit your total response to 100 words.\nYou should address the data analyst directly.",
},
"user": {"inputs": "{df_details}\nQuestion: {question}"},
},
"analysis_steps": {
"ml_analysis_steps": {
"system": {
"task": "You are a Senior Data Scientist. You have been asked a question on a dataframe.\nYour job is to analyze the given dataframe `df` to answer the question.\n\nTo assist you, a Business Analyst with domain knowledge has given their insights on the best way to go about your task.\nThe Business Analyst has also shared the names of the columns required in the resultant dataframe.\nFollow their instructions as closely as possible.\n\nMake sure that you clean the data before you analyze it.\n\nYour answer should be in the form of a python JSON object, following the given format:\n{schema}\n\nA. The value of 'analysis_df' should be the name of the dataframe on which this analysis is to be performed.\nB. The value of 'steps' should be a list of dictionaries. Each dictionary should contain the following keys: 'step', 'task', 'type', 'args'.\n The following values are available for these keys. ONLY USE THESE VALUES.\n 1. Step: A number indicating the order of the step. Numbering should start from 1.\n 2. Task: The task to be performed. The task can be one of the following: 'clean_data', 'transform', 'math_operation', 'analysis'\n 3. Type: The type of task to be performed.\n 3a. For task 'clean_data', following types are available: 'convert_to_datetime', 'convert_to_numeric', 'convert_to_categorical'\n 3b. For task 'transform', following types are available: 'one_hot_encode', 'ordinal_encode', 'scale', 'extract_time_period', 'select_indices'\n 3c. For task 'math_operation', following types are available: 'add', 'subtract', 'multiply', 'divide'\n 3d. For task 'analysis', following types are available: 'sortvalues', 'filter', 'mean', 'sum', 'cumsum', 'groupby', 'correlation', 'regression', 'classification', 'clustering', 'forecast'\n 4. Args: The arguments required to perform the task. The arguments should be in the form of a dictionary.\n 4a. For task 'clean_data' - 'columns': list\n 4b. For task 'transform', type 'one_hot_encode', 'ordinal_encode', and 'scale' - 'columns': list\n 4c. For task 'transform', type 'extract_time_period' - 'columns': list, 'period_to_extract': Literal['week', 'month', 'year', 'day', 'hour', 'minute', 'second', 'weekday']\n 4d. For task 'transform', type 'select_indices' - 'columns': list, 'indices': list\n 4e. For task 'math_operation' - 'columns': list, 'result': str (the name of the column to store the result in)\n 4f. For task 'analysis', type 'groupby' - 'columns': list, 'agg': Union[str, list], 'agg_col': Optional[list]\n 4g. For task 'analysis', type 'sortvalues' - columns: list, 'ascending': Optional[bool]\n 4h. For task 'analysis', type 'filter' - 'columns': list, 'values': list[Any] (the values to compare the columns to), 'relations': list[Literal['lessthan', 'greaterthan', 'lessthanorequalto', 'greaterthanorequalto', 'equalto', 'notequalto', 'startswith', 'endswith', 'contains']]\n 4i. For task 'analysis', types 'mean', 'cumsum', and 'sum' - 'columns': list\n 4j. For task 'analysis', type 'correlation' - 'columns': list, 'method': Optional[Literal['pearson', 'kendall', 'spearman']]\n 4k. For task 'analysis', type 'regression' - 'x': list, 'y': list\n 4l. For task 'analysis', type 'classification' - 'x': list, 'y': list\n 4m. For task 'analysis', type 'clustering' - 'x': list, 'y': list\n 4n. For task 'analysis', type 'forecast' - 'time_column': str, 'y_column': str, 'end': Optional[str], 'steps': Optional[int] # you must pass either 'end' - the date until which to forecast or 'steps' - the number of steps to forecast\nC. The value of 'output_columns' should be a list of strings. Each string should be the name of a column in the dataframe. These columns should be the ones that are required to answer the question.\n\nDo not give any explanations. Only give the python JSON as the answer.\nThis JSON will be evaluated using the eval() function in python. Ensure that it is in the correct format, and has no syntax errors.\n\nOnly return this JSON with details of steps. Do not return anything else.\n\nBefore beginning, take a deep breath and relax. You are an expert in your field. You have done this many times before.\nYou may now begin."
},
Expand Down
11 changes: 6 additions & 5 deletions build/lib/lyzr/data_analyzr/analyzr.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __init__(
if analysis_type is None:
raise MissingValueError("`analysis_type` is a required parameter.")
if model is None:
self.model = LyzrLLMFactory().from_defaults(
self.model = LyzrLLMFactory.from_defaults(
model="gpt-4-1106-preview", api_key=api_key, seed=seed
)
elif isinstance(model, LiteLLM):
Expand Down Expand Up @@ -125,7 +125,7 @@ def _legacy_usage(
warnings.warn(
f"The `{param}` parameter is deprecated and will be removed in a future version. Please use the `analysis_model` parameter to set the analysis model, and the `gen_model` parameter to set the generation model."
)
self.model = model or LyzrLLMFactory().from_defaults(
self.model = model or LyzrLLMFactory.from_defaults(
api_key=api_key,
api_type=model_type,
model=model_name or os.environ.get("MODEL_NAME", "gpt-4-1106-preview"),
Expand Down Expand Up @@ -246,7 +246,7 @@ def analysis(
"No analysis performed. Analysis output is the given dataframe."
)
return self.analysis_output
analysis_model = LyzrLLMFactory().from_defaults(model="gpt-3.5-turbo")
analysis_model = LyzrLLMFactory.from_defaults(model="gpt-3.5-turbo")
analysis_model.additional_kwargs["logger"] = self.logger
if self.analysis_type == "sql" and analysis_steps is None:
return self._txt_to_sql_analysis(
Expand Down Expand Up @@ -393,6 +393,7 @@ def recommendations(
if not use_insights:
insights = None
system_message_sections.append("task_no_insights")
user_message_dict["insights"] = ""
else:
system_message_sections.append("task_with_insights")
user_message_dict["insights"] = (
Expand All @@ -412,7 +413,7 @@ def recommendations(
]
elif output_type.lower().strip() == "text":
system_message_sections.append("text_type")
system_message_dict["n_recommendations"] = n_recommendations
system_message_dict["n_recommendations"] = n_recommendations

system_message_sections.append("closing")
self.recommendations_output = self.model.run(
Expand Down Expand Up @@ -461,7 +462,7 @@ def tasks(
context=tasks_context.strip() + "\n\n", n_tasks=n_tasks
),
LyzrPromptFactory(name="tasks", prompt_type="user").get_message(
user_input=user_input or self.user_input,
user_input=user_input,
insights=self.insights_output,
recommendations=self.recommendations_output,
),
Expand Down
12 changes: 6 additions & 6 deletions build/lib/lyzr/data_analyzr/ml_analysis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ def _get_analysis_guide(self, user_input: str) -> str:
output = self.model.run(
messages=[
LyzrPromptFactory(
name="analysis_guide", prompt_type="system"
name="ml_analysis_guide", prompt_type="system"
).get_message(
context=self.context,
),
LyzrPromptFactory(
name="analysis_guide", prompt_type="user"
name="ml_analysis_guide", prompt_type="user"
).get_message(
df_details=print_df_details(self.df_dict, self.df_info_dict),
question=user_input,
Expand Down Expand Up @@ -162,10 +162,10 @@ def _get_analysis_steps_messages_kwargs(self, user_input: str) -> tuple:
"output columns": ["col1", "col2", "col3"],
}
messages = [
LyzrPromptFactory(name="analysis_steps", prompt_type="system").get_message(
schema=schema
),
LyzrPromptFactory(name="analysis_steps", prompt_type="user").get_message(
LyzrPromptFactory(
name="ml_analysis_steps", prompt_type="system"
).get_message(schema=schema),
LyzrPromptFactory(name="ml_analysis_steps", prompt_type="user").get_message(
df_details=print_df_details(self.df_dict, self.df_info_dict),
question=user_input,
context=self.analysis_guide,
Expand Down
Binary file removed dist/lyzr-0.1.33.tar.gz
Binary file not shown.
Binary file not shown.
Binary file added dist/lyzr-0.1.34.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion lyzr.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: lyzr
Version: 0.1.33
Version: 0.1.34
Summary: UNKNOWN
Home-page: UNKNOWN
Author: lyzr
Expand Down
2 changes: 1 addition & 1 deletion lyzr/base/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def describe_dataset(
raise ValueError("Please provide a valid pandas DataFrame.")

if model is None:
model = LyzrLLMFactory(
model = LyzrLLMFactory.from_defaults(
api_key=api_key,
api_type=model_type,
model=model_name,
Expand Down
4 changes: 2 additions & 2 deletions lyzr/base/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@


class LyzrLLMFactory:

def from_defaults(self, model: str = DEFAULT_LLM, **kwargs) -> LLM:
@staticmethod
def from_defaults(model: str = DEFAULT_LLM, **kwargs) -> LLM:
# model_type -> api_type
# model_name -> model
# model_prompts -> Sequence[ChatMessage]
Expand Down
4 changes: 2 additions & 2 deletions lyzr/base/prompt_texts.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@
"inputs": "The user asked the following question: {user_input}\nGenerate recommendations that enhance the user's question or are related to it."
}
},
"analysis_guide": {
"ml_analysis_guide": {
"system": {
"context": "You are Business Analyst. You are an expert in your field. You are assisting a data analyst.\nYou are given a dataset and a question. Your job is to analyze these two inputs and determine how to answer the question based on the data.\n\n",
"external_context": "{context}",
"task": "You must determine what type of analysis should be performed on the dataset in order to answer the question.\nYou should then list out the steps that the data analyst should take to perform the analysis.\nLimit your total response to 100 words.\nYou should address the data analyst directly.",
},
"user": {"inputs": "{df_details}\nQuestion: {question}"},
},
"analysis_steps": {
"ml_analysis_steps": {
"system": {
"task": "You are a Senior Data Scientist. You have been asked a question on a dataframe.\nYour job is to analyze the given dataframe `df` to answer the question.\n\nTo assist you, a Business Analyst with domain knowledge has given their insights on the best way to go about your task.\nThe Business Analyst has also shared the names of the columns required in the resultant dataframe.\nFollow their instructions as closely as possible.\n\nMake sure that you clean the data before you analyze it.\n\nYour answer should be in the form of a python JSON object, following the given format:\n{schema}\n\nA. The value of 'analysis_df' should be the name of the dataframe on which this analysis is to be performed.\nB. The value of 'steps' should be a list of dictionaries. Each dictionary should contain the following keys: 'step', 'task', 'type', 'args'.\n The following values are available for these keys. ONLY USE THESE VALUES.\n 1. Step: A number indicating the order of the step. Numbering should start from 1.\n 2. Task: The task to be performed. The task can be one of the following: 'clean_data', 'transform', 'math_operation', 'analysis'\n 3. Type: The type of task to be performed.\n 3a. For task 'clean_data', following types are available: 'convert_to_datetime', 'convert_to_numeric', 'convert_to_categorical'\n 3b. For task 'transform', following types are available: 'one_hot_encode', 'ordinal_encode', 'scale', 'extract_time_period', 'select_indices'\n 3c. For task 'math_operation', following types are available: 'add', 'subtract', 'multiply', 'divide'\n 3d. For task 'analysis', following types are available: 'sortvalues', 'filter', 'mean', 'sum', 'cumsum', 'groupby', 'correlation', 'regression', 'classification', 'clustering', 'forecast'\n 4. Args: The arguments required to perform the task. The arguments should be in the form of a dictionary.\n 4a. For task 'clean_data' - 'columns': list\n 4b. For task 'transform', type 'one_hot_encode', 'ordinal_encode', and 'scale' - 'columns': list\n 4c. For task 'transform', type 'extract_time_period' - 'columns': list, 'period_to_extract': Literal['week', 'month', 'year', 'day', 'hour', 'minute', 'second', 'weekday']\n 4d. For task 'transform', type 'select_indices' - 'columns': list, 'indices': list\n 4e. For task 'math_operation' - 'columns': list, 'result': str (the name of the column to store the result in)\n 4f. For task 'analysis', type 'groupby' - 'columns': list, 'agg': Union[str, list], 'agg_col': Optional[list]\n 4g. For task 'analysis', type 'sortvalues' - columns: list, 'ascending': Optional[bool]\n 4h. For task 'analysis', type 'filter' - 'columns': list, 'values': list[Any] (the values to compare the columns to), 'relations': list[Literal['lessthan', 'greaterthan', 'lessthanorequalto', 'greaterthanorequalto', 'equalto', 'notequalto', 'startswith', 'endswith', 'contains']]\n 4i. For task 'analysis', types 'mean', 'cumsum', and 'sum' - 'columns': list\n 4j. For task 'analysis', type 'correlation' - 'columns': list, 'method': Optional[Literal['pearson', 'kendall', 'spearman']]\n 4k. For task 'analysis', type 'regression' - 'x': list, 'y': list\n 4l. For task 'analysis', type 'classification' - 'x': list, 'y': list\n 4m. For task 'analysis', type 'clustering' - 'x': list, 'y': list\n 4n. For task 'analysis', type 'forecast' - 'time_column': str, 'y_column': str, 'end': Optional[str], 'steps': Optional[int] # you must pass either 'end' - the date until which to forecast or 'steps' - the number of steps to forecast\nC. The value of 'output_columns' should be a list of strings. Each string should be the name of a column in the dataframe. These columns should be the ones that are required to answer the question.\n\nDo not give any explanations. Only give the python JSON as the answer.\nThis JSON will be evaluated using the eval() function in python. Ensure that it is in the correct format, and has no syntax errors.\n\nOnly return this JSON with details of steps. Do not return anything else.\n\nBefore beginning, take a deep breath and relax. You are an expert in your field. You have done this many times before.\nYou may now begin."
},
Expand Down
11 changes: 6 additions & 5 deletions lyzr/data_analyzr/analyzr.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __init__(
if analysis_type is None:
raise MissingValueError("`analysis_type` is a required parameter.")
if model is None:
self.model = LyzrLLMFactory().from_defaults(
self.model = LyzrLLMFactory.from_defaults(
model="gpt-4-1106-preview", api_key=api_key, seed=seed
)
elif isinstance(model, LiteLLM):
Expand Down Expand Up @@ -125,7 +125,7 @@ def _legacy_usage(
warnings.warn(
f"The `{param}` parameter is deprecated and will be removed in a future version. Please use the `analysis_model` parameter to set the analysis model, and the `gen_model` parameter to set the generation model."
)
self.model = model or LyzrLLMFactory().from_defaults(
self.model = model or LyzrLLMFactory.from_defaults(
api_key=api_key,
api_type=model_type,
model=model_name or os.environ.get("MODEL_NAME", "gpt-4-1106-preview"),
Expand Down Expand Up @@ -246,7 +246,7 @@ def analysis(
"No analysis performed. Analysis output is the given dataframe."
)
return self.analysis_output
analysis_model = LyzrLLMFactory().from_defaults(model="gpt-3.5-turbo")
analysis_model = LyzrLLMFactory.from_defaults(model="gpt-3.5-turbo")
analysis_model.additional_kwargs["logger"] = self.logger
if self.analysis_type == "sql" and analysis_steps is None:
return self._txt_to_sql_analysis(
Expand Down Expand Up @@ -393,6 +393,7 @@ def recommendations(
if not use_insights:
insights = None
system_message_sections.append("task_no_insights")
user_message_dict["insights"] = ""
else:
system_message_sections.append("task_with_insights")
user_message_dict["insights"] = (
Expand All @@ -412,7 +413,7 @@ def recommendations(
]
elif output_type.lower().strip() == "text":
system_message_sections.append("text_type")
system_message_dict["n_recommendations"] = n_recommendations
system_message_dict["n_recommendations"] = n_recommendations

system_message_sections.append("closing")
self.recommendations_output = self.model.run(
Expand Down Expand Up @@ -461,7 +462,7 @@ def tasks(
context=tasks_context.strip() + "\n\n", n_tasks=n_tasks
),
LyzrPromptFactory(name="tasks", prompt_type="user").get_message(
user_input=user_input or self.user_input,
user_input=user_input,
insights=self.insights_output,
recommendations=self.recommendations_output,
),
Expand Down
Loading

0 comments on commit 9fd078b

Please sign in to comment.