diff --git a/openbb_terminal/econometrics/econometrics_controller.py b/openbb_terminal/econometrics/econometrics_controller.py index 6929150af062..1e39c7d83b43 100644 --- a/openbb_terminal/econometrics/econometrics_controller.py +++ b/openbb_terminal/econometrics/econometrics_controller.py @@ -1,7 +1,7 @@ """Econometrics Controller Module""" __docformat__ = "numpy" -# pylint: disable=too-many-lines, too-many-branches, inconsistent-return-statements +# pylint: disable=too-many-arguments,too-many-lines,too-many-branches,inconsistent-return-statements,R0904 import argparse import logging @@ -18,6 +18,7 @@ from openbb_terminal.custom_prompt_toolkit import NestedCompleter from openbb_terminal.decorators import log_start_end from openbb_terminal.econometrics import ( + econometrics_helpers, econometrics_model, econometrics_view, regression_model, @@ -54,12 +55,16 @@ class EconometricsController(BaseController): "show", "type", "desc", + "corr", + "season", "index", "clean", "add", + "eval", "delete", "combine", "rename", + "lag", "ols", "norm", "root", @@ -177,10 +182,13 @@ def __init__(self, queue: Optional[List[str]] = None): "root", "granger", "coint", + "corr", + "season", + "lag", ]: choices[feature] = dict() - # Inititialzie this for regressions to be able to use -h flag + # Initialize this for regressions to be able to use -h flag choices["regressions"] = {} self.choices = choices @@ -202,6 +210,8 @@ def update_runtime_choices(self): "norm", "root", "coint", + "season", + "lag", "regressions", "ols", "panel", @@ -216,6 +226,7 @@ def update_runtime_choices(self): "remove", "combine", "rename", + "corr", ]: self.choices[feature] = {c: {} for c in self.files} @@ -258,12 +269,16 @@ def print_help(self): mt.add_cmd("plot", self.files) mt.add_cmd("type", self.files) mt.add_cmd("desc", self.files) + mt.add_cmd("corr", self.files) + mt.add_cmd("season", self.files) mt.add_cmd("index", self.files) mt.add_cmd("clean", self.files) mt.add_cmd("add", self.files) + 
@log_start_end(log=logger)
def call_season(self, other_args: List[str]):
    """Process season command

    Parses ``season`` arguments and hands the selected ``dataset.column``
    to the seasonality view.

    Parameters
    ----------
    other_args: List[str]
        Raw command-line tokens that followed the ``season`` command
    """
    # Every loaded "dataset.column" pair is an accepted value for -v/--values
    value_choices = {}
    for dataset_name, frame in self.datasets.items():
        for column_name in frame.columns:
            value_choices[f"{dataset_name}.{column_name}"] = {
                column_name: None,
                dataset_name: None,
            }

    season_parser = argparse.ArgumentParser(
        prog="season",
        description="The seasonality for a given column",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        add_help=False,
    )
    season_parser.add_argument(
        "-v",
        "--values",
        dest="values",
        type=str,
        choices=value_choices,
        help="Dataset.column values to be displayed in a plot",
    )
    season_parser.add_argument(
        "-m",
        dest="m",
        type=int,
        default=None,
        help="A time lag to highlight on the plot",
    )
    season_parser.add_argument(
        "--max_lag",
        dest="max_lag",
        type=int,
        default=24,
        help="The maximal lag order to consider",
    )
    season_parser.add_argument(
        "-a",
        "--alpha",
        dest="alpha",
        type=float,
        default=0.05,
        help="The confidence interval to display",
    )

    # A leading bare token is treated as the -v/--values argument
    if other_args and other_args[0][0] != "-":
        other_args.insert(0, "-v")

    ns_parser = self.parse_known_args_and_warn(
        season_parser, other_args, export_allowed=EXPORT_ONLY_FIGURES_ALLOWED
    )
    if not ns_parser:
        return

    selection = ns_parser.values
    if not selection:
        console.print("[red]Please enter valid dataset.\n[/red]")
        return

    try:
        dataset, col = selection.split(".")
        data = self.datasets[dataset]
        # The view reads the dataset label back from ``data.name``
        data.name = dataset
    except ValueError:
        console.print("[red]Please enter 'dataset'.'column'.[/red]\n")
        return

    econometrics_view.display_seasonality(
        data=data,
        column=col,
        export=ns_parser.export,
        m=ns_parser.m,
        max_lag=ns_parser.max_lag,
        alpha=ns_parser.alpha,
    )
@log_start_end(log=logger)
def call_lag(self, other_args: List[str]):
    """Process lag command

    Adds a column named ``<column>_with_<n>_lags`` to the selected dataset,
    holding the selected column shifted by ``n`` periods.

    Parameters
    ----------
    other_args: List[str]
        Raw command-line tokens that followed the ``lag`` command
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        prog="lag",
        description="Add lag to a variable by shifting a column.",
    )
    parser.add_argument(
        "-v",
        "--values",
        help="Dataset.column values to add lag to.",
        dest="values",
        choices={
            f"{dataset}.{column}": {column: None, dataset: None}
            for dataset, dataframe in self.datasets.items()
            for column in dataframe.columns
        },
        type=str,
        required="-h" not in other_args,
    )
    # Not marked as required: a required option never falls back to its
    # default, which made the advertised default of 5 unreachable.
    parser.add_argument(
        "-l",
        "--lags",
        action="store",
        dest="lags",
        type=check_positive,
        default=5,
        help="How many periods to lag the selected column.",
    )
    parser.add_argument(
        "-f",
        "--fill-value",
        action="store",
        dest="fill_value",
        default=None,
        help="The value used for filling the newly introduced missing values.",
    )

    # A leading bare token is treated as the -v/--values argument
    if other_args and "-" not in other_args[0][0]:
        other_args.insert(0, "-v")
    ns_parser = self.parse_known_args_and_warn(
        parser, other_args, export_allowed=NO_EXPORT
    )

    if not ns_parser:
        return

    try:
        dataset, col = ns_parser.values.split(".")
        data = self.datasets[dataset]
    except ValueError:
        console.print("[red]Please enter 'dataset'.'column'.[/red]\n")
        return

    # argparse delivers the fill value as text; filling a numeric column with
    # the string "0" would turn it into a mixed object column, so convert
    # numeric-looking input and keep anything else exactly as typed.
    fill_value = ns_parser.fill_value
    if fill_value is not None:
        try:
            fill_value = float(fill_value)
        except ValueError:
            pass

    data[f"{col}_with_{ns_parser.lags}_lags"] = data[col].shift(
        ns_parser.lags, fill_value=fill_value
    )
    self.datasets[dataset] = data

    # Refresh auto-completion so the new column is selectable
    self.update_runtime_choices()
prog="eval", + description="""Create custom data column from loaded datasets. Can be mathematical expressions supported + by pandas.eval() function. + + Example. If I have loaded `fred DGS2,DGS5` and I want to create a new column that is the difference + between these two, I can create a new column by doing `eval spread = DGS2 - DGS5`. + Notice that the command is case sensitive, i.e., `DGS2` is not the same as `dgs2`. + """, + ) + parser.add_argument( + "-q", + "--query", + type=str, + nargs="+", + dest="query", + required="-h" not in other_args, + help="Query to evaluate on loaded datasets", + ) + if other_args and "-" not in other_args[0][0]: + other_args.insert(0, "-q") + + ns_parser = self.parse_known_args_and_warn( + parser, other_args, export_allowed=EXPORT_ONLY_RAW_DATA_ALLOWED + ) + if ns_parser: + self.datasets = econometrics_helpers.create_new_entry( + self.datasets, " ".join(ns_parser.query) + ) + self.update_runtime_choices() + self.update_loaded() + @log_start_end(log=logger) def call_delete(self, other_args: List[str]): - """Process add""" + """Process delete command""" parser = argparse.ArgumentParser( add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -1060,7 +1289,7 @@ def call_delete(self, other_args: List[str]): @log_start_end(log=logger) def call_combine(self, other_args: List[str]): - """Process combine""" + """Process combine command""" parser = argparse.ArgumentParser( add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -1118,7 +1347,7 @@ def call_combine(self, other_args: List[str]): @log_start_end(log=logger) def call_rename(self, other_args: List[str]): - """Process rename""" + """Process rename command""" parser = argparse.ArgumentParser( add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter, diff --git a/openbb_terminal/econometrics/econometrics_helpers.py b/openbb_terminal/econometrics/econometrics_helpers.py index a706c12eefdb..63b9df280f95 100644 --- 
def create_new_entry(dataset: Dict[str, pd.DataFrame], query: str) -> Dict:
    """Create a new series based off previously loaded columns

    All non-empty loaded dataframes are joined column-wise, rows containing
    NaNs are dropped, and ``query`` is evaluated on the result with
    ``DataFrame.eval``. The outcome is stored under the ``"custom"`` key.

    Parameters
    ----------
    dataset: Dict[str,pd.DataFrame]
        Economy datasets that are loaded. Updated in place and also returned.
    query: str
        Query to execute. ``newcol = colA + colB`` creates a column named
        ``newcol``; a bare expression such as ``colA - colB`` is stored in a
        column named ``custom_exp``.

    Returns
    -------
    Dict[str, pd.DataFrame]
        The dataset dictionary, unchanged if the query could not be evaluated.
    """
    # Create a single dataframe from dictionary of dataframes
    frames = [df for df in dataset.values() if not df.empty]
    data = pd.concat(frames, axis=1) if frames else pd.DataFrame()
    # In order to account for potentially different index time steps, drop NaNs here.
    # Potentially problematic down the road
    data = data.dropna(axis=0)

    # Only `name = expression` is an assignment. Checking for a bare "=" would
    # also match comparisons such as `a >= b`, `a == b` or `a != b`.
    target, separator, expression = query.partition("=")
    new_column = target.strip()
    is_assignment = (
        bool(separator)
        and new_column.isidentifier()
        and not expression.startswith("=")
    )

    if is_assignment:
        # Never overwrite a loaded column: pick a fresh name instead. Only the
        # assignment target is renamed, so references to the existing column
        # on the right-hand side keep working.
        while new_column in data.columns:
            new_column += "_duplicate"
        query = f"{new_column} = {expression.strip()}"

    # Guard the eval in case the user does something that is not allowed
    try:
        result = data.eval(query)
    except SyntaxError:
        console.print(
            "[red]Invalid syntax in query. Please enter something of the form "
            "`newcol=col1 + col2`[/red]\n"
        )
        return dataset
    except pd.errors.UndefinedVariableError as e:
        console.print(f"[red]{e}[/red]")
        return dataset

    if is_assignment:
        new_series = result[[new_column]]
        # If custom already exists, add the new column next to the existing
        # ones (axis=1) rather than stacking it underneath them as extra rows
        if "custom" in dataset:
            dataset["custom"] = pd.concat([dataset["custom"], new_series], axis=1)
        else:
            dataset["custom"] = new_series
        return dataset

    # Without an assignment the result is normally a series. Name it
    # explicitly: building a DataFrame from a *named* series with a different
    # `columns=` label would reindex and yield a column of NaNs.
    if isinstance(result, pd.Series):
        dataset["custom"] = result.to_frame(name="custom_exp")
    else:
        dataset["custom"] = pd.DataFrame(result, columns=["custom_exp"])
    return dataset
@log_start_end(log=logger)
def display_corr(
    dataset: pd.DataFrame,
    export: str = "",
    sheet_name: Optional[str] = None,
    external_axes: bool = False,
) -> Union[OpenBBFigure, None]:
    """Plot correlation coefficients for dataset features

    Parameters
    ----------
    dataset : pd.DataFrame
        The dataset for calculating correlation coefficients
    sheet_name: str
        Optionally specify the name of the sheet the data is exported to.
    export: str
        Format to export image
    external_axes : bool, optional
        Whether to return the figure object or not, by default False

    Returns
    -------
    Union[OpenBBFigure, None]
        The result of ``fig.show(external=external_axes)``, or None when there
        is nothing to correlate.
    """

    # correlation
    correlation = econometrics_model.get_corr_df(dataset)

    # No numeric columns (or no data at all) leaves nothing to draw, and
    # taking the minimum of an empty matrix would raise.
    if correlation.empty:
        return console.print("No data to plot")

    # Column-wise then overall minimum skips NaN entries (e.g. from constant
    # columns); fall back to the theoretical lower bound if everything is NaN.
    lowest = correlation.min().min()
    zmin = -1 if pd.isna(lowest) else lowest

    fig = OpenBBFigure()
    fig.add_heatmap(
        z=correlation,
        x=correlation.columns,
        y=correlation.index,
        zmin=zmin,
        zmax=1,
        showscale=True,
        text=correlation,
        texttemplate="%{text:.2f}",
        colorscale="electric",
        colorbar=dict(
            thickness=10,
            thicknessmode="pixels",
            x=1.2,
            y=1,
            xanchor="right",
            yanchor="top",
            xpad=10,
        ),
        xgap=1,
        ygap=1,
    )
    # Put the first row at the top so the matrix reads like a table
    fig.update_yaxes(autorange="reversed")
    fig.update_layout(margin=dict(l=0, r=120, t=0, b=0), title="Correlation Matrix")

    # NOTE(review): the export name "plot" is shared with other views in this
    # module - confirm a dedicated name is not wanted here.
    export_data(
        export,
        os.path.dirname(os.path.abspath(__file__)),
        "plot",
        sheet_name=sheet_name,
        figure=fig,
    )
    return fig.show(external=external_axes)
+ export: str + Format to export image + m: Optional[int] + Optionally, a time lag to highlight on the plot. Default is none. + max_lag: int + The maximal lag order to consider. Default is 24. + alpha: float + The confidence interval to display. Default is 0.05. + external_axes : bool, optional + Whether to return the figure object or not, by default False + """ + + if data.empty: + return console.print("No data to plot") + + series = data[column] + + ending = get_ending(data.name, column) + + fig = OpenBBFigure() + fig.set_title(f"Seasonality{ending}", wrap=True, wrap_width=55) + fig.add_corr_plot(series, m=m, max_lag=max_lag, alpha=alpha) + fig.update_xaxes(autorange=False, range=[-1, max_lag + 1]) + fig.add_legend_label() + + export_data( + export, + os.path.dirname(os.path.abspath(__file__)), + "plot", + sheet_name=sheet_name, + figure=fig, + ) + + return fig.show(external=external_axes) + + @log_start_end(log=logger) def display_norm( data: pd.Series, diff --git a/openbb_terminal/miscellaneous/i18n/en.yml b/openbb_terminal/miscellaneous/i18n/en.yml index 671c686ed801..14fbcf8bfc82 100644 --- a/openbb_terminal/miscellaneous/i18n/en.yml +++ b/openbb_terminal/miscellaneous/i18n/en.yml @@ -960,13 +960,17 @@ en: econometrics/plot: plot data from a dataset econometrics/type: change types of the columns or display their types econometrics/desc: show descriptive statistics of a dataset + econometrics/corr: Plot the correlation coefficients for dataset features + econometrics/season: Plot the seasonality for a dataset column econometrics/index: set (multi) index based on columns econometrics/clean: clean a dataset by filling or dropping NaNs econometrics/modify: combine columns of datasets and delete or rename columns econometrics/add: Add columns to dataset with option to use formulas + econometrics/eval: create new series by performing operations on loaded data econometrics/delete: Delete columns from dataset econometrics/combine: Combine columns from different 
datasets econometrics/rename: Rename column from dataset + econometrics/lag: Add lag to a variable by shifting a column econometrics/_regression_: Regression econometrics/_regression_tests_: Regression Tests econometrics/ols: fit a (multi) linear regression model diff --git a/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics_base.openbb b/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics_base.openbb index 71dceee00547..465fcb0b0f2c 100644 --- a/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics_base.openbb +++ b/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics_base.openbb @@ -5,11 +5,15 @@ econometrics load nile desc nile load nile -a nile_2 +eval double_volume = volume * 2 +lag -v custom.double_volume -l 5 combine nile -c nile_2.volume load -f 20221008_221605_dataset.xlsx rename -d 20221008_221605_dataset -o population -n pop load -e load -f wage_panel -a wp +corr -d wp +season wp.hours add -n wp.after_1990 -b wp.year -s > -c 1990 add -n wp.after_1985 -b wp.year -s > -c 1985 show -n wp -s year -l 20