Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more input types to data processing commands #5453

Merged
merged 14 commits into from
Nov 8, 2023
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
"""OpenBB filters."""

import pandas as pd

from openbb_core.app.utils import df_to_basemodel
from openbb_core.app.utils import convert_to_basemodel


def filter_inputs(**kwargs) -> dict:
def filter_inputs(data_processing: bool = False, **kwargs) -> dict:
"""Filter command inputs."""
for key, value in kwargs.items():
if isinstance(value, pd.DataFrame):
kwargs[key] = df_to_basemodel(value, index=True)
if data_processing and key == "data":
kwargs[key] = convert_to_basemodel(value)

return kwargs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@
Optional,
OrderedDict,
Type,
TypeVar,
Union,
get_args,
get_origin,
get_type_hints,
)

import numpy as np
import pandas as pd
from importlib_metadata import entry_points
from openbb_provider.abstract.data import Data
from pydantic.fields import FieldInfo
from pydantic_core import PydanticUndefined
from starlette.routing import BaseRoute
Expand All @@ -36,15 +39,28 @@
from openbb_core.app.router import CommandMap, RouterLoader
from openbb_core.env import Env

# Input types offered for the `data` parameter (see the "data" entry of
# TYPE_EXPANSION in `format_params`).
# Declared as a constrained TypeVar rather than a Union because
# `format_params` checks for `__constraints__` on the expansion type and
# splices that tuple, together with the parameter's own annotation, into a
# single Union.
DataProcessingSupportedTypes = TypeVar(
"DataProcessingSupportedTypes",
list,
dict,
pd.DataFrame,
List[pd.DataFrame],
pd.Series,
List[pd.Series],
np.ndarray,
Data,
)


class Console:
"""Console to be used by builder and linters."""

def __init__(self, verbose: bool):
"""Initialize the console."""
self.verbose = verbose

def log(self, message: str, **kwargs):
"""Console log method"""
"""Console log method."""
if self.verbose or Env().DEBUG_MODE:
print(message, **kwargs) # noqa: T201

Expand All @@ -55,6 +71,7 @@ class PackageBuilder:
def __init__(
self, directory: Optional[Path] = None, lint: bool = True, verbose: bool = False
) -> None:
"""Initialize the package builder."""
self.directory = directory or Path(__file__).parent
self.lint = lint
self.verbose = verbose
Expand Down Expand Up @@ -86,7 +103,7 @@ def build(
self.run_linters()

def get_extension_map(self) -> Dict[str, List[str]]:
"""Get map of extensions available at build time"""
"""Get map of extensions available at build time."""
groups = ("openbb_core_extension", "openbb_provider_extension")
ext_map = {
g: sorted(
Expand All @@ -100,7 +117,7 @@ def get_extension_map(self) -> Dict[str, List[str]]:
return ext_map

def save_extension_map(self, ext_map: Dict[str, List[str]]) -> None:
"""Save the map of extensions available at build time"""
"""Save the map of extensions available at build time."""
code = dumps(obj=dict(sorted(ext_map.items())), indent=4)
self.console.log("Writing extension map...")
self.write_to_package(code=code, name="extension_map", extension="json")
Expand Down Expand Up @@ -252,6 +269,7 @@ def build(cls, path: str) -> str:
# TODO: Find a better way to handle this. This is a temporary solution.
code += "\nimport openbb_provider"
code += "\nimport pandas"
code += "\nimport numpy"
code += "\nimport datetime"
code += "\nimport pydantic"
code += "\nfrom pydantic import BaseModel"
Expand Down Expand Up @@ -681,6 +699,12 @@ def is_annotated_dc(annotation) -> bool:
annotation.__args__[0], "__dataclass_fields__"
)

@staticmethod
def is_data_processing_function(path: str) -> bool:
    """Tell whether the command registered at `path` is a data processing one.

    The route stored under `path` in the map returned by
    `PathHandler.build_route_map()` is inspected; the command is treated
    as data processing when "POST" is among that route's methods.
    """
    route = PathHandler.build_route_map()[path]
    allowed_methods = route.methods
    return "POST" in allowed_methods

@staticmethod
def reorder_params(params: Dict[str, Parameter]) -> "OrderedDict[str, Parameter]":
"""Reorder the params."""
Expand All @@ -707,7 +731,7 @@ def format_params(
# will need to add some conversion code in the input filter.
TYPE_EXPANSION = {
"symbol": List[str],
"data": pd.DataFrame,
"data": DataProcessingSupportedTypes,
"start_date": str,
"end_date": str,
"provider": None,
Expand Down Expand Up @@ -752,11 +776,16 @@ def format_params(
)
else:
new_type = TYPE_EXPANSION.get(name, ...)
updated_type = (
param.annotation
if new_type is ...
else Union[param.annotation, new_type]
)

if hasattr(new_type, "__constraints__"):
types = new_type.__constraints__ + (param.annotation,)
updated_type = Union[types] # type: ignore
else:
updated_type = (
param.annotation
if new_type is ...
else Union[param.annotation, new_type]
)

formatted[name] = Parameter(
name=name,
Expand Down Expand Up @@ -895,6 +924,10 @@ def build_command_method_implementation(path: str, func: Callable):
code += "},\n"
else:
code += f" {name}={name},\n"

if MethodDefinition.is_data_processing_function(path):
code += " data_processing=True,\n"

code += " )\n\n"
code += " return self._run(\n"
code += f""" "{path}",\n"""
Expand Down Expand Up @@ -1002,6 +1035,7 @@ class Linters:
"""Run the linters for the Platform."""

def __init__(self, directory: Path, verbose: bool = False) -> None:
"""Initialize the linters."""
self.directory = directory
self.verbose = verbose
self.console = Console(verbose)
Expand Down
54 changes: 53 additions & 1 deletion openbb_platform/platform/core/openbb_core/app/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import ast
import json
from typing import Iterable, List, Optional, Union
from typing import Dict, Iterable, List, Optional, Union

import numpy as np
import pandas as pd
from openbb_provider.abstract.data import Data
from pydantic import ValidationError

from openbb_core.app.model.preferences import Preferences
from openbb_core.app.model.system_settings import SystemSettings
Expand Down Expand Up @@ -57,6 +59,56 @@ def df_to_basemodel(
]


def list_to_basemodel(data_list: List) -> List[Data]:
    """Convert a list of mixed items into a flat list of `Data` models.

    Each item is handled according to its type:

    - `Data` instances (subclasses included) are kept unchanged;
    - dictionaries are unpacked into a new `Data`;
    - DataFrames and Series are passed to `df_to_basemodel` and the
      resulting models are appended in place, so a single item may
      contribute more than one model.

    Parameters
    ----------
    data_list : List
        Items to convert.

    Returns
    -------
    List[Data]
        The converted models, in input order.

    Raises
    ------
    ValueError
        If an item is of any other type.
    """
    base_models: List[Data] = []
    for item in data_list:
        # isinstance() already matches subclasses, so the extra
        # issubclass(type(item), Data) test was redundant.
        if isinstance(item, Data):
            base_models.append(item)
        elif isinstance(item, dict):
            base_models.append(Data(**item))
        elif isinstance(item, (pd.DataFrame, pd.Series)):
            # extend, not append: the helper yields several models.
            base_models.extend(df_to_basemodel(item))
        else:
            raise ValueError(f"Unsupported list item type: {type(item)}")
    return base_models


def dict_to_basemodel(data_dict: Dict) -> Data:
    """Build a single `Data` model from the key/value pairs of a dictionary.

    A `ValidationError` raised while constructing the model is re-raised as
    a `ValueError` chained to the original error.
    """
    try:
        model = Data(**data_dict)
    except ValidationError as error:
        message = f"Validation error when converting dict to BaseModel: {error}"
        raise ValueError(message) from error
    return model


def ndarray_to_basemodel(array: np.ndarray) -> List[Data]:
    """Turn a 2D NumPy array into one `Data` model per row.

    Fields are named positionally: `column_0`, `column_1`, and so on.

    Raises
    ------
    ValueError
        If the array is not two-dimensional.
    """
    # Each row is treated as one record, so only 2D input makes sense.
    if array.ndim != 2:
        raise ValueError("Only 2D arrays are supported.")

    # Every row has the same width, so the field names are built once.
    field_names = [f"column_{position}" for position in range(array.shape[1])]

    records: List[Data] = []
    for row in array:
        records.append(Data(**dict(zip(field_names, row))))
    return records


def convert_to_basemodel(data) -> Union[Data, List[Data]]:
    """Dispatch `data` to the converter that matches its type.

    The checks run in a fixed order:

    - `Data` instances (subclasses included) are returned unchanged;
    - lists go through `list_to_basemodel`;
    - dictionaries go through `dict_to_basemodel`;
    - DataFrames and Series go through `df_to_basemodel`;
    - NumPy arrays go through `ndarray_to_basemodel`.

    Parameters
    ----------
    data
        The value to convert.

    Returns
    -------
    Union[Data, List[Data]]
        Whatever the matching converter returns.

    Raises
    ------
    ValueError
        If `data` is none of the supported types.
    """
    # isinstance() already matches subclasses, so the extra
    # issubclass(type(data), Data) test was redundant.
    if isinstance(data, Data):
        return data
    if isinstance(data, list):
        return list_to_basemodel(data)
    if isinstance(data, dict):
        return dict_to_basemodel(data)
    if isinstance(data, (pd.DataFrame, pd.Series)):
        return df_to_basemodel(data)
    if isinstance(data, np.ndarray):
        return ndarray_to_basemodel(data)
    raise ValueError(f"Unsupported data type: {type(data)}")


def get_target_column(df: pd.DataFrame, target: str) -> pd.Series:
"""Get target column from time series data."""
if target not in df.columns:
Expand Down
57 changes: 52 additions & 5 deletions openbb_platform/platform/core/tests/app/static/test_filters.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Test the filters.py file."""

import numpy as np
import pandas as pd
import pytest
from openbb_core.app.static.filters import filter_inputs
from pandas import DataFrame
from openbb_provider.abstract.data import Data


def test_filter_inputs_not_df():
Expand All @@ -14,8 +17,52 @@ def test_filter_inputs_not_df():

def test_filter_inputs_df():
"""Test filter_inputs."""
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
kwargs = {"df": df}
kwargs = filter_inputs(**kwargs)
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
kwargs = {"data": df}
kwargs = filter_inputs(data_processing=True, **kwargs)

assert isinstance(kwargs["data"], list)


# One sample value per input shape exercised by the parametrized test.
example_dict = dict(a=1, b=2)
example_list = [dict(a=1, b=2), dict(a=3, b=4)]
example_series = pd.Series(data=[1, 2, 3])
example_dataframe = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
example_ndarray = np.array(((1, 2), (3, 4)))
example_data_list_series = [pd.Series(data=[4, 5, 6])]
example_data_list_df = [pd.DataFrame(data={"col3": [5, 6], "col4": [7, 8]})]

# Scenarios handed to the parametrized test, in a fixed order.
test_data = []
test_data.append(example_dict)
test_data.append(example_list)
test_data.append(example_series)
test_data.append(example_dataframe)
test_data.append(example_ndarray)
test_data.append(example_data_list_series)
test_data.append(example_data_list_df)


@pytest.mark.parametrize("input_data", test_data)
def test_filter_inputs(
input_data,
):
result = filter_inputs(data=input_data, data_processing=True)

# Assert that the result is a dictionary
assert isinstance(result, dict), "filter_inputs should return a dictionary"

# Assert that the 'data' key is present in the result
assert "data" in result, "Resulting dictionary should have a 'data' key"

assert isinstance(kwargs["df"], list)
# Assert that the type of 'data' in the result is the expected type
if isinstance(result["data"], list):
assert isinstance(
result["data"][0], Data
), f"The 'data' key should be a list of {Data.__name__}"
else:
assert isinstance(
result["data"], Data
), f"The 'data' key should be of type {Data.__name__}"
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from dataclasses import dataclass
from inspect import _empty
from unittest.mock import patch

import pandas
import pytest
Expand Down Expand Up @@ -300,9 +301,13 @@ def some_func():
"""Do some func doc."""
return 42

output = method_definition.build_command_method_implementation(
path="openbb_core.app.static.container.Container", func=some_func
)
with patch(
"openbb_core.app.static.package_builder.MethodDefinition.is_data_processing_function",
**{"return_value": False},
):
output = method_definition.build_command_method_implementation(
path="openbb_core.app.static.container.Container", func=some_func
)

assert output
assert isinstance(output, str)
Expand All @@ -315,11 +320,15 @@ def some_func():
"""Do some func doc."""
return 42

output = method_definition.build_command_method(
path="openbb_core.app.static.container.Container",
func=some_func,
model_name=None,
)
with patch(
"openbb_core.app.static.package_builder.MethodDefinition.is_data_processing_function",
**{"return_value": False},
):
output = method_definition.build_command_method(
path="openbb_core.app.static.container.Container",
func=some_func,
model_name=None,
)

assert output
assert isinstance(output, str)
Expand Down
Loading
Loading