Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #912: [PPSS, Aimodel] predictoor_ss.aimodel_ss.input_feeds is … #913

Merged
merged 15 commits into from
Apr 19, 2024
35 changes: 20 additions & 15 deletions pdr_backend/aimodel/aimodel_data_factory.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import sys
from typing import Optional, Tuple
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -68,8 +68,8 @@ def create_xy(
self,
mergedohlcv_df: pl.DataFrame,
testshift: int,
feed: ArgFeed,
feeds: Optional[ArgFeeds] = None,
predict_feed: ArgFeed,
train_feeds: Optional[ArgFeeds] = None,
do_fill_nans: bool = True,
) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame, np.ndarray]:
"""
Expand All @@ -80,6 +80,8 @@ def create_xy(
@arguments
mergedohlcv_df -- *polars* DataFrame. See class docstring
testshift -- to simulate across historical test data
predict_feed -- feed to predict
train_feeds -- feeds to use for model inputs. If None use predict feed
do_fill_nans -- if any values are nan, fill them? (Via interpolation)
If you turn this off and mergedohlcv_df has nans, then X/y/etc gets nans

Expand All @@ -94,27 +96,30 @@ def create_xy(
assert "timestamp" in mergedohlcv_df.columns
assert "datetime" not in mergedohlcv_df.columns

# every column should be ordered with oldest first, youngest last.
# let's verify! The timestamps should be in ascending order
# condition mergedohlcv_df
# - every column should be ordered with oldest first, youngest last.
# let's verify! The timestamps should be in ascending order
uts = mergedohlcv_df["timestamp"].to_list()
assert uts == sorted(uts, reverse=False)

# condition inputs
if do_fill_nans and has_nan(mergedohlcv_df):
mergedohlcv_df = fill_nans(mergedohlcv_df)
ss = self.ss.aimodel_ss
x_dim_len = 0
if not feeds:
x_dim_len = ss.n
feeds = ss.feeds

# condition other inputs
train_feeds_list: List[ArgFeed]
if train_feeds:
train_feeds_list = train_feeds
else:
x_dim_len = len(feeds) * ss.autoregressive_n
train_feeds_list = [predict_feed]
ss = self.ss.aimodel_ss
x_dim_len = len(train_feeds_list) * ss.autoregressive_n

# main work
x_df = pd.DataFrame() # build this up
xrecent_df = pd.DataFrame() # ""

target_hist_cols = [
f"{feed.exchange}:{feed.pair}:{feed.signal}" for feed in feeds
f"{train_feed.exchange}:{train_feed.pair}:{train_feed.signal}"
for train_feed in train_feeds_list
]
for hist_col in target_hist_cols:
assert hist_col in mergedohlcv_df.columns, f"missing data col: {hist_col}"
Expand Down Expand Up @@ -146,7 +151,7 @@ def create_xy(

# y is set from yval_{exch_str, signal_str, pair_str}
# eg y = [BinEthC_-1, BinEthC_-2, ..., BinEthC_-450, BinEthC_-451]
hist_col = f"{feed.exchange}:{feed.pair}:{feed.signal}"
hist_col = f"{predict_feed.exchange}:{predict_feed.pair}:{predict_feed.signal}"
z = mergedohlcv_df[hist_col].to_list()
y = np.array(_slice(z, -testshift - N_train - 1, -testshift))

Expand Down
Loading
Loading