Skip to content

Commit

Permalink
remove useless PIT code
Browse files Browse the repository at this point in the history
  • Loading branch information
you-n-g committed Mar 10, 2022
1 parent 9c67303 commit 69cf2ab
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 1,031 deletions.
1 change: 1 addition & 0 deletions docs/advanced/PIT.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,4 @@ The statements are sorted by the `date` in ascending order from the beginning of
Known limitations
- Currently, the PIT database is designed for quarterly or annually factors, which can handle fundamental data of financial reports in most markets.
Qlib leverages the file name to identify the type of the data. A file with a name like `XXX_q.data` corresponds to quarterly data. A file with a name like `XXX_a.data` corresponds to annual data.
- The calculation of PIT is not performed in the optimal way. There is great potential to boost the performance of PIT data calculation.
311 changes: 0 additions & 311 deletions qlib/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,314 +272,3 @@ class ExpressionOps(Expression):
"""

pass


class PExpression(abc.ABC):
    """Abstract base class for period expressions.

    Every overloaded operator builds an operator node lazily. When the other
    operand is an ``Expression`` the node class is imported from ``.ops``;
    for any other operand the ``P``-prefixed counterpart is imported from
    ``.ops_period``. The imports live inside the methods, so nothing is
    imported until an operator is actually used.

    Subclasses must implement ``load_period_data`` and ``get_period_offset``.
    """

    def __str__(self):
        """Return the name of the concrete class."""
        return type(self).__name__

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return str(self)

    # ------------------------------------------------------------------
    # Comparison operators
    # ------------------------------------------------------------------

    def __gt__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PGt

            return PGt(self, other)

        from .ops import Gt

        return Gt(self, other)

    def __ge__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PGe

            return PGe(self, other)

        from .ops import Ge

        return Ge(self, other)

    def __lt__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PLt

            return PLt(self, other)

        from .ops import Lt

        return Lt(self, other)

    def __le__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PLe

            return PLe(self, other)

        from .ops import Le

        return Le(self, other)

    def __eq__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PEq

            return PEq(self, other)

        from .ops import Eq

        return Eq(self, other)

    def __ne__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PNe

            return PNe(self, other)

        from .ops import Ne

        return Ne(self, other)

    # ------------------------------------------------------------------
    # Arithmetic operators (the reflected variants swap the operand order)
    # ------------------------------------------------------------------

    def __add__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PAdd

            return PAdd(self, other)

        from .ops import Add

        return Add(self, other)

    def __radd__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PAdd

            return PAdd(other, self)

        from .ops import Add

        return Add(other, self)

    def __sub__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PSub

            return PSub(self, other)

        from .ops import Sub

        return Sub(self, other)

    def __rsub__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PSub

            return PSub(other, self)

        from .ops import Sub

        return Sub(other, self)

    def __mul__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PMul

            return PMul(self, other)

        from .ops import Mul

        return Mul(self, other)

    def __rmul__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PMul

            return PMul(other, self)

        from .ops import Mul

        return Mul(other, self)

    def __div__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PDiv

            return PDiv(self, other)

        from .ops import Div

        return Div(self, other)

    def __rdiv__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PDiv

            return PDiv(other, self)

        from .ops import Div

        return Div(other, self)

    def __truediv__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PDiv

            return PDiv(self, other)

        from .ops import Div

        return Div(self, other)

    def __rtruediv__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PDiv

            return PDiv(other, self)

        from .ops import Div

        return Div(other, self)

    def __pow__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PPower

            return PPower(self, other)

        from .ops import Power

        return Power(self, other)

    # ------------------------------------------------------------------
    # Logical operators
    # ------------------------------------------------------------------

    def __and__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PAnd

            return PAnd(self, other)

        from .ops import And

        return And(self, other)

    def __rand__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import PAnd

            return PAnd(other, self)

        from .ops import And

        return And(other, self)

    def __or__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import POr

            return POr(self, other)

        from .ops import Or

        return Or(self, other)

    def __ror__(self, other):
        if not isinstance(other, Expression):
            from .ops_period import POr

            return POr(other, self)

        from .ops import Or

        return Or(other, self)

    # ------------------------------------------------------------------
    # Interface to be implemented by subclasses
    # ------------------------------------------------------------------

    @abc.abstractmethod
    def load_period_data(self, instrument, start_offset, end_offset, cur_index, **kwargs):
        """Load the period data of ``instrument``; must be overridden.

        ``load`` passes the calendar entry of the current step as ``cur_index``
        and reads ``.iloc[-1]`` from the returned object.
        """
        raise NotImplementedError("This function must be implemented in your newly defined feature")

    @abc.abstractmethod
    def get_period_offset(self, cur_index):
        """Return the start offset ``load`` should use at ``cur_index``; must be overridden."""
        raise NotImplementedError("This function must be implemented in your newly defined feature")

    def check_feature_exist(self, instrument):
        """Return True when every ``PExpression`` attribute of this instance reports that it exists.

        Only values stored directly in the instance ``__dict__`` are inspected.
        An instance without ``PExpression`` attributes yields True.
        """
        # Every child is queried (no short-circuit) before the results are combined.
        child_results = [
            attr.check_feature_exist(instrument) for attr in self.__dict__.values() if isinstance(attr, PExpression)
        ]
        return all(child_results)

    def load(self, instrument, start_index, end_index, freq):
        """Evaluate the expression for every index in ``[start_index, end_index]``.

        Returns a float32 ``pd.Series`` named ``str(self)`` and indexed by
        ``pd.RangeIndex(start_index, end_index + 1)``. The result is stored in
        and served from ``H["f"]``. When the feature does not exist, or loading
        raises ``FileNotFoundError``, a warning is logged and an empty float32
        series is returned instead.

        Raises:
            ValueError: if either index is None or ``start_index > end_index``.
        """

        def _not_found():
            # Shared fallback for "no period data": log once and hand back an empty series.
            get_module_logger("base").warning(f"WARN: period data not found for {str(self)}")
            return pd.Series(dtype="float32", name=str(self))

        if not self.check_feature_exist(instrument):
            return _not_found()

        from .cache import H

        # cache
        cache_key = (str(self), instrument, start_index, end_index, freq)
        if cache_key in H["f"]:
            return H["f"][cache_key]

        if start_index is None or end_index is None or start_index > end_index:
            raise ValueError("Invalid index range: {} {}".format(start_index, end_index))

        from .data import Cal

        calendar = Cal.calendar(freq=freq)
        index_span = range(start_index, end_index + 1)
        values = np.empty(len(index_span), dtype="float32")

        for slot, cur_index in enumerate(index_span):
            cur_time = calendar[cur_index]
            # To load the expression accurately, more historical data are required.
            start_offset = self.get_period_offset(cur_index)
            try:
                # The calculated value is always the last element, so the end offset is zero.
                period_values = self.load_period_data(
                    instrument, start_offset, 0, cur_time, info=(start_index, end_index, cur_index)
                )
                values[slot] = period_values.iloc[-1]
            except FileNotFoundError:
                return _not_found()

        result = pd.Series(
            values, index=pd.RangeIndex(start_index, end_index + 1), dtype="float32", name=str(self)
        )
        H["f"][cache_key] = result
        return result

    def get_longest_back_rolling(self):
        """Return 0."""
        return 0

    def get_extended_window_size(self):
        """Return the pair ``(0, 0)``."""
        return 0, 0


# class PFeature(PExpression):
# def __init__(self, name=None):
# if name:
# self._name = name.lower()
# else:
# self._name = type(self).__name__.lower()
#
# def __str__(self):
# return "$$" + self._name
#
# def load_period_data(self, instrument, start_offset, end_offset, cur_index, **kwargs):
# # BUG: cur_index is a date!!!!!
# ### Zhou Code
# from .data import PITD
#
# return PITD.period_feature(instrument, str(self), start_offset, end_offset, cur_index, **kwargs)
# # return pd.Series([1, 2, 3]) # for test
#
# def get_period_offset(self, cur_index):
# return 0


class PExpressionOps(PExpression):
    """Operator-based period expression.

    Features of this kind are constructed on the fly by applying operators.
    The class adds nothing to ``PExpression``.
    """
Loading

0 comments on commit 69cf2ab

Please sign in to comment.