Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve issues while running Automatic update of daily frequency data (from yahoo finance) for US region #1358

Merged
merged 10 commits into from
Dec 5, 2022
25 changes: 24 additions & 1 deletion scripts/data_collector/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,30 @@ def __init__(

def _executor(self, file_path: Path):
file_path = Path(file_path)
df = pd.read_csv(file_path)
df = pd.read_csv(
file_path,
dtype={self._symbol_field_name: str},
keep_default_na=False,
# [review UI note: HyeongminMoon marked this conversation as resolved]
na_values=[
"",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
"NULL",
"NaN",
"n/a",
"nan",
"null",
],
)
df = self._normalize_obj.normalize(df)
if df is not None and not df.empty:
if self._end_date is not None:
Expand Down
6 changes: 5 additions & 1 deletion scripts/data_collector/yahoo/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,10 @@ class YahooNormalizeUS1d(YahooNormalizeUS, YahooNormalize1d):
pass


class YahooNormalizeUS1dExtend(YahooNormalizeUS, YahooNormalize1dExtend):
    """Combine ``YahooNormalizeUS`` with ``YahooNormalize1dExtend``.

    The class declares no attributes or methods of its own; everything it
    does is inherited from its two bases, with ``YahooNormalizeUS`` taking
    precedence in the method resolution order.

    NOTE(review): presumably the US-region counterpart of the other
    ``*1dExtend`` normalizers -- confirm against the callers that select
    a normalize class by name.
    """


class YahooNormalizeUS1min(YahooNormalizeUS, YahooNormalize1minOffline):
CALC_PAUSED_NUM = False

Expand Down Expand Up @@ -1196,7 +1200,7 @@ def update_data_to_bin(
importlib.import_module(f"data_collector.{_region}_index.collector"), "get_instruments"
)
for _index in index_list:
get_instruments(str(qlib_data_1d_dir), _index)
get_instruments(str(qlib_data_1d_dir), _index, market_index=f"{_region}_index")


if __name__ == "__main__":
Expand Down