Skip to content

Commit

Permalink
Fixing fails to deliver bug and resulting NaN values in dataframe bug (
Browse files Browse the repository at this point in the history
…#1956)

* Fixing fails to deliver bug and resulting NaN values in dataframe bug

* Taking out print statement

Co-authored-by: didierlopes.eth <dro.lopes@campus.fct.unl.pt>
  • Loading branch information
simmonsj330 and DidierRLopes authored Jun 20, 2022
1 parent aa53f49 commit 0c60d70
Show file tree
Hide file tree
Showing 2 changed files with 441 additions and 391 deletions.
42 changes: 41 additions & 1 deletion openbb_terminal/stocks/dark_pool_shorts/sec_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,42 @@
logger = logging.getLogger(__name__)


@log_start_end(log=logger)
def catching_diff_url_formats(ftd_urls: list) -> list:
    """Rewrite the SEC fails-to-deliver URLs that are not in the standard format.

    A few URLs deviate from the standard pattern, either for a whole date
    range or for one specific file. Each affected entry of ``ftd_urls`` is
    replaced in place with its corrected form. Entries whose file name does not
    look like ``cnsfails<yyyymm>...`` are left untouched.

    Parameters
    ----------
    ftd_urls : list
        list of urls of sec data

    Returns
    -------
    list
        The same list object that was passed in, with the non-standard URLs
        rewritten
    """
    feb_mar_apr_catch = ("202002", "202003", "202004")
    file_prefix = "cnsfails"
    for i, ftd_url in enumerate(ftd_urls):
        # Read the yyyymm period from the file name rather than from a fixed
        # character offset, so the check does not depend on the exact length
        # of the base URL.
        file_name = ftd_url.rsplit("/", 1)[-1]
        if not file_name.startswith(file_prefix):
            continue
        period = file_name[len(file_prefix) : len(file_prefix) + 6]
        if len(period) != 6 or not period.isdecimal():
            # Not a recognisable period: nothing to correct for this URL
            continue

        # URLs with dates prior to the first half of June 2017 have different formats
        if int(period) < 201706 or "201706a" in ftd_url:
            ftd_urls[i] = ftd_url.replace(
                "fails-deliver-data",
                "frequently-requested-foia-document-fails-deliver-data",
            )
        # URLs between february, march, and april of 2020 have different formats
        elif period in feb_mar_apr_catch:
            ftd_urls[i] = ftd_url.replace(
                "data/fails-deliver-data", "node/add/data_distribution"
            )
        # First half of october 2019 has a different format
        elif (
            ftd_url
            == "https://www.sec.gov/files/data/fails-deliver-data/cnsfails201910a.zip"
        ):
            ftd_urls[
                i
            ] = "https://www.sec.gov/files/data/fails-deliver-data/cnsfails201910a_0.zip"

    return ftd_urls


@log_start_end(log=logger)
def get_fails_to_deliver(
ticker: str,
Expand Down Expand Up @@ -109,6 +145,10 @@ def get_fails_to_deliver(

ftd_urls = [base_url + ftd_date + ".zip" for ftd_date in ftd_dates]

# Calling function that catches a handful of urls that are slightly
# different than the standard format
ftd_urls = catching_diff_url_formats(ftd_urls)

for ftd_link in ftd_urls:
all_ftds = pd.read_csv(
ftd_link,
Expand All @@ -117,7 +157,7 @@ def get_fails_to_deliver(
engine="python",
skipfooter=2,
usecols=[0, 2, 3, 5],
dtype={"QUANTITY (FAILS)": "int"},
dtype={"QUANTITY (FAILS)": "Int64"},
encoding="iso8859",
)
tmp_ftds = all_ftds[all_ftds["SYMBOL"] == ticker]
Expand Down
Loading

0 comments on commit 0c60d70

Please sign in to comment.