[BugFix] SEC ETF Holdings - Try Catch for RemoteDisconnect Error #6359

Merged 4 commits on May 6, 2024

20 changes: 17 additions & 3 deletions openbb_platform/providers/sec/openbb_sec/models/etf_holdings.py
@@ -2,6 +2,7 @@
 
 # pylint: disable =[unused-argument,too-many-locals,too-many-branches]
 
+import asyncio
 from datetime import date as dateType
 from typing import Any, Dict, List, Optional, Union
 from warnings import warn
@@ -329,9 +330,22 @@ async def aextract_data(
         **kwargs: Any,
     ) -> Dict:
         """Return the raw data from the SEC endpoint."""
-        filings = await get_nport_candidates(
-            symbol=query.symbol, use_cache=query.use_cache
-        )
+        # Implement a retry mechanism in case of RemoteDisconnected errors.
+        retries = 3
+        for i in range(retries):
+            filings = []
+            try:
+                filings = await get_nport_candidates(
+                    symbol=query.symbol, use_cache=query.use_cache
+                )
+                if filings:
+                    break
+            except Exception as e:
+                if i < retries - 1:
+                    warn(f"Error: {e}. Retrying...")
+                    await asyncio.sleep(1)
+                    continue
+                raise e
         filing_candidates = pd.DataFrame.from_records(filings)
         if filing_candidates.empty:
             raise ValueError(f"No N-Port records found for {query.symbol}.")
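The change boils down to a bounded retry loop: try the fetch up to three times, break early on non-empty results, sleep for a second between attempts, and re-raise only once retries are exhausted. As a standalone illustration, here is a minimal sketch of the same pattern; the fetch_with_retries helper is hypothetical and not part of the OpenBB codebase:

import asyncio
from warnings import warn


async def fetch_with_retries(fetch, retries: int = 3, delay: float = 1.0):
    """Retry a zero-argument async callable until it returns non-empty data.

    Hypothetical generalization of the pattern above, for illustration only:
    any exception (e.g. http.client.RemoteDisconnected) is retried until the
    final attempt, which re-raises.
    """
    results = []
    for attempt in range(retries):
        try:
            results = await fetch()
            if results:  # stop as soon as we get non-empty data
                break
        except Exception as e:
            if attempt < retries - 1:
                warn(f"Error: {e}. Retrying...")
                await asyncio.sleep(delay)  # brief pause between attempts
                continue
            raise  # retries exhausted: surface the last error
    return results


# Hypothetical usage against the provider's fetcher:
# filings = await fetch_with_retries(
#     lambda: get_nport_candidates(symbol="SPY", use_cache=True)
# )
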
88 changes: 1 addition & 87 deletions openbb_platform/providers/sec/openbb_sec/utils/helpers.py
@@ -2,19 +2,16 @@
 
 # pylint: disable =unused-argument
 
-from datetime import timedelta
 from io import BytesIO
 from typing import Dict, List, Optional, Union
 from zipfile import ZipFile
 
 import pandas as pd
-import requests
-import requests_cache
 from aiohttp_client_cache import SQLiteBackend
 from aiohttp_client_cache.session import CachedSession
 from openbb_core.app.utils import get_user_cache_directory
 from openbb_core.provider.utils.helpers import amake_request, make_request
-from openbb_sec.utils.definitions import HEADERS, QUARTERS, SEC_HEADERS, TAXONOMIES
+from openbb_sec.utils.definitions import HEADERS, SEC_HEADERS
 
 
 async def sec_callback(response, session):
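The synchronous requests/requests_cache imports are dropped along with get_frame (removed in the next hunk), leaving only the async cache path. A minimal sketch of a cached GET built from the retained imports, assuming aiohttp_client_cache's standard API; the cache name sec_example and the one-hour expiry are illustrative assumptions, not values from this repo:

from aiohttp_client_cache import SQLiteBackend
from aiohttp_client_cache.session import CachedSession
from openbb_core.app.utils import get_user_cache_directory


async def cached_get_json(url: str, headers: dict) -> dict:
    """Fetch JSON through an SQLite-backed response cache."""
    backend = SQLiteBackend(
        f"{get_user_cache_directory()}/http/sec_example",  # illustrative name
        expire_after=3600,  # assumption: cache responses for one hour
    )
    async with CachedSession(cache=backend) as session:
        async with session.get(url, headers=headers) as response:
            return await response.json()
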
@@ -167,89 +164,6 @@ async def cik_map(cik: Union[str, int], use_cache: bool = True) -> str:
     return symbol
 
 
-def get_frame(  # pylint: disable =too-many-arguments
-    year: int,
-    quarter: Optional[QUARTERS] = None,
-    taxonomy: TAXONOMIES = "us-gaap",
-    units: str = "USD",
-    fact: str = "Revenues",
-    instantaneous: bool = False,
-    use_cache: bool = True,
-) -> Dict:
-    """Get a frame of data for a given fact.
-
-    The xbrl/frames API aggregates, for each reporting entity, the most recently
-    filed fact that most closely fits the calendrical period requested.
-
-    The API supports annual, quarterly, and instantaneous data:
-
-    https://data.sec.gov/api/xbrl/frames/us-gaap/AccountsPayableCurrent/USD/CY2019Q1I.json
-
-    Where the units of measure specified in the XBRL contain a numerator and a denominator,
-    these are separated by “-per-”, such as “USD-per-shares”. Note that the default unit in XBRL is “pure”.
-
-    CY####Q# denotes quarterly data (duration 91 days +/- 30 days).
-    Because company financial calendars can start and end on any month or day, and can even change in length
-    from quarter to quarter, the frame data is assembled from the dates that best align with a calendar
-    quarter or year. Data users should be mindful of the different reporting start and end dates for facts
-    contained in a frame.
-
-    Example facts:
-        Revenues
-        GrossProfit
-        CostOfRevenue
-        DividendsCash
-        DistributedEarnings
-        AccountsPayableCurrent
-        OperatingExpenses
-        OperatingIncomeLoss
-        NoninterestIncome
-        InterestAndDebtExpense
-        IncomeTaxExpenseBenefit
-        NetIncomeLoss
-
-    Facts where units are "shares":
-        WeightedAverageNumberOfDilutedSharesOutstanding
-    """
-    if fact in ["WeightedAverageNumberOfDilutedSharesOutstanding"]:
-        units = "shares"
-    sec_session_frames = requests_cache.CachedSession(
-        f"{get_user_cache_directory()}/http/sec_frames", expire_after=timedelta(days=2)
-    )
-    url = f"https://data.sec.gov/api/xbrl/frames/{taxonomy}/{fact}/{units}/CY{year}"
-
-    if quarter:
-        url = url + f"Q{quarter}"
-
-    if instantaneous:
-        url = url + "I"
-    url = url + ".json"
-    r = (
-        requests.get(url, headers=HEADERS, timeout=5)
-        if use_cache is False
-        else sec_session_frames.get(url, headers=HEADERS, timeout=5)
-    )
-
-    if r.status_code != 200:
-        raise RuntimeError(f"Request failed with status code {r.status_code}")
-
-    response = r.json()
-
-    data = sorted(response["data"], key=lambda x: x["val"], reverse=True)
-    metadata = {
-        "frame": response["ccp"],
-        "tag": response["tag"],
-        "label": response["label"],
-        "description": response["description"],
-        "taxonomy": response["taxonomy"],
-        "unit": response["uom"],
-        "count": response["pts"],
-    }
-
-    results = {"metadata": metadata, "data": data}
-
-    return results
-
-
 def get_schema_filelist(query: str = "", url: str = "", use_cache: bool = True) -> List:
     """Get a list of schema files from the SEC website."""
     results: List = []
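Since the docstring above is the last record of the xbrl/frames URL scheme in this file, here is a hypothetical pre-PR invocation of the removed helper. The metadata keys come from the code above; the entityName and val fields follow the SEC frames response format:

# Hypothetical usage of the removed helper, as it behaved before this PR.
# Builds and fetches:
# https://data.sec.gov/api/xbrl/frames/us-gaap/Revenues/USD/CY2019Q1.json
frame = get_frame(year=2019, quarter=1, fact="Revenues", use_cache=False)

print(frame["metadata"]["frame"])  # the calendrical period, e.g. "CY2019Q1"
for point in frame["data"][:5]:  # five largest values; sorted descending by "val"
    print(point["entityName"], point["val"])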