Skip to content

Commit

Permalink
[BugFix] Intrinio News (#6336)
Browse files Browse the repository at this point in the history
* add default=None

* patch intrinio news

* pylint

* mypy

* no need to assign articles var before len

* more pylint

* other assignment
  • Loading branch information
deeleeramone authored Apr 24, 2024
1 parent 9bfa378 commit 3872738
Show file tree
Hide file tree
Showing 7 changed files with 939 additions and 7,732 deletions.
22 changes: 20 additions & 2 deletions openbb_platform/extensions/news/integration/test_news_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ def headers():
"limit": 20,
"start_date": None,
"end_date": None,
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down Expand Up @@ -164,8 +173,17 @@ def test_news_world(params, headers):
"provider": "intrinio",
"symbol": "AAPL",
"limit": 20,
"start_date": None,
"end_date": None,
"start_date": "2024-01-02",
"end_date": "2024-01-03",
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down
26 changes: 22 additions & 4 deletions openbb_platform/extensions/news/integration/test_news_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,17 @@ def obb(pytestconfig): # pylint: disable=inconsistent-return-statements
{
"provider": "intrinio",
"limit": 20,
"start_date": None,
"end_date": None,
"start_date": "2024-01-02",
"end_date": "2024-01-03",
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down Expand Up @@ -146,8 +155,17 @@ def test_news_world(params, obb):
"provider": "intrinio",
"symbol": "AAPL",
"limit": 20,
"start_date": None,
"end_date": None,
"start_date": "2024-01-02",
"end_date": "2024-01-03",
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
"""Intrinio Company News Model."""

import asyncio
from datetime import datetime
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Literal, Optional, Union

from openbb_core.provider.abstract.fetcher import Fetcher
from openbb_core.provider.standard_models.company_news import (
CompanyNewsData,
CompanyNewsQueryParams,
)
from openbb_core.provider.utils.errors import EmptyDataError
from openbb_core.provider.utils.helpers import (
ClientResponse,
amake_requests,
filter_by_dates,
amake_request,
get_querystring,
)
from openbb_intrinio.utils.helpers import get_data
from openbb_intrinio.utils.references import IntrinioSecurity
from pydantic import Field, field_validator


Expand All @@ -23,26 +25,133 @@ class IntrinioCompanyNewsQueryParams(CompanyNewsQueryParams):
Source: https://docs.intrinio.com/documentation/web_api/get_company_news_v2
"""

__alias_dict__ = {"symbol": "symbols", "limit": "page_size"}
__alias_dict__ = {
"limit": "page_size",
"source": "specific_source",
}
__json_schema_extra__ = {"symbol": ["multiple_items_allowed"]}

source: Optional[
Literal["yahoo", "moody", "moody_us_news", "moody_us_press_releases"]
] = Field(
default=None,
description="The source of the news article.",
)
# Query filter on article sentiment. The previous description was a copy of
# the wording meant for the ``source`` field and mis-documented this parameter.
sentiment: Union[None, Literal["positive", "neutral", "negative"]] = Field(
    default=None,
    description="Return news only with this sentiment.",
)
language: Optional[str] = Field(
default=None,
description="Filter by language. Unsupported for yahoo source.",
)
topic: Optional[str] = Field(
default=None,
description="Filter by topic. Unsupported for yahoo source.",
)
word_count_greater_than: Optional[int] = Field(
default=None,
description="News stories will have a word count greater than this value."
+ " Unsupported for yahoo source.",
)
word_count_less_than: Optional[int] = Field(
default=None,
description="News stories will have a word count less than this value."
+ " Unsupported for yahoo source.",
)
is_spam: Optional[bool] = Field(
default=None,
description="Filter whether it is marked as spam or not."
+ " Unsupported for yahoo source.",
)
business_relevance_greater_than: Optional[float] = Field(
default=None,
description="News stories will have a business relevance score more than this value."
+ " Unsupported for yahoo source.",
)
business_relevance_less_than: Optional[float] = Field(
default=None,
description="News stories will have a business relevance score less than this value."
+ " Unsupported for yahoo source.",
)


class IntrinioCompanyNewsData(CompanyNewsData):
    """Intrinio Company News Data.

    Extends the standard company news model with the extra article
    attributes declared below (source, topics, word count, sentiment, ...).
    ``__alias_dict__`` pairs the model's field names (keys) with the names
    used in the provider payload (values).
    """

    __alias_dict__ = {
        "date": "publication_date",
        "text": "summary",
        "sentiment": "article_sentiment",
        "sentiment_confidence": "article_sentiment_confidence",
        "symbols": "symbol",
    }

    source: Optional[str] = Field(
        default=None,
        description="The source of the news article.",
    )
    summary: Optional[str] = Field(
        default=None,
        description="The summary of the news article.",
    )
    topics: Optional[str] = Field(
        default=None,
        description="The topics related to the news article.",
    )
    word_count: Optional[int] = Field(
        default=None,
        description="The word count of the news article.",
    )
    business_relevance: Optional[float] = Field(
        default=None,
        description=" How strongly correlated the news article is to the business",
    )
    sentiment: Optional[str] = Field(
        default=None,
        description="The sentiment of the news article - i.e, negative, positive.",
    )
    sentiment_confidence: Optional[float] = Field(
        default=None,
        description="The confidence score of the sentiment rating.",
    )
    language: Optional[str] = Field(
        default=None,
        description="The language of the news article.",
    )
    spam: Optional[bool] = Field(
        default=None,
        description="Whether the news article is spam.",
    )
    copyright: Optional[str] = Field(
        default=None,
        description="The copyright notice of the news article.",
    )
    id: str = Field(description="Article ID.")
    security: Optional[IntrinioSecurity] = Field(
        default=None,
        description="The Intrinio Security object. Contains the security details related to the news article.",
    )

    @field_validator("publication_date", mode="before", check_fields=False)
    @classmethod
    def date_validate(cls, v):
        """Return the date as a datetime object.

        Strings shaped like ``2024-01-02T03:04:05.000Z`` are parsed into a
        naive ``datetime``. Any number of fractional-second digits (or none)
        is accepted. Non-string values are returned untouched so that the
        regular field validation can deal with them.

        Raises
        ------
        ValueError
            If a string does not match any supported timestamp layout.
        """
        if not isinstance(v, str):
            return v
        # "%f" accepts 1-6 fractional digits, so the historical ".000Z"
        # payloads parse to exactly the same value as before.
        for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
            try:
                return datetime.strptime(v, fmt)
            except ValueError:
                continue
        raise ValueError(f"Unsupported publication date format: {v!r}")

    @field_validator("topics", mode="before", check_fields=False)
    @classmethod
    def topics_validate(cls, v):
        """Parse the topics as a comma-separated string.

        Each entry is expected to be a mapping with a ``name`` key; plain
        strings are accepted as-is. Entries with a missing or blank name are
        dropped. Returns ``None`` when no usable topic remains.
        """
        if not v:
            return None
        if isinstance(v, str):
            # Already flattened (e.g. the value is being validated again).
            return v if v.strip() else None
        names = []
        for topic in v:
            name = topic.get("name") if isinstance(topic, dict) else topic
            # Skip None / blank names so that ", ".join cannot fail.
            if isinstance(name, str) and name.strip():
                names.append(name)
        return ", ".join(names) if names else None

    @field_validator("copyright", mode="before", check_fields=False)
    @classmethod
    def copyright_validate(cls, v):
        """Replace empty or whitespace-only strings with None."""
        if isinstance(v, str) and not v.strip():
            return None
        return v


class IntrinioCompanyNewsFetcher(
Fetcher[
Expand All @@ -67,34 +176,75 @@ async def aextract_data(
api_key = credentials.get("intrinio_api_key") if credentials else ""

base_url = "https://api-v2.intrinio.com/companies"
query_str = get_querystring(
query.model_dump(by_alias=True), ["symbols", "page"]
ignore = (
["symbol", "page_size", "is_spam"]
if not query.source or query.source == "yahoo"
else ["symbol", "page_size"]
)

async def callback(response: ClientResponse, _: Any) -> List[Dict]:
"""Return the response."""
if response.status != 200:
return []

query_str = get_querystring(query.model_dump(by_alias=True), ignore)
symbols = query.symbol.split(",")
news: List = []

async def callback(response, session):
"""Response callback."""
result = await response.json()
if "error" in result:
raise RuntimeError(f"Intrinio Error Message -> {result['error']}")
symbol = response.url.parts[-2]
data = await response.json()

if isinstance(data, dict):
return [{**d, "symbol": symbol} for d in data.get("news", [])]
return []

urls = [
f"{base_url}/{symbol}/news?{query_str}&api_key={api_key}"
for symbol in [s.strip() for s in getattr(query, "symbol", "").split(",")]
]

return await amake_requests(urls, callback, **kwargs)
_data = result.get("news", [])
data = []
data.extend([{"symbol": symbol, **d} for d in _data])
articles = len(data)
next_page = result.get("next_page")
while next_page and query.limit > articles:
url = f"{base_url}/{symbol}/news?{query_str}&api_key={api_key}&next_page={next_page}"
result = await get_data(url, session=session, **kwargs)
_data = result.get("news", [])
if _data:
data.extend([{"symbol": symbol, **d} for d in _data])
articles = len(data)
next_page = result.get("next_page")
# Remove duplicates based on URL
return data

seen = set()

async def get_one(symbol):
"""Get the data for one symbol."""
# TODO: Change page_size to a more appropriate value when Intrinio fixes the bug in this param.
url = f"{base_url}/{symbol}/news?{query_str}&page_size=99&api_key={api_key}"
data = await amake_request(url, response_callback=callback, **kwargs)
if data:
data = [x for x in data if not (x["url"] in seen or seen.add(x["url"]))] # type: ignore
news.extend(
sorted(data, key=lambda x: x["publication_date"], reverse=True)[
: query.limit
]
)

tasks = [get_one(symbol) for symbol in symbols]

await asyncio.gather(*tasks)

if not news:
raise EmptyDataError("Error: The request was returned as empty.")

return news

# pylint: disable=unused-argument
@staticmethod
def transform_data(
    query: IntrinioCompanyNewsQueryParams, data: List[Dict], **kwargs: Any
) -> List[IntrinioCompanyNewsData]:
    """Return the transformed data.

    Each raw article is normalised before validation:

    * When the article carries a non-empty nested ``body`` mapping, that key
      is removed, its ``body`` entry becomes ``text``, and its
      ``publication_date`` replaces the top-level one (the top-level value
      is kept when the nested mapping has none).
    * Otherwise ``summary``, if present, is moved to ``text``.

    Parameters
    ----------
    query : IntrinioCompanyNewsQueryParams
        The query parameters (unused here).
    data : List[Dict]
        Raw article dictionaries. They are not modified.

    Returns
    -------
    List[IntrinioCompanyNewsData]
        The validated articles, in input order.
    """
    results: List[IntrinioCompanyNewsData] = []
    for item in data:
        # Work on a shallow copy so the caller's dictionaries stay intact.
        record = dict(item)
        body = record.get("body") or {}
        if body:
            record.pop("body")
            record["publication_date"] = body.get(
                "publication_date"
            ) or record.get("publication_date")
            record["text"] = body.get("body", None)
        elif "summary" in record:
            # Guarded: an article without a summary must not raise KeyError.
            record["text"] = record.pop("summary")
        results.append(IntrinioCompanyNewsData.model_validate(record))
    return results
Loading

0 comments on commit 3872738

Please sign in to comment.