Skip to content

Commit

Permalink
Handle ambiguous timezones so no fallback needed. Also rearrange for …
Browse files Browse the repository at this point in the history
…cleaner merge
  • Loading branch information
ValueRaider committed Aug 19, 2024
1 parent 3e35ae7 commit 9a37c7f
Showing 1 changed file with 49 additions and 13 deletions.
62 changes: 49 additions & 13 deletions yfinance/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@
from __future__ import print_function

import json as _json
import re
import warnings
from io import StringIO
from typing import Optional, Union
from urllib.parse import quote as urlencode

import pandas as pd
import pytz
import requests

from . import utils, cache
Expand Down Expand Up @@ -599,20 +597,58 @@ def get_ticker_tz():

# Parse earnings date string
cn = "Earnings Date"
# - remove AM/PM and timezone from date string
tzinfo = dates[cn].str.extract('([AP]M[a-zA-Z]*)$')
dates[cn] = dates[cn].replace(' [AP]M[a-zA-Z]*$', '', regex=True)
# - split AM/PM from timezone
tzinfo = tzinfo[0].str.extract('([AP]M)([a-zA-Z]*)', expand=True)
tzinfo.columns = ["AM/PM", "TZ"]
# - combine and parse
dates[cn] = dates[cn] + ' ' + tzinfo["AM/PM"]
dates[cn] = pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p")

# Try to remap all ambiguous timezone values:
tzinfo['TZ'] = tzinfo['TZ'].str.replace('BST', 'Europe/London')
tzinfo['TZ'] = tzinfo['TZ'].str.replace('GMT', 'Europe/London')
if '.' not in self.ticker:
tzinfo['TZ'] = tzinfo['TZ'].str.replace('EST', 'America/New_York')
elif self.ticker.endswith(".AX"):
tzinfo['TZ'] = tzinfo['TZ'].str.replace('EST', 'Australia/Sydney')
tzinfo['TZ'] = tzinfo['TZ'].str.replace('MST', 'America/Denver')
tzinfo['TZ'] = tzinfo['TZ'].str.replace('PST', 'America/Los_Angeles')
if'.' not in self.ticker:
tzinfo['TZ'] = tzinfo['TZ'].str.replace('CST', 'America/Chicago')
else:
# Revisit if Cuba get a stock exchange
tzinfo['TZ'] = tzinfo['TZ'].str.replace('CST', 'Asia/Shanghai')
if self.ticker.endswith('.TA'):
tzinfo['TZ'] = tzinfo['TZ'].str.replace('IST', 'Asia/Jerusalem')
elif self.ticker.endswith('.IR'):
tzinfo['TZ'] = tzinfo['TZ'].str.replace('IST', 'Europe/Dublin')
elif self.ticker.endswith('.NS'):
tzinfo['TZ'] = tzinfo['TZ'].str.replace('IST', 'Asia/Kolkata')

# But in case still ambiguity that pytz cannot parse, have a backup:
self._quote.proxy = proxy or self.proxy
tz_backup = self._get_ticker_tz(proxy=proxy, timeout=30)

def map_date(time_str: str):
tz_match = re.search('([AP]M)([a-zA-Z]*)$', time_str)
tz_str = tz_match.group(2).strip()
# - remove AM/PM and timezone from date string
time_str = time_str.replace(tz_str, "")
if len(tzinfo['TZ'].unique())==1:
try:
tz = pytz.timezone(tz_str)
except pytz.UnknownTimeZoneError:
tz = get_ticker_tz()

return pd.to_datetime(time_str, format="%b %d, %Y, %I %p").tz_localize(tz)
dates[cn] = dates[cn].dt.tz_localize(tzinfo['TZ'].iloc[0])
except Exception:
dates[cn] = dates[cn].dt.tz_localize(tz_backup)
else:
dates2 = []
for i in range(len(dates)):
dt = dates[cn].iloc[i]
tz = tzinfo['TZ'].iloc[i]
try:
dt = dt.tz_localize(tz)
except Exception:
dt = dt.tz_localize(tz_backup)
dates2.append(dt)
dates[cn] = pd.to_datetime(dates2)

dates[cn] = dates[cn].map(map_date)
dates = dates.set_index("Earnings Date")

self._earnings_dates[limit] = dates
Expand Down

0 comments on commit 9a37c7f

Please sign in to comment.