-
Notifications
You must be signed in to change notification settings - Fork 15
/
pytrends_daily.py
98 lines (77 loc) · 3.32 KB
/
pytrends_daily.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from datetime import date, timedelta
from functools import partial
from time import sleep
from calendar import monthrange
import argparse
import pandas as pd
from pytrends.exceptions import ResponseError
from pytrends.request import TrendReq
def get_last_date_of_month(year: int, month: int) -> date:
    """Return the final calendar day of the given month as a ``date``.

    ``calendar.monthrange`` yields ``(weekday_of_first_day, days_in_month)``;
    only the day count is needed here.
    """
    _, days_in_month = monthrange(year, month)
    return date(year=year, month=month, day=days_in_month)
def convert_dates_to_timeframe(start: date, stop: date) -> str:
    """Format two dates as a single ``'YYYY-MM-DD YYYY-MM-DD'`` string.

    The result is the value this module passes as the ``timeframe``
    keyword when building a request payload.
    """
    day_format = '%Y-%m-%d'
    return ' '.join(day.strftime(day_format) for day in (start, stop))
def _fetch_data(pytrends, build_payload, timeframe: str) -> pd.DataFrame:
    """Build the payload for ``timeframe`` and return the interest-over-time data.

    ``build_payload`` is retried with a growing delay (60s, 65s, 70s) whenever
    it raises ``ResponseError``.  If the initial try and all retries fail, an
    empty ``DataFrame`` is returned instead of querying ``pytrends``: without a
    freshly built payload the client would not be answering for the requested
    ``timeframe``.

    Args:
        pytrends: Client object exposing ``interest_over_time()``.
        build_payload: Callable accepting a ``timeframe`` keyword argument.
        timeframe: Timeframe string forwarded to ``build_payload``.

    Returns:
        The frame returned by ``pytrends.interest_over_time()``, or an empty
        ``DataFrame`` when the payload could not be built.
    """
    max_retries = 3
    for attempt in range(max_retries + 1):
        try:
            build_payload(timeframe=timeframe)
        except ResponseError as err:
            print(err)
            if attempt == max_retries:
                # Give up right away; sleeping before aborting only wastes time.
                print(f'Failed after {max_retries} retries, abort fetching.')
                break
            delay = 60 + 5 * attempt
            print(f'Trying again in {delay} seconds.')
            sleep(delay)
        else:
            return pytrends.interest_over_time()
    return pd.DataFrame()
def get_daily_unscaled_data(word: str,
                            start_year: int,
                            start_mon: int,
                            stop_year: int,
                            stop_mon: int,
                            geo: str = 'US',
                            verbose: bool = True,
                            cat: int = 0,
                            wait_time: float = 5.0) -> pd.DataFrame:
    """Fetch interest-over-time data for ``word`` one calendar month at a time.

    The range runs from the first day of ``start_year``/``start_mon`` to the
    last day of ``stop_year``/``stop_mon``.  Every month is requested
    separately and the monthly frames are concatenated as returned, with no
    rescaling between months.
    NOTE(review): month-sized requests are presumably what makes Google return
    daily granularity -- confirm against the pytrends documentation.

    Args:
        word: Search term; sent as the only entry of ``kw_list``.
        start_year: Year of the first month to fetch.
        start_mon: Month number of the first month to fetch.
        stop_year: Year of the last month to fetch.
        stop_mon: Month number of the last month to fetch.
        geo: Value forwarded as ``geo`` to ``build_payload``.
        verbose: When true, print ``word:timeframe`` before each request.
        cat: Value forwarded as ``cat`` to ``build_payload``.
        wait_time: Seconds to pause between consecutive requests.

    Returns:
        The concatenated monthly frames without the ``isPartial`` column, or
        an empty ``DataFrame`` when no month returned any rows.

    Raises:
        ValueError: If the start month lies after the stop month.
    """
    start_date = date(start_year, start_mon, 1)
    stop_date = get_last_date_of_month(stop_year, stop_mon)
    if start_date > stop_date:
        # Fail early with a clear message instead of letting pd.concat
        # complain about having nothing to concatenate.
        raise ValueError(
            f'Start month {start_date:%Y-%m} is after stop month {stop_date:%Y-%m}.'
        )

    pytrends = TrendReq(hl='en-US', tz=360)
    build_payload = partial(pytrends.build_payload,
                            kw_list=[word], cat=cat, geo=geo, gprop='')

    frames = []
    current = start_date
    while current < stop_date:
        if current != start_date:
            # Pause between requests only (not after the final one):
            # don't go too fast or Google will send 429s.
            sleep(wait_time)
        last_date_of_month = get_last_date_of_month(current.year, current.month)
        timeframe = convert_dates_to_timeframe(current, last_date_of_month)
        if verbose:
            print(f'{word}:{timeframe}')
        frames.append(_fetch_data(pytrends, build_payload, timeframe))
        current = last_date_of_month + timedelta(days=1)

    # Months without data come back as empty frames that lack 'isPartial';
    # leave them out so the concat/drop below cannot fail on them.
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return pd.DataFrame()
    return pd.concat(non_empty).drop(columns=['isPartial'], errors='ignore')
if __name__ == '__main__':
    # Command-line entry point: download the data for one asset and store it
    # as Pytrends/<ticker>_<cat>.csv relative to the working directory.
    from pathlib import Path  # local import: only the script path needs it

    parser = argparse.ArgumentParser(description='Download Pytrends data')
    parser.add_argument('-n', '--name', type=str, default='Apple',
                        help='Name of the asset that you want to download')
    parser.add_argument('-t', '--ticker', type=str, default='AAPL',
                        help='Ticker name')
    parser.add_argument('-say', '--start_year', type=int, default=2012,
                        help='Year of the first month to download')
    parser.add_argument('-sam', '--start_month', type=int, default=1,
                        help='Number of the first month to download')
    parser.add_argument('-soy', '--stop_year', type=int, default=2021,
                        help='Year of the last month to download')
    parser.add_argument('-som', '--stop_month', type=int, default=10,
                        help='Number of the last month to download')
    parser.add_argument('-c', '--cat', type=int, default=0,
                        help='Category for the asset')
    args = parser.parse_args()

    df = get_daily_unscaled_data(
        word=args.name,
        start_year=args.start_year,
        start_mon=args.start_month,
        stop_year=args.stop_year,
        stop_mon=args.stop_month,
        # Overrides the function's 'US' default with an empty geo
        # (presumably "worldwide" in pytrends -- confirm).
        geo='',
        cat=args.cat,
    )

    # Create the output directory first so a long download is not lost to a
    # missing-folder error at the very last step.
    output_dir = Path('Pytrends')
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_dir / f'{args.ticker}_{args.cat}.csv')