-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmypocket.py
193 lines (152 loc) · 5.42 KB
/
mypocket.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import api
import os
import pickle
import urllib2
import threading
from urllib2 import HTTPError
# Pocket consumer key, read once at import time. Importing this module raises
# KeyError when POCKET_CONSUMER_KEY is not set in the environment.
ckey = os.environ['POCKET_CONSUMER_KEY']
# Delimiters of the inline "[Source: name]" attribution found in some excerpts.
source_string = "[Source: "
end_source_string = "]"

# Section headings placed in front of the generated item list: a leading
# newline followed by one "### heading" line per section.
initial_html = "\n" + "".join(
    "### %s\n" % heading
    for heading in (
        "Market Analysis and Industry News",
        "Product Releases",
        "Company Announcements",
        "Talent",
        "Exits and Acquisitions",
        "Venture Capital",
        "Financings",
    )
)
def get_initial_html():
    """Return the module-level ``initial_html`` heading template unchanged."""
    return initial_html
def _source_from_url(url):
    """Derive a display name for a source from the host part of *url*.

    The host is reduced by dropping its last label (".com" etc.) and, if more
    than one label is still left, its first label ("www." etc.). Names shorter
    than four characters are upper-cased entirely; longer ones only get their
    first letter capitalised.
    """
    # Skip the scheme when there is one; "http://" is looked for first, then
    # "https://". A URL without either starts directly with the host.
    host_start = 0
    for scheme in ("http://", "https://"):
        scheme_pos = url.find(scheme)
        if scheme_pos != -1:
            host_start = scheme_pos + len(scheme)
            break

    # The host runs up to the first "/" after the scheme, or to the end of
    # the URL when there is no path at all.
    host_end = url.find("/", host_start)
    if host_end == -1:
        host_end = len(url)
    domain = url[host_start:host_end]

    # Remove the last label (.com etc.), but only when the host has one;
    # slicing at rfind() == -1 would silently eat the final character.
    if "." in domain:
        domain = domain[:domain.rfind(".")]
    # Remove the leading label (www. etc.) when one is still present.
    if "." in domain:
        domain = domain[domain.find(".") + 1:]

    if len(domain) < 4:
        return domain.upper()
    return domain[0].upper() + domain[1:]


def fill_in_source(item):
    """Set ``item['source']`` and return the item.

    The item is a dict with at least the keys ``'text'`` and ``'url'``; it is
    modified in place.

    A pando-daily style ``[Source: name]`` marker inside the text wins: the
    name between the marker and the next closing bracket becomes the source,
    and the text is cut off where the marker starts. Without a marker the
    source is derived from the URL's host name.

    Items for which ``is_pando_source`` is true are additionally passed
    through ``fill_in_pando_source``, which downloads the page (slower) and
    may replace both the url and the source.
    """
    text = item['text']
    marker_pos = text.find(source_string)
    if marker_pos != -1:
        name_start = marker_pos + len(source_string)
        # Search for the closing bracket only after the marker, so that an
        # earlier "]" in the excerpt cannot be mistaken for the end.
        name_end = text.find(end_source_string, name_start)
        if name_end == -1:
            # Unterminated marker: take everything up to the end of the text.
            name_end = len(text)
        item['source'] = text[name_start:name_end]
        # also remove the source notation from the text
        item['text'] = text[:marker_pos]
    else:
        # not found, so figure it out from the url
        item['source'] = _source_from_url(item['url'])

    # if it's a pando one, use the download (but slower) method
    if is_pando_source(item):
        item = fill_in_pando_source(item)
    return item
# May do nothing if the page could not be loaded or understood, in which case
# it just returns the item unchanged.
def fill_in_pando_source(item):
    """Replace an item's url and source with the link cited on its page.

    Downloads ``item['url']`` and looks for the first ``[Source: <a ...>``
    anchor in the HTML. When that anchor's link starts with ``http://``, the
    item's ``'url'`` becomes the link and its ``'source'`` the anchor text.

    The item is modified in place and also returned. It is returned
    unchanged when the download fails or the page does not contain the
    expected markup.
    """
    try:
        data = urllib2.urlopen(item['url'])
    except urllib2.URLError as error:
        # URLError is the base class of HTTPError, so this covers HTTP error
        # statuses as well as connection-level failures.
        print("failed to load url: %s [%s]" % (item['url'], error))
        return item
    try:
        html = data.read()
    finally:
        data.close()

    try:
        # Expected shape: [Source: <a href="LINK">NAME</a>
        s = html.split('[Source: ')[1].split('</a>')[0].split('">')
        link = s[0].split('="')[1]
        source = s[1]
    except IndexError:
        # Page layout not understood; keep the item as it was.
        return item

    if link[0:7] == 'http://':
        print("updated a pando source")
        item['url'] = link
        item['source'] = source
    return item
def is_pando_source(item):
    """Tell whether the item's url begins with the pandodaily news prefix."""
    prefix = u'http://pandodaily.com/news/'
    leading = item['url'][:len(prefix)]
    return leading == prefix
def count_items():
    """Return how many entries the Pocket list currently holds.

    Sets up a fresh client via ``setup_pocket`` and fetches the list with
    ``get_items`` on every call.
    """
    return len(get_items(setup_pocket())['list'])
def get_items(pocket):
    """Fetch the item list from *pocket*, requesting oldest-first order.

    Returns whatever ``pocket.get(sort='oldest')`` returns.
    """
    return pocket.get(sort='oldest')
def setup_pocket():
    """Create an ``api.API`` client and authenticate it.

    Credentials are looked up in this order:

    1. A pickled dict in ``~/.config/pocket.txt`` providing ``consumer_key``
       and ``access_token``.
    2. The ``POCKET_ACCESS_KEY`` environment variable, combined with the
       module-level ``ckey``.
    3. Otherwise ``authenticate()`` is called without a token and the
       resulting credentials are pickled to the config file for next time.

    Returns the client object.
    """
    f = os.path.expanduser(os.path.join('~', '.config', 'pocket.txt'))
    if os.path.exists(f):
        # NOTE(review): pickle.load can execute arbitrary code from the file.
        # Tolerable for a user-owned config file; a plain-text format such as
        # JSON would be safer.
        # Text mode is kept so that it matches the mode the file is written
        # with below.
        with open(f, 'r') as target:
            data = pickle.load(target)
        pocket = api.API(data['consumer_key'])
        pocket.authenticate(data['access_token'])
    elif 'POCKET_ACCESS_KEY' in os.environ:
        pocket = api.API(ckey)
        pocket.authenticate(os.environ['POCKET_ACCESS_KEY'])
    else:
        print("AUTHENTICATION FAILED - UNDEFINED FOR NOW")
        pocket = api.API(ckey)
        # NOTE(review): presumably starts an interactive authorisation flow
        # that sets api.access_token -- confirm against the api module.
        pocket.authenticate()
        data = {'consumer_key': ckey,
                'access_token': api.access_token,
                'contentType': 'article',
                'detailType': 'complete'}
        # Make sure ~/.config exists; otherwise the very first save fails.
        config_dir = os.path.dirname(f)
        if not os.path.isdir(config_dir):
            os.makedirs(config_dir)
        with open(f, 'w') as target:
            pickle.dump(data, target)
    return pocket
# v2 with threading!
def parse_items(items):
    """Turn a fetched Pocket response into a list of simplified item dicts.

    Each entry of ``items['list']`` becomes a dict with the keys ``'text'``
    (the excerpt), ``'url'`` and ``'title'`` (resolved value, falling back to
    the given one, then to an empty string). ``fill_in_source`` is then run
    on every new dict in its own thread; all threads are started first and
    joined afterwards, so the call returns once every one has finished.
    """
    raw = items['list']
    new_items = [
        {
            'text': raw[key].get('excerpt', ''),
            'url': raw[key].get('resolved_url', raw[key].get('given_url', '')),
            'title': raw[key].get('resolved_title', raw[key].get('given_title', '')),
        }
        for key in raw
    ]

    # figure out the sources, one worker thread per item
    workers = []
    for entry in new_items:
        workers.append(threading.Thread(target=fill_in_source, args=(entry,)))
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    return new_items
def convert_to_markdown(items):
    """Render *items* as markdown with reference-style links.

    Every item (a dict with the keys ``'title'``, ``'text'``, ``'source'``
    and ``'url'``) produces one paragraph of the form
    ``_title_:: text [source][n]`` and one link definition ``[n]: url``,
    where ``n`` is the item's position in the list. All paragraphs come
    first, followed by the link definitions.

    The position comes from ``enumerate`` rather than ``items.index(item)``,
    so items that compare equal still get distinct reference numbers and the
    conversion stays linear in the number of items.
    """
    paragraphs = []
    references = []
    for number, item in enumerate(items):
        paragraphs.append(
            "_%s_:: %s [%s][%d]\n\n"
            % (item['title'], item['text'], item['source'], number)
        )
        references.append("\n[%d]: %s" % (number, item['url']))
    return "".join(paragraphs) + "\n" + "".join(references)
def gimme_markdown(include_html=True):
    """Fetch the Pocket list and render it as markdown.

    Returns a ``(markdown, count)`` tuple, where ``count`` is the number of
    entries in the fetched list. With ``include_html`` true the markdown is
    prefixed with the module-level ``initial_html`` headings.
    """
    fetched = get_items(setup_pocket())
    body = convert_to_markdown(parse_items(fetched))
    prefix = initial_html if include_html else ""
    return prefix + body, len(fetched['list'])
def archive_all_items():
    """Archive every entry of the Pocket list and report the outcome.

    Returns ``'Nothing to Archive'`` for an empty list, a summary string when
    the commit reports status 1, and otherwise the commit's
    ``'action_results'`` value.
    """
    pocket = setup_pocket()
    pending = get_items(pocket)['list']
    if len(pending) == 0:
        return 'Nothing to Archive'

    outcome = archive_items_commit(pocket, pending.keys())
    if outcome['status'] != 1:
        return outcome['action_results']
    return 'Archived %d items' % (len(pending))
def archive_items_commit(pocket, item_ids):
    """Queue an archive action per id on *pocket* and commit them together.

    Returns the result of ``pocket.commit()``.
    """
    for identifier in item_ids:
        payload = {'action': 'archive', 'item_id': identifier}
        pocket.actions.append(('archive', payload))
    return pocket.commit()
if __name__ == '__main__':
    # gimme_markdown() returns a (markdown, count) tuple, so what gets
    # printed is that tuple's repr.
    result = gimme_markdown()
    print(result)