-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrushup.py
47 lines (33 loc) · 1.26 KB
/
brushup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#! /usr/bin/env python
# coding: utf-8
# -*- coding: utf_8 -*-
import csv
import requests
from BeautifulSoup import BeautifulSoup
# Base URL of the Brushup site. Request paths are appended to it by plain
# string concatenation, so it keeps its trailing slash.
HOST = 'http://brushup.narihiro.info/'
def getPageData(url):
    """Download one listing page and extract its entries.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A ``(rows, next_href)`` pair.  ``rows`` is ``zip(titles, bodies)``:
        the UTF-8 encoded link text of every ``div.show-title`` paired
        positionally with the concatenated UTF-8 markup of the ``<p>`` tags
        inside every ``div.show-body``.  ``next_href`` is the ``href`` of the
        ``a.next_page`` link, or ``None`` when the page has no such link.
        When the response is not OK the result is ``([], None)``.
    """
    # NOTE(review): no timeout is passed here, so a server that never
    # answers will block this call indefinitely.
    res = requests.get(url)
    if not res.ok:
        return [], None

    soup = BeautifulSoup(res.content, fromEncoding='utf-8')

    title_divs = soup.findAll('div', attrs={'class': 'show-title'})
    titles = [div.find('a').text.encode('utf-8') for div in title_divs]

    body_divs = soup.findAll('div', attrs={'class': 'show-body'})
    # Join the rendered paragraphs in one pass instead of folding them with
    # reduce/lambda string concatenation; the resulting text is the same.
    bodies = [
        ''.join(p.__str__('utf-8') for p in div.findAll('p'))
        for div in body_divs
    ]

    # Named next_link rather than `next` so the builtin is not shadowed.
    next_link = soup.find('a', attrs={'class': 'next_page'})
    return zip(titles, bodies), next_link['href'] if next_link else None
def getPagesData(host, url):
    """Collect the rows of every page reachable through "next" links.

    Starting from ``host + url``, each page is fetched with ``getPageData``
    and its rows are appended to the result; the next-page value returned by
    that call becomes the path for the following request.  The walk ends as
    soon as the path is falsy (``None`` or empty).

    Args:
        host: Prefix that is concatenated in front of every path.
        url: Path of the first page to request.

    Returns:
        A list with the rows of all visited pages, in visiting order.
    """
    collected = []
    path = url
    while path:
        rows, path = getPageData(host + path)
        collected.extend(rows)
    return collected
def getBrushupData(user, filename):
    """Write a user's entries to ``filename`` as tab-separated rows.

    The ``/completed``, ``/list`` and ``/today`` listings of ``user`` are
    fetched in that order through ``getPagesData`` and written one after
    another to the same file.

    Args:
        user: Account name used as the first path segment under ``HOST``.
        filename: Path of the output file; it is opened for writing, so any
            existing content is replaced.
    """
    sections = ('/completed', '/list', '/today')
    # Binary mode: this script targets Python 2 (BeautifulSoup 3 import,
    # bare reduce), where csv writers are given files opened with 'b'.
    with open(filename, 'wb') as out:
        tsv = csv.writer(out, delimiter='\t')
        for section in sections:
            tsv.writerows(getPagesData(HOST, user + section))
if __name__ == '__main__':
    # Script entry point: export the hard-coded account to brushup.csv.
    getBrushupData(user='kojit', filename='brushup.csv')
# vim: set et :