-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWebScaper.py
102 lines (53 loc) · 1.76 KB
/
WebScaper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 11 16:13:39 2019
@author: nachiketpusalkar
"""
#git changed something
from bs4 import BeautifulSoup
import requests
import csv
# Page whose <li> entries supply the hero names and the links to follow.
HERO_LIST_URL = 'https://wowwiki.fandom.com/wiki/Warcraft_III_hero_units'
# No trailing slash: scraped hrefs already start with '/'.
SITE_ROOT = 'https://wowwiki.fandom.com'
OUTPUT_CSV = 'wc3.csv'
CSV_HEADER = ['Heroes', 'Spell 1', 'Spell 2', 'Spell 3', 'Ultimate']
# Only the first 15 list entries / links are used.
MAX_ENTRIES = 15
# One CSV row holds four spell columns.
SPELLS_PER_HERO = 4
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30
# Links are compared starting at offset 6, which is len('/wiki/').
PAGE_NAME_OFFSET = 6
# Links whose text at that offset starts with one of these are dropped.
# NOTE(review): presumably these are non-hero pages ('Warcraft...',
# 'Ranged...') -- confirm against the live page.
SKIPPED_PAGE_PREFIXES = ('Wa', 'Ran')


def fetch_soup(url):
    """Download *url* and return it parsed with the lxml parser.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no answer arrives within REQUEST_TIMEOUT.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of scraping an error page as if it were data.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


def collect_hero_entries(soup):
    """Return ``(entries, hrefs)`` gathered from the hero list page.

    ``entries`` holds the text of every <li> inside the main content div
    of each 'WikiaMainContent' article; ``hrefs`` holds the ``href`` of
    every <a> inside those <li> elements (``None`` when an anchor has no
    href attribute).
    """
    entries = []
    hrefs = []
    for article in soup.find_all('article', class_='WikiaMainContent'):
        body = article.find('div', class_='mw-content-ltr mw-content-text')
        if body is None:
            # Article without the expected content div: nothing to read.
            continue
        for item in body.find_all('li'):
            entries.append(item.text)
            hrefs.extend(anchor.get('href') for anchor in item.find_all('a'))
    return entries, hrefs


def select_hero_links(hrefs, limit=MAX_ENTRIES):
    """Return the first *limit* site-relative links that are not skipped.

    A link is kept when it starts with '/' and its text from
    PAGE_NAME_OFFSET onwards does not start with one of
    SKIPPED_PAGE_PREFIXES. Missing (``None``) and empty hrefs are ignored.
    """
    links = [
        href for href in hrefs
        if href
        and href.startswith('/')
        and not href.startswith(SKIPPED_PAGE_PREFIXES, PAGE_NAME_OFFSET)
    ]
    return links[:limit]


def clean_spell(name):
    """Return *name* without its first character when that is a space."""
    return name[1:] if name.startswith(' ') else name


def fetch_spells(links):
    """Return the cleaned heading texts scraped from every page in *links*.

    For each page, the text of the first <span> of every <h3> matched by
    ``class_=''`` is collected; headings without a <span> are skipped.
    """
    spells = []
    for link in links:
        page = fetch_soup(SITE_ROOT + link)
        for heading in page.find_all('h3', class_=''):
            if heading.span is not None:
                spells.append(clean_spell(heading.span.text))
    return spells


def group_spells(spells, size=SPELLS_PER_HERO):
    """Split *spells* into consecutive lists of *size* (last may be short)."""
    return [spells[start:start + size] for start in range(0, len(spells), size)]


def build_rows(entries, spell_groups):
    """Pair hero names with spell groups and return the CSV data rows.

    Only the first MAX_ENTRIES entries are used, and the value found at
    the second-to-last position is removed before pairing.
    NOTE(review): presumably that entry is not a hero -- confirm.
    Each hero name is the first line of its entry text. Heroes are paired
    with spell groups by position; missing spells become empty cells so
    that every row has the same width.
    """
    heroes = entries[:MAX_ENTRIES]
    if len(heroes) >= 2:
        # list.remove drops the first entry equal to that value.
        heroes.remove(heroes[-2])
    rows = []
    for index, entry in enumerate(heroes):
        group = spell_groups[index] if index < len(spell_groups) else []
        cells = (list(group) + [''] * SPELLS_PER_HERO)[:SPELLS_PER_HERO]
        rows.append([entry.split('\n')[0]] + cells)
    return rows


def write_csv(rows, path=OUTPUT_CSV):
    """Write the header line followed by *rows* to *path*."""
    # newline='' is what the csv module asks for; without it, platforms
    # that translate '\n' end up with blank lines between rows. The with
    # block closes the file even if a row fails to serialise.
    with open(path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def main():
    """Scrape the hero list and each hero page, then write wc3.csv."""
    entries, hrefs = collect_hero_entries(fetch_soup(HERO_LIST_URL))
    spells = fetch_spells(select_hero_links(hrefs))
    write_csv(build_rows(entries, group_spells(spells)))


if __name__ == '__main__':
    main()