forked from kislaykumarkk/Scraper
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathScraper_ODD.py
129 lines (92 loc) · 3.24 KB
/
Scraper_ODD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as BS
import csv
import sys
import os
import pdfkit
# The data entry and populate section.
# Roll numbers within a batch are consecutive integers, so each batch is
# described by its first roll number and an approximate head-count.
initial_roll = 10400212001  # Enter the starting roll number of general students
general_count = 135         # Enter the approx count of general students
lateral_roll = 10400213122  # Enter the starting roll number of lateral students
lateral_count = 25          # Enter the approx count of lateral students

# General students first, followed by the lateral-entry students.
roll_list = list(range(initial_roll, initial_roll + general_count))
roll_list.extend(range(lateral_roll, lateral_roll + lateral_count))
# End of data entry and populate section
# Helpers are defined once, up front, instead of being re-created for every
# roll number. find_by_xpath and FormPage.submit read the module-level
# ``driver`` that the loop below rebinds for each student.
def find_by_xpath(locator):
    """Wait up to 10 seconds for an element matching *locator* and return it.

    *locator* is an XPath expression looked up in the page currently loaded
    in the module-level ``driver``.
    """
    return WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, locator))
    )


class FormPage(object):
    """The roll-number form on the page currently loaded in ``driver``."""

    def fill_form(self, roll):
        """Type *roll* into the ``rollno`` input and return ``self``."""
        # Find form field to enter the roll no. The roll is sent as text so
        # the call does not depend on how the driver treats integer keys.
        find_by_xpath('//input[@name = "rollno"]').send_keys(str(roll))
        return self  # makes it so you can call .submit() after this method

    def submit(self):
        """Click the semester button and wait for the page that follows."""
        # Click Javascript button
        # Change value for diff semester
        with myWait(driver):
            driver.find_element_by_name("sem7").click()


class myWait(object):
    """Context manager that, on exit, waits for an ``html`` tag to be present."""

    def __init__(self, driver):
        self.driver = driver

    def __enter__(self):
        pass

    def __exit__(self, *_):
        # Wait on the driver this instance was given rather than the global.
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "html"))
        )


# The with-block guarantees results.csv is flushed and closed even when a
# lookup fails part-way through the roll list.
with open("results.csv", "w") as csvfile:
    csvout = csv.writer(csvfile)
    csvout.writerow(("Roll", "Name", "SGPA"))
    for roll in roll_list:
        # Chrome driver to open browser
        # Download from https://sites.google.com/a/chromium.org/chromedriver/downloads
        # Add correct path for ChromeDriver here
        driver = webdriver.Chrome('/home/kaustav/chromedriver')
        try:
            # URL of the website to scrape
            driver.get("http://www.wbutech.net/result_odd1516.php")
            FormPage().fill_form(roll).submit()
            # Parse HTML source using BeautifulSoup
            soup = BS(driver.page_source)
            # Path to store the HTML files
            path = '/home/kaustav/Result/' + str(roll) + '.html'
            # Defaults that are written to the CSV when no matching table
            # is found in the returned page.
            name = ""
            roll_no = ""
            sgpa = 0.0
            # Fetching the tables
            for tab in soup.find_all('table'):
                soup4rows = tab.find_all('tr')
                if len(soup4rows) == 3:
                    # Three-row table: its second row holds name and roll no.
                    str1 = str(soup4rows[1])
                    name = str1[str1.index("Name :") + 7:
                                str1.index("</th>\n<th style=")].strip()
                    roll_no1 = str1[str1.index("Roll No. :") + 11:
                                    str1.index("</th>\n</tr>")].strip()
                    # int() works on both Python 2 and 3; long() is 2-only.
                    roll_no = int(roll_no1)
                elif len(soup4rows) == 2:
                    # Two-row table: its first row holds the SGPA figure.
                    str1 = str(soup4rows[0])
                    str2 = str1[str1.index("SEMESTER :") + 10:
                                str1.index("</td>")].strip()
                    sgpa = float(str2)
            csvout.writerow((roll_no, name, sgpa))
            # prettify('utf-8') yields encoded bytes, so write them in one
            # call to a binary-mode file instead of item by item.
            with open(path, 'wb') as f:
                f.write(soup.prettify('utf-8'))
            # Disabled PDF export, kept for reference:
            # #Path to store the output PDF file
            # out = '/home/kaustav/Result/' + str(roll) + '.pdf'
            # #Output as PDF
            # pdfkit.from_file(path, out)
        finally:
            driver.quit()  # closes the web browser, even if the lookup failed
        # The saved HTML is removed again right away; presumably it only
        # exists as input for the disabled PDF export above.
        os.remove(path)
        #Status