-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgetBruinSchedule.py
222 lines (190 loc) · 8.94 KB
/
getBruinSchedule.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#-----------------------------------------------------------------------------
# Name: getBruinSchedule.py
# Purpose: contains functions to retrieve class information from the UCLA registrar website
#
# Author: <Samping Chuang>
# Email : sampingchuang@gmail.com
#
# Created: 2013/01/18
# Copyright: (c) 2013
# Licence:
#-----------------------------------------------------------------------------
import urllib2
from BeautifulSoup import BeautifulSoup
import json
#return the available UCLA terms and majors
def get_terms_majors():
    """Scrape the registrar's schedule home page for its term and major lists.

    Returns a dict with two keys, 'terms' and 'majors'. Each maps the
    ``value`` attribute of every child tag of the corresponding <select>
    element to that tag's first child node (its visible label).

    Raises AttributeError if either <select> is not found on the page.
    """
    home_url = "http://www.registrar.ucla.edu/schedule/schedulehome.aspx"
    page = BeautifulSoup(urllib2.urlopen(home_url).read())

    def options_by_value(select_id):
        # Both drop-downs are read the same way: value attribute -> label.
        dropdown = page.find("select", {"id": select_id})
        return dict((opt['value'], opt.contents[0])
                    for opt in dropdown.findChildren())

    listing = {}
    listing['terms'] = options_by_value(
        "ctl00_BodyContentPlaceHolder_SOCmain_lstTermDisp")
    listing['majors'] = options_by_value(
        "ctl00_BodyContentPlaceHolder_SOCmain_lstSubjectArea")
    return listing
#returns the classes available for the given term and major
def get_major_classes(term, major):
    """Fetch the course list the registrar shows for one term and major.

    term and major are concatenated into the query string exactly as
    given (no escaping is applied here).

    Returns a dict with:
      'term_sel'  - the term argument
      'major_sel' - the major argument
      'has_class' - the string 'true' if the course <select> was found on
                    the page, otherwise the string 'false'
      'classes'   - value attribute -> first child node (label) of every
                    child tag of that <select>; empty when it is missing
    """
    endpoint = 'http://www.registrar.ucla.edu/schedule/crsredir.aspx?'
    query = 'termsel=' + term + '&subareasel=' + major
    page = BeautifulSoup(urllib2.urlopen(endpoint + query).read())

    course_select = page.find(
        "select",
        {"id": "ctl00_BodyContentPlaceHolder_crsredir1_lstCourseNormal"})

    courses = {}
    if course_select is not None:
        for option in course_select.findChildren():
            courses[option['value']] = option.contents[0]

    return {
        'term_sel': term,
        'major_sel': major,
        'has_class': 'false' if course_select is None else 'true',
        'classes': courses,
    }
#returns the class information
# CSS class of a <td> in an enrollment table -> key used in the returned dicts.
_CELL_KEYS = {
    "dgdClassDataColumnIDNumber": 'ID',
    "dgdClassDataActType": 'type',
    "dgdClassDataSectionNumber": 'sec',
    "dgdClassDataDays": 'days',
    "dgdClassDataTimeStart": 'time_start',
    "dgdClassDataTimeEnd": 'time_end',
    "dgdClassDataBuilding": 'building',
    "dgdClassDataRoom": 'room',
    "dgdClassDataRestrict": 'restrict',
    "dgdClassDataEnrollTotal": 'enroll_total',
    "dgdClassDataEnrollCap": 'enroll_cap',
    "dgdClassDataWaitListTotal": 'waitl_total',
    "dgdClassDataWaitListCap": 'waitl_cap',
    "dgdClassDataStatus": 'status',
}


def _parse_enrollment_row(row, span_attrs=None):
    """Collect the recognised <td> cells of one <tr> into a dict.

    Each cell's value is the ``.string`` of the first <span> inside it
    (restricted to spans matching span_attrs when given), or None when the
    cell has no such span. Cells whose class is not in _CELL_KEYS are
    ignored.
    """
    parsed = {}
    for cell in row.findAll('td'):
        key = _CELL_KEYS.get(cell.get('class'))
        if key is None:
            continue
        if span_attrs is None:
            span = cell.find('span')
        else:
            span = cell.find('span', span_attrs)
        parsed[key] = span.string if span is not None else None
    return parsed


def get_class_info(term, major, crs):
    """Fetch lecture and section enrollment details for one course.

    term, major and crs are concatenated into the query string exactly as
    given (no escaping is applied here).

    Returns a dict with:
      'term_sel', 'major_sel', 'idxcrs' - the three arguments
      'has_class' - the string 'true' if more than one table was found in
                    the listing wrapper, otherwise the string 'false'
      'lectures'  - one dict per enrollment table, holding the lecture
                    row's fields (see _CELL_KEYS for the key names),
                    'has_section' (bool), and, when has_section is True,
                    'class_sections': a list of dicts with the same field
                    keys, one per section row

    Raises AttributeError if the page lacks the expected
    body/form/center/div/center wrapper chain.
    """
    result = {
        'term_sel': term,
        'major_sel': major,
        'idxcrs': crs,
        'lectures': [],
    }

    endpoint = 'http://www.registrar.ucla.edu/schedule/detselect.aspx?'
    query = 'termsel=' + term + '&subareasel=' + major + '&idxcrs=' + crs
    soup = BeautifulSoup(urllib2.urlopen(endpoint + query).read())

    # Finding the listing <div> by its id loses the contents when it is
    # nested this deeply, so walk down wrapper by wrapper instead. See
    # http://stackoverflow.com/questions/2136267/beautiful-soup-and-extracting-a-div-and-its-contents-by-id
    # <body> -> <form> -> <center> -> <div> -> <center> -> tables
    wrapper = soup.find('body').find('form').find('center').find('div').find('center')
    tables = wrapper.findChildren('table')

    # The first table is not part of the class listing, so zero or one
    # table means there is nothing to report.
    if len(tables) <= 1:
        result['has_class'] = 'false'
        return result
    result['has_class'] = 'true'

    # Table layout (example: cs32)
    #   <table class="tblClassListingBody">
    #       - Class Information/Notes
    #   <table class="dgdTemplateGrid" id="dgdCourseHeaderCOM SCI0032">
    #       - Course Header (pretty much just the title)
    #   <table class="dgdTemplateGrid" id="dgdDeptURLsCOM SCI0032">
    #       - Catalog / Definition Links
    #   -- the following repeats once per lecture --
    #   <table class="dgdTemplateGrid" id="dgdLectureHeaderCOM SCI0032">
    #       - Lecture 1 Nachenberg
    #   <table class="dgdTemplateGrid" id="dgdClassURLHeaderCOM SCI0032">
    #       - Course Webpage / Library Reserves / Textbooks
    #   <table class="dgdTemplateGrid">
    #       - Actual enrollment
    for table in tables[1:]:
        # Only the enrollment tables have class "dgdTemplateGrid" and no id.
        if table.get('id') is not None or table.get('class') != "dgdTemplateGrid":
            continue

        rows = table.findAll('tr')
        # Row 1 is read as the lecture (row 0 is skipped); a table without
        # it has nothing to parse, so skip it instead of raising IndexError.
        if len(rows) < 2:
            continue

        # Lecture values are read from <span class="bold">; section rows
        # take the first <span> of any class.
        lecture = _parse_enrollment_row(rows[1], {'class': 'bold'})

        section_rows = rows[2:]
        lecture['has_section'] = bool(section_rows)
        if section_rows:
            lecture['class_sections'] = [
                _parse_enrollment_row(row) for row in section_rows
            ]

        result['lectures'].append(lecture)

    return result