
Commit

added searchable index
au5ton committed Feb 10, 2023
1 parent fe5ebd1 commit 1c249b8
Showing 5 changed files with 114 additions and 5 deletions.
21 changes: 18 additions & 3 deletions bundler/bundle/generate_sitemap.py
@@ -42,9 +42,24 @@ def process(destination: Path):
    # create the output file
    with open(destination / 'sitemap.txt', 'w') as outfile:
        # write basic stuff
        outfile.write('https://cougargrades.io/\n')
        outfile.write('https://cougargrades.io/about\n')
        outfile.write('https://cougargrades.io/faq\n')
        # outfile.write('https://cougargrades.io/\n')
        # outfile.write('https://cougargrades.io/about\n')
        # outfile.write('https://cougargrades.io/faq\n')
        outfile.write('''https://cougargrades.io/
https://cougargrades.io/about
https://cougargrades.io/faq
https://cougargrades.io/faq/data-accuracy
https://cougargrades.io/faq/data-authenticity
https://cougargrades.io/faq/data-updates
https://cougargrades.io/faq/github-sponsors
https://cougargrades.io/faq/instructor-shaming
https://cougargrades.io/faq/interim-grading-theory
https://cougargrades.io/faq/uh-affiliation
https://cougargrades.io/top/enrolled-courses
https://cougargrades.io/top/enrolled-instructors
https://cougargrades.io/top/viewed-courses
https://cougargrades.io/top/viewed-instructors
''')
        print('Writing groups...')
        with alive_bar(len(KNOWN_GROUPS)) as bar:
            for item in sorted(list(KNOWN_GROUPS)):
17 changes: 17 additions & 0 deletions bundler/bundle/grade_distribution.py
@@ -94,6 +94,23 @@ def process(source: Path, destination: Path, csv_path_pattern: str = '*.csv'):
        # https://github.com/cougargrades/types/blob/b545a814fc0c68e3be3387152eb890cdeabc875e/src/GradeDistributionCSVRow.ts#L43-L59
        meta = sorted(list(set([ f'{row["INSTR LAST NAME"].strip()}, {row["INSTR FIRST NAME"].strip()}' for row in rows ])))
        metaFile.write(json.dumps(meta, indent=2))
    print(f'Generating search-optimized data: instructors.json')
    searchable_destination = destination / '..' / 'io.cougargrades.searchable'
    searchable_destination.mkdir(exist_ok=True)
    with open(searchable_destination / 'instructors.json', 'w') as metaFile, open(destination / 'records.csv', 'r') as records:
        rows = [row for row in csv.DictReader(records)]
        names = sorted(list(set([(row["INSTR FIRST NAME"].strip(), row["INSTR LAST NAME"].strip()) for row in rows])))
        results = []
        for (firstName, lastName) in names:
            legalName = f'{lastName}, {firstName}'
            search_result_item = {
                "href": f'/i/{legalName}',
                "firstName": firstName,
                "lastName": lastName,
                "legalName": legalName,
            }
            results.append(search_result_item)
        metaFile.write(json.dumps({ "data": results }, indent=2))
    print('Done')

def count_distinct_by_keys(rows: List[Dict], keys: List[str]) -> int:
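A minimal sketch (not part of this commit) of how the generated instructors.json could be consumed downstream. The local bundle path and the case-insensitive substring match are assumptions for illustration, not something this commit defines.

import json
from pathlib import Path

def search_instructors(index_path: Path, query: str) -> list:
    # load the { "data": [...] } document written by the code above
    with open(index_path, 'r') as f:
        index = json.load(f)
    q = query.strip().lower()
    # naive case-insensitive substring match over the legal name
    return [item for item in index['data'] if q in item['legalName'].lower()]

# hypothetical usage; the path assumes the bundle was generated locally
matches = search_instructors(Path('bundle/io.cougargrades.searchable/instructors.json'), 'smith')
for item in matches[:5]:
    print(item['href'], item['legalName'])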
2 changes: 1 addition & 1 deletion bundler/bundle/patch/util.py
@@ -40,7 +40,7 @@ def createKeywordsWithPermutations(a_sentence):
# excerpt from original python: https://github.com/cougargrades/importer-python/blob/5c4995ebad68ca28f8c00a43a6faf3d7d69f75e5/cougargrades/util.py
def generatePermutations(a_sentence) -> List[str]:
    words = a_sentence.split(' ')
    print(words)
    #print(words)
    permutations = []
    results = []
    for i in range(1, len(words)+1):
76 changes: 76 additions & 0 deletions bundler/bundle/publications_courses.py
@@ -1,6 +1,9 @@
import csv
import json
from bundle import util
from pathlib import Path
from bs4 import BeautifulSoup
from urllib.parse import quote
from alive_progress import alive_bar
from colorama import init
init()
@@ -49,6 +52,79 @@ def process(source: Path, destination: Path):
            })
            bar()

    # TODO: create searchable courses
    print(f'\t{Style.DIM}Generating search-optimized data: courses.json{Style.RESET_ALL}')
    searchable_destination = destination / '..' / 'io.cougargrades.searchable'
    searchable_destination.mkdir(exist_ok=True)
    with open(searchable_destination / 'courses.json', 'w') as outfile, open(destination / 'pairs.csv', 'r') as pairs_file, open(destination / '..' / 'edu.uh.grade_distribution' / 'records.csv') as records_file:
        pairs = [row for row in csv.DictReader(pairs_file)]
        records = [row for row in csv.DictReader(records_file)]

        unique_courses_with_descriptions = sorted(list(set([(
            f'{row["SUBJECT"].strip()} {row["CATALOG NBR"].strip()}',
            row["COURSE DESCR"]
        ) for row in records])))

        results = []
        with alive_bar(len(unique_courses_with_descriptions)) as bar:
            for (courseName, description) in unique_courses_with_descriptions:
                search_result_item = {
                    "href": f'/c/{courseName}',
                    "courseName": courseName,
                    "description": description,
                    "publicationTextContent": ""
                }
                matching_pairs = [pair for pair in pairs if f'{pair["department"]} {pair["catalogNumber"]}' == courseName]
                for matched_pair in matching_pairs:
                    break # TODO: maybe remove this if it proves useful
                    with open(source / matched_pair["catoid"] / f'{matched_pair["catoid"]}-{matched_pair["coid"]}.html') as htmlFile:
                        # get primary content area
                        html = BeautifulSoup(htmlFile.read(), features='html5lib')
                        # compute content
                        content = ""
                        h3 = html.select_one('.coursepadding div h3')
                        afterElems = []
                        for item in h3.next_siblings:
                            # change URLs that point to other courses to a CougarGrades URL
                            if item.name == 'a' and item['href'] != None and item['href'].startswith('preview_course_nopop.php'):
                                item.attrs.clear()
                                item['href'] = quote(f'/c/{item.string.strip()}')
                            # skip spammy tooltip divs
                            if item.name != None and item.name != '' and item.has_attr('style') and item['style'] != None and 'display:none' in "".join(item['style'].split()).lower():
                                continue
                            # replace the <hr /> with <br />
                            if item.name == 'hr':
                                item.name = 'br'
                            # add to list
                            afterElems += [ item ]

                        # convert elements to a single string
                        content = ''.join([ str(item) for item in afterElems ]).strip()
                        innerHtml = BeautifulSoup(content, features='html5lib')
                        innerTextContent = ' '.join(innerHtml.find_all(text=True, recursive=True)).strip()
                        search_result_item["publicationTextContent"] += innerTextContent

                results.append(search_result_item)
                bar()

        # write the results to a file
        outfile.write(json.dumps({ "data": results }, indent=2))


    # with alive_bar(len(KNOWN_COURSES)) as bar:
    # for courseName in KNOWN_COURSES:

    # Output structure
    sample = {
        "href": "/c/AAMS 2300",
        "courseName": "AAMS 2300",
        "description": "Intro Asian American Studies",
        "publicationTextContent": "",
    }

    # TODO: write the data
    #outfile.write(json.dumps([], indent=2))

    # sort output file
    sortedlist = []
    with open(destination / 'pairs.csv', 'r') as infile:
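For context only, not part of this commit: a self-contained sketch of what the sibling-walking loop above does, run against an invented HTML fragment rather than a real catalog page. It mirrors the committed transformations: preview_course_nopop.php links become /c/... routes, display:none tooltip divs are skipped, and <hr /> becomes <br />.

from bs4 import BeautifulSoup
from urllib.parse import quote

# invented fragment for illustration; real catalog pages are larger
fragment = '''
<div class="coursepadding"><div>
  <h3>AAMS 2300 - Intro Asian American Studies</h3>
  Prerequisite: <a href="preview_course_nopop.php?catoid=1&coid=2">ENGL 1301</a>.
  <div style="display: none">tooltip spam</div>
  <hr />
  Credit Hours: 3
</div></div>
'''

html = BeautifulSoup(fragment, features='html5lib')
h3 = html.select_one('.coursepadding div h3')
afterElems = []
for item in h3.next_siblings:
    # rewrite catalog links to CougarGrades routes
    if item.name == 'a' and item.get('href', '').startswith('preview_course_nopop.php'):
        label = item.string.strip()
        item.attrs.clear()
        item['href'] = quote(f'/c/{label}')
    # skip hidden tooltip divs
    if item.name != None and item.has_attr('style') and 'display:none' in ''.join(item['style'].split()).lower():
        continue
    # downgrade horizontal rules to line breaks
    if item.name == 'hr':
        item.name = 'br'
    afterElems.append(item)

content = ''.join(str(item) for item in afterElems).strip()
innerText = ' '.join(BeautifulSoup(content, features='html5lib').find_all(text=True, recursive=True)).strip()
print(innerText)  # roughly: Prerequisite: ENGL 1301 . Credit Hours: 3 (whitespace varies)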
3 changes: 2 additions & 1 deletion npm/package.json
@@ -10,7 +10,8 @@
"bundle/edu.uh.publications.courses/**",
"bundle/edu.uh.publications.core/**",
"bundle/edu.uh.publications.subjects/**",
"bundle/io.cougargrades.groups/**"
"bundle/io.cougargrades.groups/**",
"bundle/io.cougargrades.searchable/**"
],
"scripts": {},
"author": "",
