
Commit

added searchable index
au5ton committed Feb 10, 2023
1 parent fe5ebd1 commit 1c249b8
Showing 5 changed files with 114 additions and 5 deletions.
21 changes: 18 additions & 3 deletions bundler/bundle/generate_sitemap.py
@@ -42,9 +42,24 @@ def process(destination: Path):
    # create the output file
    with open(destination / 'sitemap.txt', 'w') as outfile:
        # write basic stuff
        outfile.write('https://cougargrades.io/\n')
        outfile.write('https://cougargrades.io/about\n')
        outfile.write('https://cougargrades.io/faq\n')
        # outfile.write('https://cougargrades.io/\n')
        # outfile.write('https://cougargrades.io/about\n')
        # outfile.write('https://cougargrades.io/faq\n')
        outfile.write('''https://cougargrades.io/
https://cougargrades.io/about
https://cougargrades.io/faq
https://cougargrades.io/faq/data-accuracy
https://cougargrades.io/faq/data-authenticity
https://cougargrades.io/faq/data-updates
https://cougargrades.io/faq/github-sponsors
https://cougargrades.io/faq/instructor-shaming
https://cougargrades.io/faq/interim-grading-theory
https://cougargrades.io/faq/uh-affiliation
https://cougargrades.io/top/enrolled-courses
https://cougargrades.io/top/enrolled-instructors
https://cougargrades.io/top/viewed-courses
https://cougargrades.io/top/viewed-instructors
''')
        print('Writing groups...')
        with alive_bar(len(KNOWN_GROUPS)) as bar:
            for item in sorted(list(KNOWN_GROUPS)):
17 changes: 17 additions & 0 deletions bundler/bundle/grade_distribution.py
@@ -94,6 +94,23 @@ def process(source: Path, destination: Path, csv_path_pattern: str = '*.csv'):
        # https://github.com/cougargrades/types/blob/b545a814fc0c68e3be3387152eb890cdeabc875e/src/GradeDistributionCSVRow.ts#L43-L59
        meta = sorted(list(set([ f'{row["INSTR LAST NAME"].strip()}, {row["INSTR FIRST NAME"].strip()}' for row in rows ])))
        metaFile.write(json.dumps(meta, indent=2))
    print(f'Generating search-optimized data: instructors.json')
    searchable_destination = destination / '..' / 'io.cougargrades.searchable'
    searchable_destination.mkdir(exist_ok=True)
    with open(searchable_destination / 'instructors.json', 'w') as metaFile, open(destination / 'records.csv', 'r') as records:
        rows = [row for row in csv.DictReader(records)]
        names = sorted(list(set([(row["INSTR FIRST NAME"].strip(), row["INSTR LAST NAME"].strip()) for row in rows])))
        results = []
        for (firstName, lastName) in names:
            legalName = f'{lastName}, {firstName}'
            search_result_item = {
                "href": f'/i/{legalName}',
                "firstName": firstName,
                "lastName": lastName,
                "legalName": legalName,
            }
            results.append(search_result_item)
        metaFile.write(json.dumps({ "data": results }, indent=2))
    print('Done')

def count_distinct_by_keys(rows: List[Dict], keys: List[str]) -> int:
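A minimal sketch (not part of this commit) of how the generated instructors.json could be consumed downstream. The local bundle path and the case-insensitive substring match are assumptions for illustration, not something this commit defines.

import json
from pathlib import Path

def search_instructors(index_path: Path, query: str) -> list:
    # load the { "data": [...] } document written by the code above
    with open(index_path, 'r') as f:
        index = json.load(f)
    q = query.strip().lower()
    # naive case-insensitive substring match over the legal name
    return [item for item in index['data'] if q in item['legalName'].lower()]

# hypothetical usage; the path assumes the bundle was generated locally
matches = search_instructors(Path('bundle/io.cougargrades.searchable/instructors.json'), 'smith')
for item in matches[:5]:
    print(item['href'], item['legalName'])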
2 changes: 1 addition & 1 deletion bundler/bundle/patch/util.py
@@ -40,7 +40,7 @@ def createKeywordsWithPermutations(a_sentence):
# excerpt from original python: https://github.com/cougargrades/importer-python/blob/5c4995ebad68ca28f8c00a43a6faf3d7d69f75e5/cougargrades/util.py
def generatePermutations(a_sentence) -> List[str]:
    words = a_sentence.split(' ')
    print(words)
    #print(words)
    permutations = []
    results = []
    for i in range(1, len(words)+1):
76 changes: 76 additions & 0 deletions bundler/bundle/publications_courses.py
@@ -1,6 +1,9 @@
import csv
import json
from bundle import util
from pathlib import Path
from bs4 import BeautifulSoup
from urllib.parse import quote
from alive_progress import alive_bar
from colorama import init
init()
@@ -49,6 +52,79 @@ def process(source: Path, destination: Path):
            })
            bar()

    # TODO: create searchable courses
    print(f'\t{Style.DIM}Generating search-optimized data: courses.json{Style.RESET_ALL}')
    searchable_destination = destination / '..' / 'io.cougargrades.searchable'
    searchable_destination.mkdir(exist_ok=True)
    with open(searchable_destination / 'courses.json', 'w') as outfile, open(destination / 'pairs.csv', 'r') as pairs_file, open(destination / '..' / 'edu.uh.grade_distribution' / 'records.csv') as records_file:
        pairs = [row for row in csv.DictReader(pairs_file)]
        records = [row for row in csv.DictReader(records_file)]

        unique_courses_with_descriptions = sorted(list(set([(
            f'{row["SUBJECT"].strip()} {row["CATALOG NBR"].strip()}',
            row["COURSE DESCR"]
        ) for row in records])))

        results = []
        with alive_bar(len(unique_courses_with_descriptions)) as bar:
            for (courseName, description) in unique_courses_with_descriptions:
                search_result_item = {
                    "href": f'/c/{courseName}',
                    "courseName": courseName,
                    "description": description,
                    "publicationTextContent": ""
                }
                matching_pairs = [pair for pair in pairs if f'{pair["department"]} {pair["catalogNumber"]}' == courseName]
                for matched_pair in matching_pairs:
                    break # TODO: maybe remove this if it proves useful
                    with open(source / matched_pair["catoid"] / f'{matched_pair["catoid"]}-{matched_pair["coid"]}.html') as htmlFile:
                        # get primary content area
                        html = BeautifulSoup(htmlFile.read(), features='html5lib')
                        # compute content
                        content = ""
                        h3 = html.select_one('.coursepadding div h3')
                        afterElems = []
                        for item in h3.next_siblings:
                            # change URLs that point to other courses to a CougarGrades URL
                            if item.name == 'a' and item['href'] != None and item['href'].startswith('preview_course_nopop.php'):
                                item.attrs.clear()
                                item['href'] = quote(f'/c/{item.string.strip()}')
                            # skip spammy tooltip divs
                            if item.name != None and item.name != '' and item.has_attr('style') and item['style'] != None and 'display:none' in "".join(item['style'].split()).lower():
                                continue
                            # replace the <hr /> with <br />
                            if item.name == 'hr':
                                item.name = 'br'
                            # add to list
                            afterElems += [ item ]

                        # convert elements to a single string
                        content = ''.join([ str(item) for item in afterElems ]).strip()
                        innerHtml = BeautifulSoup(content, features='html5lib')
                        innerTextContent = ' '.join(innerHtml.find_all(text=True, recursive=True)).strip()
                        search_result_item["publicationTextContent"] += innerTextContent

                results.append(search_result_item)
                bar()

        # write the results to a file
        outfile.write(json.dumps({ "data": results }, indent=2))


    # with alive_bar(len(KNOWN_COURSES)) as bar:
    # for courseName in KNOWN_COURSES:

    # Output structure
    sample = {
        "href": "/c/AAMS 2300",
        "courseName": "AAMS 2300",
        "description": "Intro Asian American Studies",
        "publicationTextContent": "",
    }

    # TODO: write the data
    #outfile.write(json.dumps([], indent=2))

    # sort output file
    sortedlist = []
    with open(destination / 'pairs.csv', 'r') as infile:
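For context only, not part of this commit: a self-contained sketch of what the sibling-walking loop above does, run against an invented HTML fragment rather than a real catalog page. It mirrors the committed transformations: preview_course_nopop.php links become /c/... routes, display:none tooltip divs are skipped, and <hr /> becomes <br />.

from bs4 import BeautifulSoup
from urllib.parse import quote

# invented fragment for illustration; real catalog pages are larger
fragment = '''
<div class="coursepadding"><div>
  <h3>AAMS 2300 - Intro Asian American Studies</h3>
  Prerequisite: <a href="preview_course_nopop.php?catoid=1&coid=2">ENGL 1301</a>.
  <div style="display: none">tooltip spam</div>
  <hr />
  Credit Hours: 3
</div></div>
'''

html = BeautifulSoup(fragment, features='html5lib')
h3 = html.select_one('.coursepadding div h3')
afterElems = []
for item in h3.next_siblings:
    # rewrite catalog links to CougarGrades routes
    if item.name == 'a' and item.get('href', '').startswith('preview_course_nopop.php'):
        label = item.string.strip()
        item.attrs.clear()
        item['href'] = quote(f'/c/{label}')
    # skip hidden tooltip divs
    if item.name != None and item.has_attr('style') and 'display:none' in ''.join(item['style'].split()).lower():
        continue
    # downgrade horizontal rules to line breaks
    if item.name == 'hr':
        item.name = 'br'
    afterElems.append(item)

content = ''.join(str(item) for item in afterElems).strip()
innerText = ' '.join(BeautifulSoup(content, features='html5lib').find_all(text=True, recursive=True)).strip()
print(innerText)  # roughly: Prerequisite: ENGL 1301 . Credit Hours: 3 (whitespace varies)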
3 changes: 2 additions & 1 deletion npm/package.json
@@ -10,7 +10,8 @@
"bundle/edu.uh.publications.courses/**",
"bundle/edu.uh.publications.core/**",
"bundle/edu.uh.publications.subjects/**",
"bundle/io.cougargrades.groups/**"
"bundle/io.cougargrades.groups/**",
"bundle/io.cougargrades.searchable/**"
],
"scripts": {},
"author": "",
