diff --git a/rpi_data/modules/course.py b/rpi_data/modules/course.py index 5d6036b9c..b14a2789f 100644 --- a/rpi_data/modules/course.py +++ b/rpi_data/modules/course.py @@ -40,7 +40,7 @@ def __init__(self, info): self.section = info[3] self.credits = info[4] self.name = info[5] - self.days = info[6] + self.days = info[6].strip() self.stime = info[7] self.etime = info[8] self.max = info[9] diff --git a/rpi_data/modules/headless_login.py b/rpi_data/modules/headless_login.py index 715d19d4f..0027a88d8 100644 --- a/rpi_data/modules/headless_login.py +++ b/rpi_data/modules/headless_login.py @@ -51,12 +51,6 @@ def login(driver): submit.click() while len(driver.find_elements(By.XPATH, '/html/body/div/div/div[1]/div/div[2]/div[7]/a'))==0: time.sleep(.1) - options = driver.find_element(By.XPATH, '/html/body/div/div/div[1]/div/div[2]/div[7]/a') - options.click() - while len(driver.find_elements(By.XPATH, '/html/body/div/div/div[1]/div/div[1]/ul/li[1]/a')) == 0: - time.sleep(.1) - duo_option = driver.find_element(By.XPATH, '/html/body/div/div/div[1]/div/div[1]/ul/li[1]/a') - duo_option.click() while len(driver.find_elements(By.XPATH, '/html/body/div/div/div[1]/div/div[2]/div[3]')) == 0: time.sleep(.1) print("Your DUO code: "+ driver.find_element(by= By.XPATH, value = "/html/body/div/div/div[1]/div/div[2]/div[3]").text) # print the duo code @@ -65,8 +59,19 @@ def login(driver): trust_button = driver.find_element(By.XPATH, '//*[@id="trust-browser-button"]') #find and click it trust_button.click() time.sleep(3) + while ("https://shib.auth.rpi.edu" in driver.current_url): + driver.get("https://sis.rpi.edu/rss/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu") if (driver.current_url == "https://sis.rpi.edu/rss/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu"): # check if we're in the right place return "Success" else: print("login failed") return "Failure" + + +if __name__ == "__main__": + options = Options() + options.add_argument('--user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS 
X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1')
+    driver = webdriver.Firefox(options=options)
+    driver.implicitly_wait(2)
+    login(driver)
+
diff --git a/rpi_data/modules/new_parse.py b/rpi_data/modules/new_parse.py
index ff3c7aa95..2f79fcec0 100755
--- a/rpi_data/modules/new_parse.py
+++ b/rpi_data/modules/new_parse.py
@@ -4,7 +4,7 @@
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.wait import WebDriverWait
-from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.firefox.options import Options
 from selenium.webdriver.support.ui import Select
 import time
 from bs4 import BeautifulSoup as bs
@@ -46,9 +46,9 @@ def genBasevalue(term): #this function returns the code sis uses for a specific
     basevalue += year * 100 #this makes the basevalue show our year
     return basevalue
 
-def sisCourseSearch(driver, term): #main loop of the parser, goes to the course search, selects the desired term, and then loops through each subject to grab the course tables
+def sisCourseSearch(driver, term, course_codes_dict): #main loop of the parser, goes to the course search, selects the desired term, and then loops through each subject to grab the course tables
     info = list()
-    course_codes_dict = findAllSubjectCodes(driver)
+
     url = "https://sis.rpi.edu/rss/bwskfcls.p_sel_crse_search"
     driver.get(url)
     select = Select(driver.find_element(by=By.ID, value = "term_input_id")) # term selection dropdown
@@ -70,7 +70,7 @@ def sisCourseSearch(driver, term): #main loop of the parser, goes to the course
         print("Getting course info")
         courses = getCourseInfo(driver, key, course_codes_dict) # creates a list of course objects
         with ThreadPoolExecutor(max_workers=50) as pool:
-            pool.map(getReqForClass, courses)
+            pool.map(lambda course: getReqForClass(course, list(course_codes_dict)), courses) # every course gets the full subject-code list (a 2nd iterable would be zipped with courses)
         [info.append(i) for i in courses] # appends each course to our final list
         subject = info[len(info)-1].major # gets the
subject we just parsed
         driver.get(url) # goes back to the start
@@ -231,6 +231,7 @@ def processRow(data: list[str], prevrow: list[str], year: int) -> list[str]:
         return info
     #Some admin and grad courses won't have days of the week
     #Also the backend doesn't like the days of the week being TBA
+    info[6] = info[6].strip('\xa0')
     if (info[6] == '\xa0' or info[6] == "TBA"):
         info[6] = ""
     #Generally speaking methods that affect info should come in the order that the affect elements, ie
@@ -284,6 +285,18 @@ def getCourseInfo(driver, year:str, schools : dict) -> list:
                 c.addSchool("Interdisciplinary and Other")
             courses.append(c)
     return courses
+# takes a raw phrase and returns a list of all of the course codes included, with repeats
+def findCourseCodes(raw, subject_codes) -> list:
+    course_codes = []
+    for i in subject_codes:
+        while (i in raw):
+            find = raw.find(i)
+            text = raw[find:find + 9]
+            raw = raw[:find] + raw[find + 9:]
+            if (len(text) < 6 or text[4] != " " or not text[5].isdigit()): # a match at the very end of raw is too short to index
+                continue
+            course_codes.append(text)
+    return course_codes
 #Given a url for a course, as well as the course code and major, return a list of prereqs, coreqs, and description of the course
 #Eg. ITWS 2110 - https://sis.rpi.edu/rss/bwckctlg.p_disp_course_detail?cat_term_in=202401&subj_code_in=ITWS&crse_numb_in=2110
 # Prereqs - ITWS 1100
@@ -293,7 +306,8 @@ def getCourseInfo(driver, year:str, schools : dict) -> list:
 # The course uses a hands-on approach in which students actively develop Web-based software systems.
 # Additional topics include installation, configuration, and management of Web servers.
 # Students are required to have access to a PC on which they can install software such as a Web server and various programming environments.
-def getReqFromLink(webres, courseCode, major) -> list:
+
+def getReqFromLink(webres, subject_codes) -> list:
     page = webres.content
     soup = bs(page, "html.parser")
     body = soup.find('td', class_='ntdefault')
@@ -303,44 +317,73 @@ def getReqFromLink(webres, courseCode, major) -> list:
         while '\n' in classInfo[i]:
             #Some \n's can make it into the parsed data, so we need to get rid of them.
             classInfo[i] = classInfo[i].replace('\n','')
-    key = "Prerequisites/Corequisites"
+    key = "Prerequisites/Corequisites: "
     preKey = "Prerequisite"
-    prereqs = ""
-    coreqs = ""
+    coKey = "Corequisite"
+    extraKey = "Co-listed"
+    creditKey = "Credit Hours"
+    prereqs = []
+    coreqs = []
     raw = ""
     desc = classInfo[0]
+    # uses full so that we can just get all info
+    full = "".join(classInfo).strip()
+    # look for starting key
+    if (key in full):
+        raw = full.split(key)[1].split(creditKey)[0]
+    else:
+        raw = full
+    if (key not in raw and coKey not in raw and preKey not in raw):
+        return [str([]), str([]), "", "" if desc.strip()[:1].isdigit() else desc] # this return runs before the credit-value check below, so blank a credit-only description here too
     #If the course does not have a description, usually this menas that classInfo[0] will be the credit value.
if desc.strip()[0].isdigit():
         desc = ""
-    for i in range(1, len(classInfo)):
-        if key in classInfo[i].strip():
-            combo = classInfo[i].strip()
-            combo = combo[len(key):]
-            coKey = "Corequisite"
-            if coKey in combo and preKey in combo:
-                coreqs = combo[combo.find(coKey) + len(coKey):]
-                prereqs = combo[len(preKey): combo.find(coKey)]
-            elif coKey in combo:
-                coreqs = combo[combo.find(coKey) + len(coKey):]
-            elif preKey in combo:
-                prereqs = combo[len(preKey):]
-            else:
-                #Default case where someone forgets the words we're looking for
-                #Note that there are still more edge cases(looking at you csci 6560 and 2110 in spring 2024)
-                prereqs = combo
-            prereqs = prereqs[prereqs.find(' '):255].strip()
-            coreqs = coreqs[coreqs.find(' '):255].strip()
-        if classInfo[i].strip() == (preKey + "s:"):
-            raw = classInfo[i+1].strip()
-    retList = [prereqs, coreqs, raw, desc]
+    #removes Prereq/Coreq starting keyphrase so we can focus on just coreqs, just prereqs, or both if it isn't distinguished
+    raw = raw.replace(key, "")
+    raw_prereqs = ""
+    raw_coreqs = ""
+    # checks if courses are prereqs, coreqs or both
+    if (preKey in raw and coKey in raw):
+        if (raw.find(coKey) < raw.find(preKey)):
+            raw_coreqs = raw.split(coKey)[1].split(preKey)[0]
+            raw_prereqs = raw.split(preKey)[1]
+        else:
+            raw_prereqs = raw.split(preKey)[1].split(coKey)[0]
+            raw_coreqs = raw.split(coKey)[1]
+    elif (preKey in raw):
+        raw_prereqs = raw
+    elif (coKey in raw):
+        raw_coreqs = raw
+    else:
+        raw_prereqs = raw
+        raw_coreqs = raw
+    #checks for co-listed courses to not include
+    if (extraKey in raw_prereqs):
+        raw_prereqs = raw_prereqs.split(extraKey)[0]
+
+    if (extraKey in raw_coreqs):
+        raw_coreqs = raw_coreqs.split(extraKey)[0] # trim the coreq text itself; do not overwrite the prereqs
+    # look for course codes
+    prereqs = findCourseCodes(raw_prereqs, subject_codes)
+    coreqs = findCourseCodes(raw_coreqs, subject_codes)
+    # take out repeats
+    prereqs = list(set(prereqs))
+    coreqs = list(set(coreqs))
+    # makes raw both prereqs and coreqs if they are different
+    if
(raw_prereqs != raw_coreqs):
+        raw = raw_prereqs + " " + raw_coreqs
+    else:
+        if (extraKey in raw):
+            raw = raw.split(extraKey)[0] # keep raw a string: the text before the co-listed note
+    retList = [str(prereqs), str(coreqs), raw, desc]
     return retList
 #Add the prereqs for a course to that course
-def getReqForClass(course: Course) -> None:
+def getReqForClass(course: Course, course_codes: list) -> None:
     semester = getSemester(course)
     url = "https://sis.rpi.edu/rss/bwckctlg.p_disp_course_detail?cat_term_in={}&subj_code_in={}&crse_numb_in={}".format(semester, course.major, course.code)
     session = requests.session()
     webres = session.get(url)
-    course.addReqsFromList(getReqFromLink(webres, course.code, course.major))
+    course.addReqsFromList(getReqFromLink(webres, course_codes))
 #Given a course, return the basevalue of that course, eg 2024-01 is returned as 202401
 def getSemester(course: Course) -> int:
     dates = course.sdate.split("-")
@@ -369,20 +412,28 @@ def writeCSV(info:list, filename: str):
 
 
 # This main function is helpful for running the full parser standalone, without needing environmental variables.
-def main():
+if __name__ == "__main__":
     options = Options()
-    #options.add_argument("--no-sandbox")
-    #options.add_argument("--disable-dev-shm-usage")
-    #options.add_argument("--headless")
-    #options.add_argument("--remote-debugging-port=9222")
-    driver = webdriver.Firefox()
-    driver.implicitly_wait(2)
-    login.login(driver)
-    start = time.time()
-    final = sisCourseSearch(driver, "spring2024")
-    end = time.time()
-    writeCSV(final, "test.csv")
-    print("Total Elapsed: " + str(end - start))
+    fp = webdriver.FirefoxProfile()
+    # fp.set_preference("network.cookie.cookieBehavior", 2)
+    fp.set_preference(
+        "general.useragent.override",
+        "Mozilla/5.0 (Android 4.4; Mobile; rv:41.0) Gecko/41.0 Firefox/41.0",
+    )
+    options.profile = fp
+    driver = webdriver.Firefox(options=options)
+    driver.delete_all_cookies()
+    try:
+        driver.implicitly_wait(2)
+        course_codes_dict = findAllSubjectCodes(driver)
+        login.login(driver)
+        start = time.time()
+        final = sisCourseSearch(driver, "spring2024", course_codes_dict)
+        end = time.time()
+        writeCSV(final, "test.csv")
+        print("Total Elapsed: " + str(end - start))
+    finally:
+        # always close the browser, but let any login/scrape error surface instead of swallowing it
+        driver.quit()
 
 
-#main()
diff --git a/src/api/app.py b/src/api/app.py
index 6a7419114..ecb1c1265 100644
--- a/src/api/app.py
+++ b/src/api/app.py
@@ -27,11 +27,9 @@
 import os
 from constants import Constants
 
-"""
-NOTE: on caching
-on add of semester of change of data from GET
-do a cache.clear() to ensure data integrity
-"""
+# NOTE: on caching
+# on add of semester of change of data from GET
+# do a cache.clear() to ensure data integrity
 
 app = FastAPI()
 app.add_middleware(SessionMiddleware,
@@ -47,7 +45,7 @@
 date_range_map = DateMapping.semester_date_mapping(db_conn)
 admin_info = AdminInfo.Admin(db_conn)
 course_select = CourseSelect.student_course_selection(db_conn)
-semester_info = SemesterInfo.semester_info(db_conn)
+semester_info = SemesterInfo.semester_info(db_conn, FastAPICache)
 professor_info = All_professors.Professor(db_conn, FastAPICache)
 users =
UserModel.User() @@ -144,9 +142,8 @@ def set_defaultSemester(semester_set: DefaultSemesterSetPydantic): success, error = admin_info.set_semester_default(semester_set.default) if success: return Response(status_code=200) - else: - print(error) - return Response(error.__str__(), status_code=500) + print(error) + return Response(error.__str__(), status_code=500) #Parses the data from the .csv data files @app.post('/api/bulkCourseUpload') @@ -177,9 +174,8 @@ async def uploadHandler( isSuccess, error = courses.populate_from_csv(csv_file) if (isSuccess): return Response(status_code=200) - else: - print(error) - return Response(error.__str__(), status_code=500) + print(error) + return Response(error.__str__(), status_code=500) @app.post('/api/bulkProfessorUpload') async def uploadJSON( @@ -208,10 +204,15 @@ async def uploadJSON( if isSuccess: print("SUCCESS") return Response(status_code=200) - else: - print("NOT WORKING") - print(error) - return Response(error.__str__(), status_code=500) + print("NOT WORKING") + print(error) + return Response(error.__str__(), status_code=500) + +@app.delete('/api/semester/{semester_id}') +async def remove_semester(semester_id: str): + print(semester_id) + semester, error = semester_info.delete_semester(semester=semester_id) + return Response(status_code=200) if not error else Response(str(error), status_code=500) @app.post('/api/final') async def uploadHandler( @@ -246,7 +247,6 @@ async def deleteHandler(semester: str): _, error = finals.delete_by_semester(semester) return Response(error.__str__(), status_code=500) if error else Response("Delete Successful", status_code=200) - @app.post('/api/mapDateRangeToSemesterPart') async def map_date_range_to_semester_part_handler(request: Request): # This depends on date_start, date_end, and semester_part_name being @@ -267,8 +267,7 @@ async def map_date_range_to_semester_part_handler(request: Request): semester_info.upsert(semester_title, is_publicly_visible) if (not error): return 
Response(status_code=200) - else: - return Response(error, status_code=500) + return Response(error, status_code=500) return Response("Did not receive proper form data", status_code=500) @app.get('/api/user/course') diff --git a/src/api/db/admin.py b/src/api/db/admin.py index 32d8cd988..1dc1c6b64 100644 --- a/src/api/db/admin.py +++ b/src/api/db/admin.py @@ -5,7 +5,7 @@ def __init__(self, db_conn): self.interface_name = 'admin_info' def get_semester_default(self): - # NOTE: COALESCE takes first non-null vaue from the list + # NOTE: COALESCE takes first non-null value from the list result, error = self.db_conn.execute(""" SELECT admin.semester FROM admin_settings admin UNION ALL @@ -21,8 +21,7 @@ def get_semester_default(self): if error: return (None, error) - else: - return (default_semester, error) + return (default_semester, error) def set_semester_default(self, semester): try: @@ -40,5 +39,4 @@ def set_semester_default(self, semester): if response != None: return(True, None) - else: - return (False, error) + return (False, error) diff --git a/src/api/db/semester_info.py b/src/api/db/semester_info.py index 5d4222ee7..b01f90da8 100644 --- a/src/api/db/semester_info.py +++ b/src/api/db/semester_info.py @@ -1,7 +1,20 @@ -class semester_info: +import asyncio - def __init__(self, db_wrapper): +class semester_info: + def __init__(self, db_wrapper, cache): self.db = db_wrapper + self.cache = cache + + def clear_cache(self): + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop and loop.is_running(): + loop.create_task(self.cache.clear(namespace="API_CACHE")) + else: + asyncio.run(self.cache.clear("API_CACHE")) def upsert(self, semester, isPublic): self.db.execute(""" @@ -28,3 +41,15 @@ def is_public(self, semester): if data is not None and len(data) > 0: return data[0]['public'] return False + + def delete_semester(self, semester): + # clear cache so this semester does not come up again + self.clear_cache() + return 
self.db.execute(""" + BEGIN TRANSACTION; + DELETE FROM semester_info + WHERE semester=%(Semester)s; + COMMIT; + """, { + "Semester": semester + }, isSELECT=False) \ No newline at end of file diff --git a/src/api/migrations/versions/2024-11-01_semester_course_foreign_key.py b/src/api/migrations/versions/2024-11-01_semester_course_foreign_key.py new file mode 100644 index 000000000..d5be3f692 --- /dev/null +++ b/src/api/migrations/versions/2024-11-01_semester_course_foreign_key.py @@ -0,0 +1,34 @@ +"""semester course foreign key + +Revision ID: 032a3a88b6d2 +Revises: c959c263997f +Create Date: 2024-11-01 21:24:08.801806 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '032a3a88b6d2' +down_revision = 'c959c263997f' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('course', sa.Column('professor', sa.VARCHAR(length=255), nullable=True)) + op.add_column('course', sa.Column('semester_id', sa.VARCHAR(length=255), nullable=True)) + op.create_foreign_key(None, 'course', 'semester_info', ['semester_id'], ['semester'], ondelete='CASCADE') + op.drop_column('course', 'semester') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
###
+    op.add_column('course', sa.Column('semester', sa.VARCHAR(length=255), autoincrement=False, nullable=True))
+    op.drop_constraint('course_semester_id_fkey', 'course', type_='foreignkey')  # a constraint can only be dropped by name
+    op.drop_column('course', 'semester_id')
+    op.drop_column('course', 'professor')
+    # ### end Alembic commands ###
diff --git a/src/api/migrations/versions/2024-11-05_semester_course_relationship.py b/src/api/migrations/versions/2024-11-05_semester_course_relationship.py
new file mode 100644
index 000000000..32c011e5b
--- /dev/null
+++ b/src/api/migrations/versions/2024-11-05_semester_course_relationship.py
@@ -0,0 +1,34 @@
+"""semester course relationship
+
+Revision ID: f62dd5e95906
+Revises: 032a3a88b6d2
+Create Date: 2024-11-05 21:04:15.417023
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'f62dd5e95906'
+down_revision = '032a3a88b6d2'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('course', sa.Column('semester', sa.VARCHAR(length=255), nullable=True))
+    op.drop_constraint('course_semester_id_fkey', 'course', type_='foreignkey')
+    op.create_foreign_key('course_semester_fkey', 'course', 'semester_info', ['semester'], ['semester'], ondelete='CASCADE')  # explicit name (same as the PostgreSQL default) so it can be dropped later
+    op.drop_column('course', 'semester_id')
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust!
###
+    op.add_column('course', sa.Column('semester_id', sa.VARCHAR(length=255), autoincrement=False, nullable=True))
+    op.drop_constraint('course_semester_fkey', 'course', type_='foreignkey')  # PostgreSQL default name of the FK on course.semester
+    op.create_foreign_key('course_semester_id_fkey', 'course', 'semester_info', ['semester_id'], ['semester'], ondelete='CASCADE')
+    op.drop_column('course', 'semester')
+    # ### end Alembic commands ###
diff --git a/src/api/tables/course.py b/src/api/tables/course.py
index a249dfbaa..1e42b6363 100644
--- a/src/api/tables/course.py
+++ b/src/api/tables/course.py
@@ -1,4 +1,5 @@
-from sqlalchemy import Column
+from sqlalchemy import Column, ForeignKey
+from sqlalchemy.orm import relationship
 from sqlalchemy.dialects.postgresql import TEXT, INTEGER, VARCHAR, DATE, TSVECTOR
 
 from .database import Base
@@ -8,7 +9,12 @@ class Course(Base):
 
     crn = Column(VARCHAR(length=255), primary_key=True)
     section = Column(VARCHAR(length=255))
-    semester = Column(VARCHAR(length=255))
+
+    semester = Column(VARCHAR(length=255), ForeignKey("semester_info.semester", ondelete="CASCADE"))
+    semester_r = relationship("SemesterInfo", back_populates="courses")
+
+    professor = Column(VARCHAR(length=255))
+
     min_credits = Column(INTEGER)
     max_credits = Column(INTEGER)
     date_start = Column(DATE)
diff --git a/src/api/tables/semester_info.py b/src/api/tables/semester_info.py
index 74d49678a..da302b663 100644
--- a/src/api/tables/semester_info.py
+++ b/src/api/tables/semester_info.py
@@ -1,4 +1,5 @@
-from sqlalchemy import Column, PrimaryKeyConstraint
+from sqlalchemy import Column
+from sqlalchemy.orm import relationship
 from sqlalchemy.dialects.postgresql import VARCHAR, BOOLEAN
 
 from .database import Base
@@ -8,3 +9,10 @@ class SemesterInfo(Base):
 
     semester = Column(VARCHAR(length=255), primary_key=True)
     public = Column(BOOLEAN)
+
+    courses = relationship(
+        "Course",
+        back_populates="semester_r",
+        cascade="all, delete",
+        passive_deletes=True,
+    )
\ No newline at end of file
diff --git a/src/web/.gitignore b/src/web/.gitignore
index b047fbdd1..18610ef23 100644 --- a/src/web/.gitignore +++ b/src/web/.gitignore @@ -3,6 +3,8 @@ node_modules /dist docs +.venv/ + # local env files .env.local .env.*.local