Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Whatsdue: fixed to work with the new JAC system #207

Merged
merged 2 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 96 additions & 121 deletions uqcsbot/utils/uq_course_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime
from dateutil import parser
from bs4 import BeautifulSoup, element
from typing import List, Dict, Optional, Literal, Tuple
from typing import Optional, Literal
from dataclasses import dataclass
import json
import re
Expand All @@ -27,23 +27,23 @@ class Offering:

CampusType = Literal["St Lucia", "Gatton", "Herston"]
# The codes used internally within UQ systems
campus_codes: Dict[CampusType, str] = {
campus_codes: dict[CampusType, str] = {
"St Lucia": "STLUC",
"Gatton": "GATTN",
"Herston": "HERST",
}

ModeType = Literal["Internal", "External", "Flexible Delivery", "Intensive"]
# The codes used internally within UQ systems
mode_codes: Dict[ModeType, str] = {
mode_codes: dict[ModeType, str] = {
"Internal": "IN",
"External": "EX",
"Flexible Delivery": "FD",
"Intensive": "IT",
}

SemesterType = Literal["1", "2", "Summer"]
semester_codes: Dict[SemesterType, int] = {"1": 1, "2": 2, "Summer": 3}
semester_codes: dict[SemesterType, int] = {"1": 1, "2": 2, "Summer": 3}

semester: SemesterType
campus: CampusType
Expand Down Expand Up @@ -111,54 +111,59 @@ def estimate_current_semester() -> SemesterType:
@dataclass
class AssessmentItem:
course_name: str
category: str
task: str
due_date: str
task_details_url: str
due_date: str # This often also contains a lot of description
weight: str

def get_parsed_due_date(self):
def get_parsed_due_date(self) -> Optional[tuple[datetime, datetime]]:
"""
Returns the parsed due date for the given assessment item as a datetime
49Indium marked this conversation as resolved.
Show resolved Hide resolved
object. If the date cannot be parsed, a DateSyntaxException is raised.
49Indium marked this conversation as resolved.
Show resolved Hide resolved
"""
if self.due_date == "Examination Period":
if self.due_date.startswith("End of Semester Exam Period"):
return get_current_exam_period()
parser_info = parser.parserinfo(dayfirst=True)
try:
# If a date range is detected, attempt to split into start and end
# dates. Else, attempt to just parse the whole thing.
if " - " in self.due_date:
start_date, end_date = self.due_date.split(" - ", 1)
start_datetime = parser.parse(start_date, parser_info)
end_datetime = parser.parse(end_date, parser_info)
return start_datetime, end_datetime
due_datetime = parser.parse(self.due_date, parser_info)
return due_datetime, due_datetime
except Exception:
raise DateSyntaxException(self.due_date, self.course_name)
potential_date_strings: list[str] = re.findall(
r"\d\d?/\d\d?/\d\d\d\d( \d\d?(:\d\d)?( [ap]m)?)?", self.due_date
)
dates: list[datetime] = []

for potential_date_string in potential_date_strings:
try:
date = parser.parse(potential_date_string, parser_info)
dates.append(date)
except Exception:
# No need to do anything if the date cannot be parsed
pass

if dates:
return min(dates), max(dates)
return None

def is_after(self, cutoff: datetime):
49Indium marked this conversation as resolved.
Show resolved Hide resolved
"""
Returns whether the assessment occurs after the given cutoff.
49Indium marked this conversation as resolved.
Show resolved Hide resolved
"""
try:
start_datetime, end_datetime = self.get_parsed_due_date()
except DateSyntaxException:
date_range = self.get_parsed_due_date()
if date_range is None:
# If we can't parse a date, we're better off keeping it just in case.
return True
return end_datetime >= cutoff if end_datetime else start_datetime >= cutoff

_, end_datetime = date_range
return end_datetime >= cutoff

def is_before(self, cutoff: datetime):
"""
Returns whether the assessment occurs before the given cutoff.
"""
try:
start_datetime, _ = self.get_parsed_due_date()
except DateSyntaxException:
# TODO bot.logger.error(e.message)
date_range = self.get_parsed_due_date()
if date_range is None:
# If we can't parse a date, we're better off keeping it just in case.
# TODO(mitch): Keep track of these instances to attempt to accurately
# parse them in future. Will require manual detection + parsing.
return True

start_datetime, _ = date_range
return start_datetime <= cutoff

def get_weight_as_int(self) -> Optional[int]:
Expand All @@ -171,18 +176,6 @@ def get_weight_as_int(self) -> Optional[int]:
return None


class DateSyntaxException(Exception):
"""
Raised when an unparsable date syntax is encountered.
"""

def __init__(self, date: str, course_name: str):
self.message = f"Could not parse date '{date}' for course '{course_name}'."
self.date = date
self.course_name = course_name
super().__init__(self.message, self.date, self.course_name)


class CourseNotFoundException(Exception):
"""
Raised when a given course cannot be found for UQ.
Expand Down Expand Up @@ -214,15 +207,27 @@ class AssessmentNotFoundException(Exception):
Raised when the assessment table cannot be found for assess page.
"""

def __init__(self, course_names: List[str], offering: Optional[Offering] = None):
def __init__(self, course_name: str, offering: Optional[Offering] = None):
49Indium marked this conversation as resolved.
Show resolved Hide resolved
if offering is None:
self.message = (
f"Could not find the assessment table for '{', '.join(course_names)}'."
)
self.message = f"Could not find the assessment table for '{course_name}'."
else:
self.message = f"Could not find the assessment table for '{', '.join(course_names)}' during semester {offering.semester} at {offering.campus} done in mode '{offering.mode}'."
self.course_names = course_names
super().__init__(self.message, self.course_names)
self.message = f"Could not find the assessment table for '{course_name}' during semester {offering.semester} at {offering.campus} done in mode '{offering.mode}'."
self.course_name = course_name
super().__init__(self.message, self.course_name)


class AssessmentNotParseableException(Exception):
"""
Raised when the assessment cannot be parsed from the ECP.
"""

def __init__(self, course_name: str, course_profile_url: str):
self.message = (
f"Could not parse an assessment for '{course_name}': {course_profile_url}"
)
self.course_name = course_name
self.course_profile_url = course_profile_url
super().__init__(self.message, self.course_name)


class HttpException(Exception):
Expand All @@ -239,7 +244,7 @@ def __init__(self, url: str, status_code: int):


def get_uq_request(
url: str, params: Optional[Dict[str, str]] = None
url: str, params: Optional[dict[str, str]] = None
) -> requests.Response:
"""
Handles specific error handelling and header provision for requests.get to
49Indium marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -296,16 +301,6 @@ def get_course_profile_url(
return url


def get_course_profile_id(course_name: str, offering: Optional[Offering] = None) -> int:
"""
Returns the ID to the latest course profile for the given course.
"""
profile_url = get_course_profile_url(course_name, offering=offering)
# The profile url looks like this
# https://course-profiles.uq.edu.au/student_section_loader/section_1/100728
return int(profile_url[profile_url.rindex("/") + 1 :])


def get_current_exam_period():
"""
Returns the start and end datetimes for the current semester's exam period.
Expand All @@ -332,52 +327,31 @@ def get_current_exam_period():
return start_datetime, end_datetime


def get_course_assessment_page(
course_names: List[str], offering: Optional[Offering]
) -> str:
"""
Determines the course ids from the course names and returns the
url to the assessment table for the provided courses
"""
profile_ids = map(
lambda course: str(get_course_profile_id(course, offering=offering)),
course_names,
)
return BASE_ASSESSMENT_URL + ",".join(profile_ids)


def get_course_assessment(
49Indium marked this conversation as resolved.
Show resolved Hide resolved
course_names: List[str],
cutoff: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
assessment_url: Optional[str] = None,
offering: Optional[Offering] = None,
) -> List[AssessmentItem]:
course_name: str,
offering: Offering,
) -> list[AssessmentItem]:
"""
Returns all the course assessment for the given
courses that occur after the given cutoff.
Returns all the assessment for the given
course that occur after the given cutoff.
49Indium marked this conversation as resolved.
Show resolved Hide resolved
"""
if assessment_url is None:
joined_assessment_url = get_course_assessment_page(course_names, offering)
else:
joined_assessment_url = assessment_url
http_response = get_uq_request(joined_assessment_url)
course_profile_url = get_course_profile_url(course_name, offering=offering)
course_assessment_url = course_profile_url + "#assessment"

http_response = get_uq_request(course_assessment_url)
if http_response.status_code != requests.codes.ok:
raise HttpException(joined_assessment_url, http_response.status_code)
raise HttpException(course_assessment_url, http_response.status_code)
html = BeautifulSoup(http_response.content, "html.parser")
assessment_table = html.find("table", class_="tblborder")

assessment_table = html.find("div", class_="assessment-summary-table")
if not isinstance(assessment_table, element.Tag):
raise AssessmentNotFoundException(course_names, offering)
raise AssessmentNotFoundException(course_name, offering)
# Start from 1st index to skip over the row containing column names.
assessment = assessment_table.findAll("tr")[1:]
parsed_assessment = map(get_parsed_assessment_item, assessment)
# If no cutoff is specified, set cutoff to UNIX epoch (i.e. filter nothing).
cutoff_min = cutoff[0] or datetime.min
cutoff_max = cutoff[1] or datetime.max
filtered_assessment = filter(
lambda item: item.is_after(cutoff_min) and item.is_before(cutoff_max),
parsed_assessment,
)
return list(filtered_assessment)
assessment_table = assessment_table.findAll("tr")[1:]
return [
get_parsed_assessment_item(row, course_name, course_profile_url)
for row in assessment_table
]


def get_element_inner_html(dom_element: element.Tag):
Expand All @@ -388,31 +362,32 @@ def get_element_inner_html(dom_element: element.Tag):


def get_parsed_assessment_item(
assessment_item_tag: element.Tag,
assessment_item_tag: element.Tag, course_name: str, course_profile_url: str
) -> AssessmentItem:
"""
Returns the parsed assessment details for the
given assessment item table row element.

Note: Because of the inconsistency of UQ assessment details, I've had to
make some fairly strict assumptions about the structure of each field.
This is likely insufficient to handle every course's
structure, and thus is subject to change.
given assessment item table row element in the ECP.
"""
course_name, task, due_date, weight = assessment_item_tag.findAll("div")
# Handles courses of the form 'CSSE1001 - Sem 1 2018 - St Lucia - Internal'.
# Thus, this bit of code will extract the course.
course_name = course_name.text.strip().split(" - ")[0]
# Handles tasks of the form 'Computer Exercise<br/>Assignment 2'.
task = get_element_inner_html(task).strip().replace("<br/>", " - ")
# Handles due dates of the form '26 Mar 18 - 27 Mar 18<br/>Held in Week 6
# Learning Lab Sessions (Monday/Tuesday)'. Thus, this bit of code will
# keep only the date portion of the field.
due_date = get_element_inner_html(due_date).strip().split("<br/>")[0]
# Handles weights of the form '30%<br/>Alternative to oral presentation'.
# Thus, this bit of code will keep only the weight portion of the field.
weight = get_element_inner_html(weight).strip().split("<br/>")[0]
return AssessmentItem(course_name, task, due_date, weight)
assessment_cells: element.ResultSet[element.Tag] = assessment_item_tag.findAll("td")
category, task, weight, due_date = assessment_cells

category = category.text.strip()

task = task.findChild("a")
if not isinstance(task, element.Tag):
raise AssessmentNotParseableException(course_name, course_profile_url)
task_description_url = course_profile_url + task.attrs["href"]
task = task.text.strip()

weight = weight.text.strip()

due_date_paragraphs: element.ResultSet[element.Tag] = due_date.findAll("p")
due_date_lines = (p.text.strip() for p in due_date_paragraphs)
due_date = "\n".join(line for line in due_date_lines if line)

return AssessmentItem(
course_name, category, task, task_description_url, due_date, weight
)


class Exam:
Expand All @@ -433,7 +408,7 @@ def get_past_exams_page_url(course_code: str) -> str:
return BASE_PAST_EXAMS_URL + course_code


def get_past_exams(course_code: str) -> List[Exam]:
def get_past_exams(course_code: str) -> list[Exam]:
"""
Takes the course code and generates each result in the format:
('year Sem X:', link)
Expand All @@ -450,7 +425,7 @@ def get_past_exams(course_code: str) -> List[Exam]:
return []
exam_list_json = exam_list_json[0]

exam_list: List[Exam] = []
exam_list: list[Exam] = []
for exam_json in exam_list_json:
year = int(exam_json[0]["examYear"])
# Semesters are given as "Sem.1", so we will change this to "Sem 1"
Expand Down
Loading