Skip to content

Commit

Permalink
Implement get_schedule
Browse files Browse the repository at this point in the history
This should fix issue #140 (schedule tab only displays first three
classes). The new implementation of get_schedule is still a scraper
because the schedule does not seem to be exposed in the REST API, but it
is written in a cleaner and type-safe way.
  • Loading branch information
psvenk committed Jan 5, 2021
1 parent f4f9a74 commit 5c3ee6e
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 38 deletions.
31 changes: 28 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"tilt.js": "^1.2.1"
},
"devDependencies": {
"@types/jsdom": "^12.2.4",
"@types/node-fetch": "^2.5.7",
"typescript": "^4.1.2"
}
Expand Down
196 changes: 161 additions & 35 deletions src/scrape.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import fetch from "node-fetch";
import { URLSearchParams } from "url";
import { JSDOM } from "jsdom";

import type {
Session,
PDFFileInfo,
ClassInfo,
ClassDetails,
} from "./types";
import type { PDFFile, OverviewItem } from "./types-shared";

import type {
PDFFile,
OverviewItem,
Schedule,
ScheduleItem
} from "./types-shared";

// Using `import type` with an enum disallows accessing the enum variants
import { Quarter } from "./types-shared";

Expand All @@ -20,11 +28,156 @@ export async function get_student(
const academics = await get_academics(session, student_oid, quarter_oids);
const class_details = await Promise.all(academics.map(async class_info =>
get_class_details(session, class_info)));
const overview = get_overview(class_details);
const overview = assemble_overview(class_details);
return overview;
});
}

/**
* Return an array containing all PDF files
*/
export async function get_pdf_files(
username: string, password: string
): Promise<PDFFile[]> {
return await get_session(username, password, async session => {
const pdf_files = await list_pdf_files(session);
return await Promise.all(pdf_files.map(async ({ id, filename }) => ({
title: filename,
content: await download_pdf(session, id),
})));
});
}

export async function get_schedule(
username: string, password: string
): Promise<Schedule> {
return await get_session(username, password, async session => {
const current_quarter = await get_current_quarter(session);
const initial_page = await (await fetch(
"https://aspen.cpsd.us/aspen/studentScheduleContextList.do?navkey=myInfo.sch.list", {
headers: {
"Cookie": `JSESSIONID=${session.session_id}`,
},
}
)).text();
// This is a term OID that is specific to the schedule view (not the same
// as the OIDs in the output of get_quarter_oids)
const [, term_oid] = new RegExp(
String.raw`<option value="(.+)">Q${current_quarter}</option>`
).exec(initial_page) as RegExpExecArray;

const schedule_page = await (await fetch(
"https://aspen.cpsd.us/aspen/studentScheduleMatrix.do?" +
new URLSearchParams({
"navkey": "myInfo.sch.matrix",
"termOid": term_oid,
}), {
headers: {
"Cookie": `JSESSIONID=${session.session_id}`,
},
}
)).text();

const { window: { document } } = new JSDOM(schedule_page);

const rows = document.querySelectorAll(
"table[cellspacing='1'] > tbody > tr:not([class])"
);
// Get a matrix of the cells in the first three columns of the table, then
// transpose it to get a list of periods, a list of silver day classes
// (from Monday), and a list of black day classes (from Tuesday).
const [periods, silver_html, black_html] = transpose([...rows].map(row =>
[1, 2, 3].map(n =>
row.querySelector(`td:nth-child(${n})`)
?.querySelector("td, th")?.innerHTML.trim() ?? ""
)
));
const isScheduleItem =
function(x: ScheduleItem | undefined): x is ScheduleItem {
return x !== undefined;
};
const [black, silver] = [black_html, silver_html].map(arr =>
arr.map((x, i) => {
if (x) {
const lines = x.split("<br>");

// Decode HTML entities (https://stackoverflow.com/a/7394787)
const textarea = document.createElement("textarea");
const [id, name, teacher, room] = lines.map(line => {
textarea.innerHTML = line;
return textarea.value;
});
const aspenPeriod = periods[i];

return { id, name, teacher, room, aspenPeriod }
}
}).filter(isScheduleItem)
);

return { black, silver };
});
}

// https://stackoverflow.com/a/46805290
function transpose<T>(matrix: T[][]): T[][] {
return matrix[0].map((col, i) => matrix.map(row => row[i]));
}

/**
* Get the current quarter (Q1, Q2, Q3, Q4). In the absence of the OID of a
* class (which can be fetched from get_academics), this function makes a
* request to get the OID of one class.
*/
async function get_current_quarter(
session: Session, class_info?: ClassInfo
): Promise<Quarter> {
let oid: string;
if (class_info) {
({ oid } = class_info);
} else {
const { student_oid } = await get_student_info(session);
[{ oid }] = await (await fetch(
"https://aspen.cpsd.us/aspen/rest/lists/academics.classes.list?fieldSetOid=fsnX2Cls++++++&" +
new URLSearchParams({
"selectedStudent": student_oid,
"customParams": "selectedYear|current;selectedTerm|all",
}), {
headers: {
"Cookie": `JSESSIONID=${session.session_id}`,
},
}
)).json();
}

const { currentTermIndex } = await (await fetch(
`https://aspen.cpsd.us/aspen/rest/studentSchedule/${oid}/gradeTerms`, {
headers: {
"Cookie": `JSESSIONID=${session.session_id}`,
},
}
)).json();

if (currentTermIndex + 1 in Quarter) {
return currentTermIndex + 1;
} else {
return 1;
}
}

/**
* Get a list of published reports (PDF files)
*/
async function list_pdf_files({ session_id }: Session): Promise<PDFFileInfo[]> {
const pdf_files: any[] = await (await fetch(
"https://aspen.cpsd.us/aspen/rest/reports", {
headers: {
"Cookie": `JSESSIONID=${session_id}`,
},
}
)).json();
return pdf_files.filter(({ contentTypeId }) => contentTypeId == "cttPdf");
}

/**
* Get name and OID of student
*/
Expand Down Expand Up @@ -53,7 +206,7 @@ async function get_quarter_oids(
}
)).json();
for (const { gradeTermId, oid } of terms) {
const [, quarter] = /^Q(\d)$/.exec(gradeTermId) || [];
const [, quarter] = /^Q(\d)$/.exec(gradeTermId) as RegExpExecArray;
const quarter_num = parseInt(quarter) ?? -1;
if (quarter_num in Quarter) {
mapping.set(quarter_num, oid);
Expand Down Expand Up @@ -175,7 +328,7 @@ async function get_class_details(
return { attendance, categories, ...class_info };
}

function get_overview(class_details: ClassDetails[]): OverviewItem[] {
function assemble_overview(class_details: ClassDetails[]): OverviewItem[] {
return class_details.map(({
name,
grades,
Expand Down Expand Up @@ -211,35 +364,6 @@ function get_overview(class_details: ClassDetails[]): OverviewItem[] {
});
}

/**
* Return an array containing all PDF files
*/
export async function get_pdf_files(
username: string, password: string
): Promise<PDFFile[]> {
return await get_session(username, password, async session => {
const pdf_files = await list_pdf_files(session);
return await Promise.all(pdf_files.map(async ({ id, filename }) => ({
title: filename,
content: await download_pdf(session, id),
})));
});
}

/**
* Get a list of published reports (PDF files)
*/
async function list_pdf_files({ session_id }: Session): Promise<PDFFileInfo[]> {
const pdf_files: any[] = await (await fetch(
"https://aspen.cpsd.us/aspen/rest/reports", {
headers: {
"Cookie": `JSESSIONID=${session_id}`,
},
}
)).json();
return pdf_files.filter(({ contentTypeId }) => contentTypeId == "cttPdf");
}

/**
* Download a PDF file by ID (from list_pdf_files)
*/
Expand Down Expand Up @@ -267,11 +391,13 @@ async function get_session<T>(
const login_page = await (await fetch(
"https://aspen.cpsd.us/aspen/logon.do"
)).text();
const [, session_id] = /sessionId='(.+)';/.exec(login_page) || [];
const [, session_id] = /sessionId='(.+)';/.exec(
login_page
) as RegExpExecArray;
const [, apache_token] =
/name="org.apache.struts.taglib.html.TOKEN" value="(.+)"/.exec(
login_page
) || [];
) as RegExpExecArray;

// Submit login username, password, and session information
const login_response = await (await fetch(
Expand Down

0 comments on commit 5c3ee6e

Please sign in to comment.