diff --git a/_config.yml b/_config.yml index 7c2cd888..866c6f79 100644 --- a/_config.yml +++ b/_config.yml @@ -33,5 +33,9 @@ paginate_path: "/page/:num/" # Build settings markdown: kramdown +kramdown: + input: GFM + math_engine: mathjax + # plugins plugins: [jekyll-paginate] diff --git a/_posts/2020-05-12-covid-model.md b/_posts/2020-05-12-covid-model.md index 12a66ea8..71ec6f51 100644 --- a/_posts/2020-05-12-covid-model.md +++ b/_posts/2020-05-12-covid-model.md @@ -124,10 +124,10 @@ A stochastic block model considers a set of student communities, grouped by depa Then a matrix A defines the probabilities used to randomly assign students from each department to courses in other departments. Celli, j of A represents the probability that a student housed in department i will take a course in department j. For this example, there's a probability of 0.7 that a sciences student will take a sciences class, a 0.1 probability they will take a business class, and a 0.2 probability they will take a humanities class. $$ -Sciences&Business&Humanities\\ -0.7&0.1&0.2\\ -0.1&0.8&0.1\\ -0.1&0.2&0.7 + Sciences&Business&Humanities\\ + 0.7&0.1&0.2\\ + 0.1&0.8&0.1\\ + 0.1&0.2&0.7 \end{pmatrix}$$ This example has simulated probabilities, but the real probabilities in our model are based on the number of GE, diversity and language courses in each major. So if a College of Letters and Science student in the mathematics department takes 140 units of major courses and 40 units of GEs, then the probability of the student being enrolled in the mathematics department is $$\frac{140}{180}$$, and, in the other GE-offering departments, is $$\frac{40}{180}$$, which in turn are distributed by department. So if there are three GE courses offered in total, with two of them being offered in department A and one being offered in department B, department A will have probability $$\frac{2}{3} * \frac{40}{180}$$, and department B will have probability $$\frac{1}{3} * \frac{40}{180}$$. @@ -137,7 +137,7 @@ Sciences&Business&Humanities\\ 0.7&0.1&0.2\\ 0.1&0.8&0.1\\ 0.1&0.2&0.7 -\end{pmatrix}\\\$\$ +\end{pmatrix}\$\$ This example has simulated probabilities, but the real probabilities in our model are based on the number of GE, diversity and language courses in each major. So if a College of Letters and Science student in the mathematics department takes 140 units of major courses and 40 units of GEs, then the probability of the student being enrolled in the mathematics department is $$\frac{140}{180}$$, and, in the other GE-offering departments, is $$\frac{40}{180}$$, which in turn are distributed by department. So if there are three GE courses offered in total, with two of them being offered in department A and one being offered in department B, department A will have probability $$\frac{2}{3} * \frac{40}{180}$$, and department B will have probability $$\frac{1}{3} * \frac{40}{180}$$. diff --git a/collaborations/sports-gymnastics-2024/gymnastics_score_crawl.ipynb b/collaborations/sports-gymnastics-2024/gymnastics_score_crawl.ipynb new file mode 100644 index 00000000..1196d611 --- /dev/null +++ b/collaborations/sports-gymnastics-2024/gymnastics_score_crawl.ipynb @@ -0,0 +1,159 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "example_url = \"https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2023/1/15/Stats.htm\"\n", + "\n", + "base_url = \"https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/\"\n", + "end_url = \"/Stats.htm\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import datetime\n", + "import time\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a list of all dates for each year (list in list) for the years 2015-2024. Each year is a list and date as a string in yyyy/m/d format\n", + "\n", + "years = list(range(2000,2025))\n", + "dates = []\n", + "\n", + "for year in years:\n", + " year_dates = []\n", + " for month in range(1,3):\n", + " for day in range(1,32):\n", + " try:\n", + " date = datetime.date(year, month, day)\n", + " year_dates.append(date.strftime(\"%Y/%-m/%-d\"))\n", + " except ValueError:\n", + " pass\n", + " dates.append(year_dates)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# For each year, create a list of urls for each date in the dates list\n", + "\n", + "urls = []\n", + "\n", + "for year in dates:\n", + " year_urls = []\n", + " for date in year:\n", + " year_urls.append(base_url + date + end_url)\n", + " urls.append(year_urls)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# iterate through each url and check if url exists. If it does, add to list of valid urls. If not, pass. If there is a valid url for a year, skip to the next year.\n", + "\n", + "valid_urls = []\n", + "\n", + "for year in urls:\n", + " for url in year:\n", + " try:\n", + " response = requests.get(url)\n", + " if response.status_code == 200:\n", + " valid_urls.append(url)\n", + " break\n", + " except:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2017/1/7/Stats.htm',\n", + " 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2018/1/7/Stats.htm',\n", + " 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2019/1/5/Stats.htm',\n", + " 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2020/1/5/Stats.htm',\n", + " 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2021/1/24/Stats.htm',\n", + " 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2022/1/18/Stats.htm',\n", + " 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2023/1/8/Stats.htm',\n", + " 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2024/1/10/Stats.htm']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "valid_urls" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# export valid urls to a txt file\n", + "\n", + "with open(\"valid_urls.txt\", \"w\") as file:\n", + " for url in valid_urls:\n", + " file.write(url + \"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/collaborations/sports-gymnastics-2024/valid_urls.txt b/collaborations/sports-gymnastics-2024/valid_urls.txt new file mode 100644 index 00000000..1bfe0041 --- /dev/null +++ b/collaborations/sports-gymnastics-2024/valid_urls.txt @@ -0,0 +1,8 @@ +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2017/1/7/Stats.htm +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2018/1/7/Stats.htm +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2019/1/5/Stats.htm +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2020/1/5/Stats.htm +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2021/1/24/Stats.htm +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2022/1/18/Stats.htm +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2023/1/8/Stats.htm +https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2024/1/10/Stats.htm