Skip to content

Commit

Permalink
Sports gymnastics 2024 (#198)
Browse files Browse the repository at this point in the history
* Add url checking file for gymnastics

* Fix covid model
  • Loading branch information
cjunwon authored Mar 5, 2024
1 parent 0027123 commit 3366ef8
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 5 deletions.
4 changes: 4 additions & 0 deletions _config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,9 @@ paginate_path: "/page/:num/"
# Build settings
markdown: kramdown

kramdown:
input: GFM
math_engine: mathjax

# plugins
plugins: [jekyll-paginate]
10 changes: 5 additions & 5 deletions _posts/2020-05-12-covid-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ A stochastic block model considers a set of student communities, grouped by depa
Then a matrix A defines the probabilities used to randomly assign students from each department to courses in other departments. Cell<sub>i, j</sub> of A represents the probability that a student housed in department i will take a course in department j. For this example, there's a probability of 0.7 that a sciences student will take a sciences class, a 0.1 probability they will take a business class, and a 0.2 probability they will take a humanities class.

$$\begin{pmatrix}
Sciences&Business&Humanities\\
0.7&0.1&0.2\\
0.1&0.8&0.1\\
0.1&0.2&0.7
Sciences&Business&Humanities\\
0.7&0.1&0.2\\
0.1&0.8&0.1\\
0.1&0.2&0.7
\end{pmatrix}$$

This example has simulated probabilities, but the real probabilities in our model are based on the number of GE, diversity and language courses in each major. So if a College of Letters and Science student in the mathematics department takes 140 units of major courses and 40 units of GEs, then the probability of the student being enrolled in the mathematics department is $$\frac{140}{180}$$, and, in the other GE-offering departments, is $$\frac{40}{180}$$, which in turn are distributed by department. So if there are three GE courses offered in total, with two of them being offered in department A and one being offered in department B, department A will have probability $$\frac{2}{3} * \frac{40}{180}$$, and department B will have probability $$\frac{1}{3} * \frac{40}{180}$$.
Expand All @@ -137,7 +137,7 @@ Sciences&Business&Humanities\\
0.7&0.1&0.2\\
0.1&0.8&0.1\\
0.1&0.2&0.7
\end{pmatrix}\\\$\$
\end{pmatrix}\$\$
This example has simulated probabilities, but the real probabilities in our model are based on the number of GE, diversity and language courses in each major. So if a College of Letters and Science student in the mathematics department takes 140 units of major courses and 40 units of GEs, then the probability of the student being enrolled in the mathematics department is $$\frac{140}{180}$$, and, in the other GE-offering departments, is $$\frac{40}{180}$$, which in turn are distributed by department. So if there are three GE courses offered in total, with two of them being offered in department A and one being offered in department B, department A will have probability $$\frac{2}{3} * \frac{40}{180}$$, and department B will have probability $$\frac{1}{3} * \frac{40}{180}$$.
Expand Down
159 changes: 159 additions & 0 deletions collaborations/sports-gymnastics-2024/gymnastics_score_crawl.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"example_url = \"https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2023/1/15/Stats.htm\"\n",
"\n",
"base_url = \"https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/\"\n",
"end_url = \"/Stats.htm\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import datetime\n",
"import time\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Create a list of all January and February dates for each year (a list of lists) for the years 2000-2024. Each year is a list of date strings in yyyy/m/d format\n",
"\n",
"years = list(range(2000,2025))\n",
"dates = []\n",
"\n",
"for year in years:\n",
" year_dates = []\n",
" for month in range(1,3):\n",
" for day in range(1,32):\n",
" try:\n",
" date = datetime.date(year, month, day)\n",
" year_dates.append(date.strftime(\"%Y/%-m/%-d\"))\n",
" except ValueError:\n",
" pass\n",
" dates.append(year_dates)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# For each year, create a list of urls for each date in the dates list\n",
"\n",
"urls = []\n",
"\n",
"for year in dates:\n",
" year_urls = []\n",
" for date in year:\n",
" year_urls.append(base_url + date + end_url)\n",
" urls.append(year_urls)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Iterate through each year's URLs and check whether each one exists (HTTP status 200). Keep only the first valid URL found for a year, then skip to the next year; failed requests are ignored.\n",
"\n",
"valid_urls = []\n",
"\n",
"for year in urls:\n",
" for url in year:\n",
" try:\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" valid_urls.append(url)\n",
" break\n",
" except:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2017/1/7/Stats.htm',\n",
" 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2018/1/7/Stats.htm',\n",
" 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2019/1/5/Stats.htm',\n",
" 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2020/1/5/Stats.htm',\n",
" 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2021/1/24/Stats.htm',\n",
" 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2022/1/18/Stats.htm',\n",
" 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2023/1/8/Stats.htm',\n",
" 'https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2024/1/10/Stats.htm']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"valid_urls"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# export valid urls to a txt file\n",
"\n",
"with open(\"valid_urls.txt\", \"w\") as file:\n",
" for url in valid_urls:\n",
" file.write(url + \"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
8 changes: 8 additions & 0 deletions collaborations/sports-gymnastics-2024/valid_urls.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2017/1/7/Stats.htm
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2018/1/7/Stats.htm
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2019/1/5/Stats.htm
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2020/1/5/Stats.htm
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2021/1/24/Stats.htm
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2022/1/18/Stats.htm
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2023/1/8/Stats.htm
https://s3.us-east-2.amazonaws.com/sidearm.nextgen.sites/uclabruins.com/documents/2024/1/10/Stats.htm

0 comments on commit 3366ef8

Please sign in to comment.