Skip to content

Commit

Permalink
date filtering feature and other improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Vicbi committed Feb 13, 2024
1 parent 53132a2 commit 7abc6a8
Show file tree
Hide file tree
Showing 2 changed files with 196 additions and 119 deletions.
198 changes: 116 additions & 82 deletions SpeziDataPipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"source": [
"import firebase_admin\n",
"from firebase_admin import credentials, db, firestore\n",
"import pickle\n",
"\n",
"from helpers import *"
]
Expand All @@ -36,11 +37,13 @@
"metadata": {},
"outputs": [],
"source": [
"serviceAccountKey_file = 'path/to/serviceAccountKey.json'\n",
"serviceAccountKey_file = 'serviceAccountKey_files/cs342-2024-example-application-firebase-adminsdk-z4ske-800739b1af.json'\n",
"project_id = 'cs342-2024-example-application'\n",
"collection_name = 'users'\n",
"\n",
"cred = credentials.Certificate(serviceAccountKey_file)\n",
"firebase_admin.initialize_app(cred)\n",
"if not firebase_admin._apps:\n",
" cred = credentials.Certificate(serviceAccountKey_file)\n",
" firebase_admin.initialize_app(cred)\n",
"\n",
"db = firestore.client()"
]
Expand Down Expand Up @@ -68,16 +71,119 @@
"metadata": {},
"outputs": [],
"source": [
"users = export_users_to_csv(db, 'users','users_summary.csv')\n",
"users = export_users_to_csv(db, collection_name, 'users_summary.csv')\n",
"users.head()"
]
},
{
"cell_type": "markdown",
"id": "e51e6fb4-6b38-43bf-854d-8ea0b1285e65",
"metadata": {},
"source": [
"## Explore Data Types in the Database"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf179c80-22fd-431d-ba9e-8af738f42eb5",
"metadata": {},
"outputs": [],
"source": [
"unique_loinc_codes, display_to_loinc_dict = get_unique_codes_and_displays(db, collection_name)\n",
"print(display_to_loinc_dict)"
]
},
{
"cell_type": "markdown",
"id": "c0324030-2777-44bf-942c-92bf00b59b7a",
"metadata": {},
"source": [
"# Step 4: Analyze data"
]
},
{
"cell_type": "markdown",
"id": "fadff7a1-c34e-4c11-b0bc-1cab5e9a7a29",
"metadata": {},
"source": [
"## Fetch and flatten data for the selected code across all documents (users) in the \"users\" collection"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76962068-a60f-4587-8cca-512ff89e244c",
"metadata": {},
"outputs": [],
"source": [
"selected_code = '9052-2'\n",
"\n",
"flattened_df = fetch_and_flatten_data(db, collection_name, selected_code) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b2afd88-10fc-4213-b267-773cca6e5ef6",
"metadata": {},
"outputs": [],
"source": [
"flattened_df.head()"
]
},
{
"cell_type": "markdown",
"id": "c5d5d3ec-0100-4305-9a59-779302e15706",
"metadata": {},
"source": [
"## Filter data by removing values outside a predefined range/threshold"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e627901c-f465-4dbd-945e-35ace2db24af",
"metadata": {},
"outputs": [],
"source": [
"filtered_df = remove_outliers(flattened_df) "
]
},
{
"cell_type": "markdown",
"id": "ab01cf7b-623d-4f56-857f-bb16e4973926",
"metadata": {},
"source": [
"## Aggregate data by date for each user"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3fca1d1-7540-4b8b-a6ba-1e75a2e80d62",
"metadata": {},
"outputs": [],
"source": [
"daily_df = calculate_daily_data(filtered_df, save_as_csv=True) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a68ba836-3f77-4a66-90f6-ec781aad13ea",
"metadata": {},
"outputs": [],
"source": [
"daily_df.head()"
]
},
{
"cell_type": "markdown",
"id": "a814801f-de36-4b23-b753-3ee7ff0a68fb",
"metadata": {},
"source": [
"# Filter data by dates"
"# Single-line code option: analyze_data() function"
]
},
{
Expand All @@ -87,8 +193,7 @@
"metadata": {},
"outputs": [],
"source": [
"date1 = \"2022-02-01\"; date2 = \"2023-07-28\"\n",
"flattened_data, filtered_data, daily_data = analyze_data(db, 'users', date1=None, date2=None, save_as_csv=True)"
"flattened_data, filtered_data, daily_data = analyze_data(db, collection_name)"
]
},
{
Expand All @@ -101,83 +206,12 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "5a09ab04-4c93-44dd-b95c-1b825f645b58",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d1b21056df864faea29932a8379917b9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Dropdown(description='Data type:', options=('Daily Step Count', 'Daily Dietary Protein', 'Daily Dietary Energy…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c76bbe99de8b4dcfb6fa7d3ee773b68e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Dropdown(description='User:', options=(), value=None)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "339316e94a8b4fdcad94e895bd1eda9d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"IntSlider(value=0, continuous_update=False, description='Min value:', max=1000)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8475d49655844ad6952c5d040e0df352",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"IntSlider(value=780, continuous_update=False, description='Max value:', max=5000)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3053bee68a3144bb9fce9966c7d33a8d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"vizualize_data(daily_data, date1, date2)"
"outputs": [],
"source": [
"vizualize_data(daily_data)"
]
}
],
Expand Down
Loading

0 comments on commit 7abc6a8

Please sign in to comment.