Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jcunni/sprucing up #4

Merged
merged 7 commits into from
May 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 72 additions & 54 deletions main_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,89 +4,107 @@
import snowflake.connector as sn
from dotenv import load_dotenv
import os
import util

load_dotenv()

st.markdown("# High Level Summary")
st.sidebar.markdown("# Claims Summary")

# Connect and fetch data
con = sn.connect(
user=os.getenv('SNOWFLAKE_USER'),
password=os.getenv('SNOWFLAKE_PASSWORD'),
account=os.getenv('SNOWFLAKE_ACCOUNT'),
warehouse=os.getenv('SNOWFLAKE_WH'),
role=os.getenv('SNOWFLAKE_ROLE')
)
cs = con.cursor()
cs.execute("""SELECT PT.*, PB.MEMBER_COUNT FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT
conn = util.connection()

@st.cache_data
def summary_data():
query = """SELECT PT.*, PB.MEMBER_COUNT, PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT
LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH,
COUNT(*) AS MEMBER_COUNT
COUNT(*) AS MEMBER_COUNT,
SUM(PHARMACY_PAID) AS PHARMACY_SPEND
FROM TUVA_PROJECT_DEMO.PMPM.PMPM_BUILDER
GROUP BY YEAR_MONTH) AS PB
ON PT.YEAR_MONTH = PB.YEAR_MONTH;""")
ON PT.YEAR_MONTH = PB.YEAR_MONTH;"""

data = util.safe_to_pandas(conn, query)
data['year_month'] = pd.to_datetime(data['year_month'], format='%Y-%m').dt.date
return data

@st.cache_data
def gender_data():
    """Fetch the number of patients per gender.

    Runs one aggregate query against ``TUVA_PROJECT_DEMO.CORE.PATIENT`` via
    ``util.safe_to_pandas`` on the module-level ``conn`` and returns that
    helper's result unchanged. The call is memoized by ``st.cache_data``.

    NOTE(review): the chart code in this file reads the columns as
    ``gender`` / ``count`` (lower case), so the helper presumably lower-cases
    column names -- confirm against ``util.safe_to_pandas``.
    """
    sql = """SELECT GENDER, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;"""
    return util.safe_to_pandas(conn, sql)

@st.cache_data
def race_data():
    """Fetch the number of patients per race.

    Runs one aggregate query against ``TUVA_PROJECT_DEMO.CORE.PATIENT`` via
    ``util.safe_to_pandas`` on the module-level ``conn`` and returns that
    helper's result unchanged. The call is memoized by ``st.cache_data``.

    NOTE(review): the chart code in this file reads the columns as
    ``race`` / ``count`` (lower case), so the helper presumably lower-cases
    column names -- confirm against ``util.safe_to_pandas``.
    """
    sql = """SELECT RACE, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;"""
    return util.safe_to_pandas(conn, sql)

data = cs.fetch_pandas_all()
data['YEAR_MONTH'] = pd.to_datetime(data['YEAR_MONTH'], format='%Y-%m').dt.date
@st.cache_data
def age_data():
    """Fetch the number of patients per age bucket.

    The bucketing happens entirely in SQL: ``current_date() - BIRTH_DATE``
    divided by 365 (through ``div0``) is mapped onto the labels ``'34-48'``
    (every value below 49), ``'49-64'``, ``'65-78'``, ``'79-98'`` and
    ``'99+'``; rows are grouped and ordered by that label.

    The query is run via ``util.safe_to_pandas`` on the module-level ``conn``
    and the helper's result is returned unchanged. The call is memoized by
    ``st.cache_data``.
    """
    # The SQL text is kept exactly as-is (including its line layout) so the
    # statement sent to the warehouse does not change.
    sql = """SELECT CASE
WHEN div0(current_date() - BIRTH_DATE, 365) < 49 THEN '34-48'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 49 AND div0(current_date() - BIRTH_DATE, 365) < 65 THEN '49-64'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 65 AND div0(current_date() - BIRTH_DATE, 365) < 79 THEN '65-78'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 79 AND div0(current_date() - BIRTH_DATE, 365) < 99 THEN '79-98'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 99 THEN '99+' END
AS AGE_GROUP,
COUNT(*) AS COUNT
FROM TUVA_PROJECT_DEMO.CORE.PATIENT
GROUP BY 1
ORDER BY 1;"""
    return util.safe_to_pandas(conn, sql)
data = summary_data()
demo_gender = gender_data()
demo_race = race_data()
demo_age = age_data()

st.markdown("### Summary of Claims")
st.markdown("# Summary of Claims")
start_date, end_date = st.select_slider("Select date range for claims summary",
options=data['YEAR_MONTH'].sort_values(),
value=(data['YEAR_MONTH'].min(), data['YEAR_MONTH'].max()))
filtered_data = data.loc[(data['YEAR_MONTH'] >= start_date) & (data['YEAR_MONTH'] <= end_date), :]
options=data['year_month'].sort_values(),
value=(data['year_month'].min(), data['year_month'].max()))
filtered_data = data.loc[(data['year_month'] >= start_date) & (data['year_month'] <= end_date), :]

st.markdown("### High Level Summary")
st.markdown("""At a glance, see the total medical spend and PMPM for the chosen time period. As well as a trend
graph for other important financial metrics""")
st.sidebar.markdown("# Claims Summary")

# Summary Metrics
total_spend = filtered_data['MEDICAL_SPEND'].sum()
total_member_months = filtered_data['MEMBER_MONTH_COUNT'].sum()
total_spend = filtered_data['medical_spend'].sum()
total_member_months = filtered_data['member_month_count'].sum()
avg_pmpm = total_spend/total_member_months
total_pharm_spend = filtered_data['pharmacy_spend'].sum()

col1, col2, col3 = st.columns([1.5,1,1])
col1.metric("Total Medical Spend", '${:,.2f}'.format(total_spend))
col2.metric("Total Member Months", total_member_months)
col3.metric("Average PMPM", '${:,.2f}'.format(avg_pmpm))
col1, col2, col3, col4 = st.columns([1,1,1,1])
col1.metric("Total Medical Spend", '${}'.format(util.human_format(total_spend)))
col2.metric("Total Member Months", util.human_format(total_member_months))
col3.metric("Average PMPM", '${}'.format(util.human_format(avg_pmpm)))
col4.metric('Total Pharmacy Spend', '${}'.format(util.human_format(total_pharm_spend)))

st.divider()
y_axis = st.selectbox('Select Metric for Trend Line', [x for x in data.columns if x != 'YEAR_MONTH'])
y_axis = st.selectbox('Select Metric for Trend Line', [x for x in data.columns if x != 'year_month'])

if y_axis:
st.line_chart(filtered_data, x='YEAR_MONTH', y=y_axis)
st.line_chart(filtered_data, x='year_month', y=y_axis)

# Patient Demographic Section
st.divider()
st.subheader('Patient Demographics')
st.write('Patient data static, not filtered by claims date sliders currently')

cs.execute("""SELECT GENDER, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;""")
demo_gender = cs.fetch_pandas_all()
cs.execute("""SELECT RACE, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;""")
demo_race = cs.fetch_pandas_all()
cs.execute("""SELECT CASE
WHEN div0(current_date() - BIRTH_DATE, 365) < 49 THEN '34-48'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 49 AND div0(current_date() - BIRTH_DATE, 365) < 65 THEN '49-64'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 65 AND div0(current_date() - BIRTH_DATE, 365) < 79 THEN '65-78'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 79 AND div0(current_date() - BIRTH_DATE, 365) < 99 THEN '79-98'
WHEN div0(current_date() - BIRTH_DATE, 365) >= 99 THEN '99+' END
AS AGE_GROUP,
COUNT(*) AS COUNT
FROM TUVA_PROJECT_DEMO.CORE.PATIENT
GROUP BY 1
ORDER BY 1;""")
demo_age = cs.fetch_pandas_all()
st.markdown('### Patient Demographics')
st.markdown("""The patient population during this claims period was mostly `female`, `white` and largely
over the age of 65, with nearly half of patients falling into the `65-78` age group""")
st.write(' Please note that patient data is static, and not filtered by claims date sliders currently')

demo_col1, demo_col2 = st.columns([1, 2])
with demo_col1:
plost.donut_chart(demo_gender, theta='COUNT',
color=dict(field='GENDER', scale=dict(range=['#F8B7CD', '#67A3D9'])), legend='left',
plost.donut_chart(demo_gender, theta='count',
color=dict(field='gender', scale=dict(range=['#F8B7CD', '#67A3D9'])), legend='left',
title='Gender Breakdown')
with demo_col2:
plost.bar_chart(
demo_age, bar='AGE_GROUP', value='COUNT', legend=None, use_container_width=True,
demo_age, bar='age_group', value='count', legend=None, use_container_width=True,
title='Counts by Age Group'
)

plost.bar_chart(
demo_race, bar='RACE', value='COUNT', color='RACE', legend='bottom', use_container_width=True,
title='Counts by Race'
demo_race, bar='race', value='count', color='race', legend='bottom', use_container_width=True,
title='Counts by Race', height=400
)

con.close()
82 changes: 82 additions & 0 deletions pages/drilldown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import streamlit as st
import plost
import pandas as pd
import snowflake.connector as sn
from dotenv import load_dotenv
import os
import util

conn = util.connection()

@st.cache_data
def pmpm_data():
    """Load the monthly PMPM trend rows joined with member and pharmacy totals.

    Every column of ``PMPM.PMPM_TRENDS`` is selected and left-joined, on
    ``YEAR_MONTH``, with a per-month aggregate of ``PMPM.PMPM_BUILDER`` that
    supplies ``MEMBER_COUNT`` (row count) and ``PHARMACY_SPEND`` (sum of
    ``PHARMACY_PAID``). The builder's ``YEAR_MONTH`` is reshaped to
    ``YYYY-MM`` inside the subquery so both sides share one key format.

    After loading through ``util.safe_to_pandas``:

    * ``year_month`` is parsed with the ``%Y-%m`` format and reduced to plain
      date objects;
    * ``pharmacy_spend`` is cast to ``float``.

    The call is memoized by ``st.cache_data``.
    """
    # The SQL text is kept exactly as-is (including its line layout) so the
    # statement sent to the warehouse does not change.
    sql = """SELECT PT.*, PB.MEMBER_COUNT, PB.PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT
LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH,
COUNT(*) AS MEMBER_COUNT,
SUM(PHARMACY_PAID) AS PHARMACY_SPEND
FROM TUVA_PROJECT_DEMO.PMPM.PMPM_BUILDER
GROUP BY YEAR_MONTH) AS PB
ON PT.YEAR_MONTH = PB.YEAR_MONTH;"""
    frame = util.safe_to_pandas(conn, sql)

    month_stamps = pd.to_datetime(frame['year_month'], format='%Y-%m')
    frame['year_month'] = month_stamps.dt.date

    spend_as_float = frame['pharmacy_spend'].astype(float)
    frame['pharmacy_spend'] = spend_as_float
    return frame

@st.cache_data
def condition_data():
    """Load chronic-condition case counts per first-diagnosis month.

    For every (first-diagnosis month, condition) pair in
    ``CHRONIC_CONDITIONS.TUVA_CHRONIC_CONDITIONS_LONG`` the query returns:

    * ``DIAGNOSIS_YEAR_MONTH`` -- year and zero-padded month of
      ``FIRST_DIAGNOSIS_DATE`` joined with ``-`` (a ``YYYY-MM`` string);
    * ``CONDITION``;
    * ``CONDITION_CASES`` -- number of rows in the group;
    * ``DIAGNOSIS_DURATION`` -- average of
      ``LAST_DIAGNOSIS_DATE + 1 - FIRST_DIAGNOSIS_DATE``.

    Rows come back ordered by case count, largest first. The query is run via
    ``util.safe_to_pandas`` on the module-level ``conn`` and the helper's
    result is returned unchanged. The call is memoized by ``st.cache_data``.
    """
    # The SQL text is kept exactly as-is (including its line layout) so the
    # statement sent to the warehouse does not change.
    sql = """SELECT
CONCAT(date_part(year, FIRST_DIAGNOSIS_DATE), '-', lpad(date_part(month, FIRST_DIAGNOSIS_DATE), 2, 0)) AS DIAGNOSIS_YEAR_MONTH,
CONDITION,
COUNT(*) AS CONDITION_CASES,
AVG(LAST_DIAGNOSIS_DATE + 1 - FIRST_DIAGNOSIS_DATE) AS DIAGNOSIS_DURATION
FROM TUVA_PROJECT_DEMO.CHRONIC_CONDITIONS.TUVA_CHRONIC_CONDITIONS_LONG
GROUP BY 1,2
ORDER BY 3 DESC;"""
    return util.safe_to_pandas(conn, sql)

# ---------------------------------------------------------------------------
# Drilldown page body.
#
# Streamlit re-executes this script top to bottom on every interaction, so
# the statements below both fetch the (cached) data and render the widgets.
# ---------------------------------------------------------------------------

# Store the frame under its own name instead of rebinding `pmpm_data`, so the
# cached loader function is not shadowed by its own result.
pmpm_df = pmpm_data()
cond_data = condition_data()

st.markdown("# A Further Look")
st.markdown( """The page below offers insight into several key metrics from the data, including PMPM,
pharmacy spend and chronic conditions""")
st.sidebar.markdown("# Drilldown")

st.markdown("### PMPM Breakdown and Pharmacy Spend Trends")
# The slider works on the date objects held in `year_month`; it returns the
# chosen (start, end) pair as those same date objects.
start_date, end_date = st.select_slider(
    "Select date range for claims summary",
    options=pmpm_df['year_month'].sort_values(),
    value=(pmpm_df['year_month'].min(), pmpm_df['year_month'].max()),
)

in_range = (pmpm_df['year_month'] >= start_date) & (pmpm_df['year_month'] <= end_date)
# Take an explicit copy: a column is added right below, and assigning onto a
# `.loc` slice of another frame triggers pandas' chained-assignment warning.
filtered_pmpm_data = pmpm_df.loc[in_range, :].copy()
# Constant label so every row lands in one group -> a single stacked bar.
filtered_pmpm_data['Metric'] = 'Average PMPM'

pmpm_cats = ['inpatient_pmpm', 'outpatient_pmpm', 'office_visit_pmpm', 'ancillary_pmpm', 'other_pmpm']
grouped_pmpm = filtered_pmpm_data.groupby(by='Metric', as_index=False)[pmpm_cats].mean()

st.divider()
st.markdown("""Inpatient and Outpatient spend are the largest drivers of PMPM during this time period""")
plost.bar_chart(data=grouped_pmpm, bar='Metric', value=pmpm_cats, stack='normalize',
                direction='horizontal', legend='top', height=200)
st.markdown("### Total Pharmacy Spend Over Claim Period")
st.markdown("""Pharmacy Spend appears largely steady between 2016 and 2018, averaging between `$100k-$200k` a month.
however we do see larger spikes in April 2017 and February 2018""")
st.line_chart(data=filtered_pmpm_data, x='year_month', y='pharmacy_spend')

st.divider()

st.markdown("### Top 5 Condition Diagnoses Over Claim Period")
st.markdown("""The chart below shows trends in new cases of the top five chronic conditions during the
claims period selected.""")
# `diagnosis_year_month` holds 'YYYY-MM' strings, so the slider bounds must be
# rendered in that same format before comparing.  Using str(date) would give
# 'YYYY-MM-DD', and since 'YYYY-MM' sorts *before* 'YYYY-MM-01' the first
# selected month would be silently excluded.
start_month = start_date.strftime('%Y-%m')
end_month = end_date.strftime('%Y-%m')
msk = (cond_data['diagnosis_year_month'] >= start_month) & (cond_data['diagnosis_year_month'] <= end_month)
filtered_cond_data = cond_data.loc[msk, :]
# Rank conditions by total cases inside the selected window, keep the top 5.
top5_conditions = filtered_cond_data.groupby('condition')['condition_cases'].sum().nlargest(5)
msk = filtered_cond_data['condition'].isin(top5_conditions.index)
top5_filtered_cond = filtered_cond_data.loc[msk, :]

plost.line_chart(data=top5_filtered_cond,
                 x='diagnosis_year_month',
                 y='condition_cases',
                 color='condition',
                 pan_zoom=None,
                 height=400)
78 changes: 0 additions & 78 deletions pages/page_2.py

This file was deleted.