Skip to content

Commit

Permalink
Merge pull request #4 from jcunninghame/jcunni/sprucing-up
Browse files Browse the repository at this point in the history
Jcunni/sprucing up
  • Loading branch information
jcunninghame authored May 22, 2023
2 parents 84bbbcc + d840f94 commit bcc3085
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 132 deletions.
126 changes: 72 additions & 54 deletions main_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,89 +4,107 @@
import snowflake.connector as sn
from dotenv import load_dotenv
import os
import util

load_dotenv()

# Shared connection handle used by the cached loader functions in this page.
# Connection details are delegated to util.connection(); the inline
# sn.connect(...) / cursor code that used to live here is superseded, and the
# page and sidebar headers are emitted further down next to their content.
conn = util.connection()

@st.cache_data
def summary_data():
    """Load the PMPM trend rows joined with per-month member and pharmacy totals.

    The query left-joins PMPM_TRENDS to a per-month aggregate of PMPM_BUILDER
    (row count as MEMBER_COUNT, summed PHARMACY_PAID as PHARMACY_SPEND), with
    the builder's YEAR_MONTH reformatted to ``YYYY-MM`` so the keys match.

    Returns:
        The frame produced by ``util.safe_to_pandas`` with ``year_month``
        converted to ``datetime.date`` and ``pharmacy_spend`` cast to float.
        Column names are accessed in lower case here, so the helper presumably
        lower-cases them -- confirm against ``util``.
    """
    # PHARMACY_SPEND is qualified with PB so it cannot become ambiguous with a
    # PT column; this matches the query used by the drilldown page.
    query = """SELECT PT.*, PB.MEMBER_COUNT, PB.PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT
               LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH,
                                 COUNT(*) AS MEMBER_COUNT,
                                 SUM(PHARMACY_PAID) AS PHARMACY_SPEND
                          FROM TUVA_PROJECT_DEMO.PMPM.PMPM_BUILDER
                          GROUP BY YEAR_MONTH) AS PB
               ON PT.YEAR_MONTH = PB.YEAR_MONTH;"""

    data = util.safe_to_pandas(conn, query)
    data['year_month'] = pd.to_datetime(data['year_month'], format='%Y-%m').dt.date
    # Same cast the drilldown loader applies, so sums and charts get plain floats.
    data['pharmacy_spend'] = data['pharmacy_spend'].astype(float)
    return data

@st.cache_data
def gender_data():
    """Return the number of CORE.PATIENT rows per GENDER value."""
    gender_sql = """SELECT GENDER, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;"""
    return util.safe_to_pandas(conn, gender_sql)

@st.cache_data
def race_data():
    """Return the number of CORE.PATIENT rows per RACE value."""
    race_sql = """SELECT RACE, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;"""
    return util.safe_to_pandas(conn, race_sql)

@st.cache_data
def age_data():
    """Return patient counts per age bucket, ordered by bucket label.

    Age is approximated in SQL as days since BIRTH_DATE divided by 365.

    NOTE(review): the first bucket is labelled '34-48' but matches every age
    below 49, and a row whose age expression is NULL gets a NULL AGE_GROUP --
    confirm that the data really has no patients outside the labelled ranges.
    """
    query = """SELECT CASE
                          WHEN div0(current_date() - BIRTH_DATE, 365) < 49 THEN '34-48'
                          WHEN div0(current_date() - BIRTH_DATE, 365) >= 49 AND div0(current_date() - BIRTH_DATE, 365) < 65 THEN '49-64'
                          WHEN div0(current_date() - BIRTH_DATE, 365) >= 65 AND div0(current_date() - BIRTH_DATE, 365) < 79 THEN '65-78'
                          WHEN div0(current_date() - BIRTH_DATE, 365) >= 79 AND div0(current_date() - BIRTH_DATE, 365) < 99 THEN '79-98'
                          WHEN div0(current_date() - BIRTH_DATE, 365) >= 99 THEN '99+' END
                      AS AGE_GROUP,
                      COUNT(*) AS COUNT
               FROM TUVA_PROJECT_DEMO.CORE.PATIENT
               GROUP BY 1
               ORDER BY 1;"""
    data = util.safe_to_pandas(conn, query)
    return data
# Fetch every dataset up front; each loader is wrapped in st.cache_data.
data = summary_data()
demo_gender = gender_data()
demo_race = race_data()
demo_age = age_data()

st.markdown("# Summary of Claims")
start_date, end_date = st.select_slider("Select date range for claims summary",
                                        options=data['year_month'].sort_values(),
                                        value=(data['year_month'].min(), data['year_month'].max()))
# Both slider bounds are inclusive.
filtered_data = data.loc[(data['year_month'] >= start_date) & (data['year_month'] <= end_date), :]

st.markdown("### High Level Summary")
st.markdown("""At a glance, see the total medical spend and PMPM for the chosen time period. As well as a trend
graph for other important financial metrics""")
st.sidebar.markdown("# Claims Summary")

# Summary Metrics
total_spend = filtered_data['medical_spend'].sum()
total_member_months = filtered_data['member_month_count'].sum()
# Avoid a division by zero (inf/NaN in the metric) if the window has no member months.
avg_pmpm = total_spend / total_member_months if total_member_months else 0
total_pharm_spend = filtered_data['pharmacy_spend'].sum()

col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
col1.metric("Total Medical Spend", '${}'.format(util.human_format(total_spend)))
col2.metric("Total Member Months", util.human_format(total_member_months))
col3.metric("Average PMPM", '${}'.format(util.human_format(avg_pmpm)))
col4.metric('Total Pharmacy Spend', '${}'.format(util.human_format(total_pharm_spend)))

st.divider()
# Any column except the date axis can be plotted as the trend line.
y_axis = st.selectbox('Select Metric for Trend Line', [x for x in data.columns if x != 'year_month'])

if y_axis:
    st.line_chart(filtered_data, x='year_month', y=y_axis)

# Patient Demographic Section
st.divider()
st.markdown('### Patient Demographics')
st.markdown("""The patient population during this claims period was mostly `female`, `white` and largely
over the age of 65, with nearly half of patients falling into the `65-78` age group""")
st.write(' Please note that patient data is static, and not filtered by claims date sliders currently')

demo_col1, demo_col2 = st.columns([1, 2])
with demo_col1:
    plost.donut_chart(demo_gender, theta='count',
                      color=dict(field='gender', scale=dict(range=['#F8B7CD', '#67A3D9'])), legend='left',
                      title='Gender Breakdown')
with demo_col2:
    plost.bar_chart(
        demo_age, bar='age_group', value='count', legend=None, use_container_width=True,
        title='Counts by Age Group'
    )

plost.bar_chart(
    demo_race, bar='race', value='count', color='race', legend='bottom', use_container_width=True,
    title='Counts by Race', height=400
)

# The connection now comes from util.connection() and is shared with the cached
# loaders, so it is deliberately not closed here (the old `con` handle no
# longer exists).
82 changes: 82 additions & 0 deletions pages/drilldown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import streamlit as st
import plost
import pandas as pd
import snowflake.connector as sn
from dotenv import load_dotenv
import os
import util

# Shared connection handle; the cached loaders in this page pass it to util.safe_to_pandas.
conn = util.connection()

@st.cache_data
def pmpm_data():
    """Load PMPM trend rows joined with per-month member counts and pharmacy spend.

    Returns the frame from ``util.safe_to_pandas`` with ``year_month`` turned
    into ``datetime.date`` values and ``pharmacy_spend`` cast to float.
    """
    pmpm_sql = """SELECT PT.*, PB.MEMBER_COUNT, PB.PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT
LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH,
COUNT(*) AS MEMBER_COUNT,
SUM(PHARMACY_PAID) AS PHARMACY_SPEND
FROM TUVA_PROJECT_DEMO.PMPM.PMPM_BUILDER
GROUP BY YEAR_MONTH) AS PB
ON PT.YEAR_MONTH = PB.YEAR_MONTH;"""
    frame = util.safe_to_pandas(conn, pmpm_sql)
    # Parse the 'YYYY-MM' keys, then keep only the date part.
    month_stamps = pd.to_datetime(frame['year_month'], format='%Y-%m')
    frame['year_month'] = month_stamps.dt.date
    frame['pharmacy_spend'] = frame['pharmacy_spend'].astype(float)
    return frame

@st.cache_data
def condition_data():
    """Return chronic-condition case counts per first-diagnosis month and condition.

    Each row carries the ``YYYY-MM`` month of FIRST_DIAGNOSIS_DATE, the
    condition, the number of rows in that group, and the average of
    (LAST_DIAGNOSIS_DATE + 1 - FIRST_DIAGNOSIS_DATE); rows are ordered by case
    count, largest first.
    """
    condition_sql = """SELECT
CONCAT(date_part(year, FIRST_DIAGNOSIS_DATE), '-', lpad(date_part(month, FIRST_DIAGNOSIS_DATE), 2, 0)) AS DIAGNOSIS_YEAR_MONTH,
CONDITION,
COUNT(*) AS CONDITION_CASES,
AVG(LAST_DIAGNOSIS_DATE + 1 - FIRST_DIAGNOSIS_DATE) AS DIAGNOSIS_DURATION
FROM TUVA_PROJECT_DEMO.CHRONIC_CONDITIONS.TUVA_CHRONIC_CONDITIONS_LONG
GROUP BY 1,2
ORDER BY 3 DESC;"""
    return util.safe_to_pandas(conn, condition_sql)

# Bind the loader results to names distinct from the loader functions so the
# functions stay callable after this point.
pmpm_df = pmpm_data()
cond_data = condition_data()

st.markdown("# A Further Look")
st.markdown("""The page below offers insight into several key metrics from the data, including PMPM,
pharmacy spend and chronic conditions""")
st.sidebar.markdown("# Drilldown")

st.markdown("### PMPM Breakdown and Pharmacy Spend Trends")
start_date, end_date = st.select_slider("Select date range for claims summary",
                                        options=pmpm_df['year_month'].sort_values(),
                                        value=(pmpm_df['year_month'].min(), pmpm_df['year_month'].max()))

# Copy the slice before adding a column so the assignment does not target a
# view of the loaded frame (avoids pandas' SettingWithCopyWarning).
in_window = (pmpm_df['year_month'] >= start_date) & (pmpm_df['year_month'] <= end_date)
filtered_pmpm_data = pmpm_df.loc[in_window, :].copy()
# Constant label so the groupby below collapses the window into a single bar.
filtered_pmpm_data['Metric'] = 'Average PMPM'

pmpm_cats = ['inpatient_pmpm', 'outpatient_pmpm', 'office_visit_pmpm', 'ancillary_pmpm', 'other_pmpm']
grouped_pmpm = filtered_pmpm_data.groupby(by='Metric', as_index=False)[pmpm_cats].mean()

st.divider()
st.markdown("""Inpatient and Outpatient spend are the largest drivers of PMPM during this time period""")
plost.bar_chart(data=grouped_pmpm, bar='Metric', value=pmpm_cats, stack='normalize',
                direction='horizontal', legend='top', height=200)
st.markdown("### Total Pharmacy Spend Over Claim Period")
st.markdown("""Pharmacy Spend appears largely steady between 2016 and 2018, averaging between `$100k-$200k` a month.
however we do see larger spikes in April 2017 and February 2018""")
st.line_chart(data=filtered_pmpm_data, x='year_month', y='pharmacy_spend')

st.divider()

st.markdown("### Top 5 Condition Diagnoses Over Claim Period")
st.markdown("""The chart below shows trends in new cases of the top five chronic conditions during the
claims period selected.""")
# diagnosis_year_month holds 'YYYY-MM' strings, so the slider dates must be
# rendered in the same shape before comparing. str(date) yields 'YYYY-MM-DD',
# and 'YYYY-MM' sorts before 'YYYY-MM-01', which silently dropped the
# selected start month.
start_month = start_date.strftime('%Y-%m')
end_month = end_date.strftime('%Y-%m')
msk = (cond_data['diagnosis_year_month'] >= start_month) & (cond_data['diagnosis_year_month'] <= end_month)
filtered_cond_data = cond_data.loc[msk, :]
# Rank conditions by total cases inside the window and keep the five largest.
top5_conditions = filtered_cond_data.groupby('condition')['condition_cases'].sum().nlargest(5)
msk = filtered_cond_data['condition'].isin(top5_conditions.index)
top5_filtered_cond = filtered_cond_data.loc[msk, :]

plost.line_chart(data=top5_filtered_cond,
                 x='diagnosis_year_month',
                 y='condition_cases',
                 color='condition',
                 pan_zoom=None,
                 height=400)
78 changes: 0 additions & 78 deletions pages/page_2.py

This file was deleted.

0 comments on commit bcc3085

Please sign in to comment.