From 40b3e8949994ee5defa689f5e0a48f085ea43b94 Mon Sep 17 00:00:00 2001 From: Jake Cunninghame Date: Mon, 22 May 2023 07:55:04 -0400 Subject: [PATCH 1/7] cached data funcs, use util, lowercasing --- pages/page_2.py | 62 ++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/pages/page_2.py b/pages/page_2.py index fb4cc36..c6cf1d0 100644 --- a/pages/page_2.py +++ b/pages/page_2.py @@ -4,52 +4,52 @@ import snowflake.connector as sn from dotenv import load_dotenv import os +import util st.markdown("# A Further Look") st.sidebar.markdown("# Drilldown") -con = sn.connect( - user=os.getenv('SNOWFLAKE_USER'), - password=os.getenv('SNOWFLAKE_PASSWORD'), - account=os.getenv('SNOWFLAKE_ACCOUNT'), - warehouse=os.getenv('SNOWFLAKE_WH'), - role=os.getenv('SNOWFLAKE_ROLE') -) -cs = con.cursor() -cs.execute("""SELECT PT.*, PB.MEMBER_COUNT, PB.PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT +conn = util.connection() + +@st.cache_data +def pmpm_data(): + query = """SELECT PT.*, PB.MEMBER_COUNT, PB.PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH, COUNT(*) AS MEMBER_COUNT, SUM(PHARMACY_PAID) AS PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_BUILDER GROUP BY YEAR_MONTH) AS PB - ON PT.YEAR_MONTH = PB.YEAR_MONTH;""") - -pmpm_data = cs.fetch_pandas_all() -pmpm_data['YEAR_MONTH'] = pd.to_datetime(pmpm_data['YEAR_MONTH'], format='%Y-%m').dt.date -pmpm_data['PHARMACY_SPEND'] = pmpm_data['PHARMACY_SPEND'].astype(float) + ON PT.YEAR_MONTH = PB.YEAR_MONTH;""" + data = util.safe_to_pandas(conn, query) + data['year_month'] = pd.to_datetime(data['year_month'], format='%Y-%m').dt.date + data['pharmacy_spend'] = data['pharmacy_spend'].astype(float) + return data -cs.execute("""SELECT +@st.cache_data +def condition_data(): + query = """SELECT CONCAT(date_part(year, FIRST_DIAGNOSIS_DATE), '-', lpad(date_part(month, FIRST_DIAGNOSIS_DATE), 2, 0)) AS DIAGNOSIS_YEAR_MONTH, CONDITION, COUNT(*) AS CONDITION_CASES, AVG(LAST_DIAGNOSIS_DATE + 1 - FIRST_DIAGNOSIS_DATE) AS DIAGNOSIS_DURATION FROM TUVA_PROJECT_DEMO.CHRONIC_CONDITIONS.TUVA_CHRONIC_CONDITIONS_LONG GROUP BY 1,2 - ORDER BY 3 DESC;""") - -cond_data = cs.fetch_pandas_all() -#cond_data['DIAGNOSIS_YEAR_MONTH'] = pd.to_datetime(cond_data['DIAGNOSIS_YEAR_MONTH'], format='%Y-%m').dt.date + ORDER BY 3 DESC;""" + data = util.safe_to_pandas(conn, query) + return data +pmpm_data = pmpm_data() +cond_data = condition_data() st.markdown("### PMPM Breakdown and Pharmacy Spend Trends") start_date, end_date = st.select_slider("Select date range for claims summary", - options=pmpm_data['YEAR_MONTH'].sort_values(), - value=(pmpm_data['YEAR_MONTH'].min(), pmpm_data['YEAR_MONTH'].max())) + options=pmpm_data['year_month'].sort_values(), + value=(pmpm_data['year_month'].min(), pmpm_data['year_month'].max())) -filtered_pmpm_data = pmpm_data.loc[(pmpm_data['YEAR_MONTH'] >= start_date) & (pmpm_data['YEAR_MONTH'] <= end_date), :] +filtered_pmpm_data = pmpm_data.loc[(pmpm_data['year_month'] >= start_date) & (pmpm_data['year_month'] <= end_date), :] filtered_pmpm_data['Metric'] = 'Average PMPM' -pmpm_cats = ['INPATIENT_PMPM', 'OUTPATIENT_PMPM', 'OFFICE_VISIT_PMPM', 'ANCILLARY_PMPM', 'OTHER_PMPM'] +pmpm_cats = ['inpatient_pmpm', 'outpatient_pmpm', 'office_visit_pmpm', 'ancillary_pmpm', 'other_pmpm'] grouped_pmpm = filtered_pmpm_data.groupby(by='Metric', as_index=False)[pmpm_cats].mean() st.divider() @@ -57,22 +57,20 @@ direction='horizontal', legend='top', title='Average PMPM Broken out by Category', height=200) st.markdown("**Total Pharmacy Spend Over Claim Period**") -st.line_chart(data=filtered_pmpm_data, x='YEAR_MONTH', y='PHARMACY_SPEND') +st.line_chart(data=filtered_pmpm_data, x='year_month', y='pharmacy_spend') st.divider() st.markdown("**Top 5 Condition Diagnoses Over Claim Period**") -msk = (cond_data['DIAGNOSIS_YEAR_MONTH'] >= str(start_date)) & (cond_data['DIAGNOSIS_YEAR_MONTH'] <= str(end_date)) +msk = (cond_data['diagnosis_year_month'] >= str(start_date)) & (cond_data['diagnosis_year_month'] <= str(end_date)) filtered_cond_data = cond_data.loc[msk, :] -top5_conditions = filtered_cond_data.groupby('CONDITION')['CONDITION_CASES'].sum().nlargest(5) -msk = filtered_cond_data['CONDITION'].isin(top5_conditions.index) +top5_conditions = filtered_cond_data.groupby('condition')['condition_cases'].sum().nlargest(5) +msk = filtered_cond_data['condition'].isin(top5_conditions.index) top5_filtered_cond = filtered_cond_data.loc[msk, :] plost.line_chart(data=top5_filtered_cond, - x='DIAGNOSIS_YEAR_MONTH', - y='CONDITION_CASES', - color='CONDITION', + x='diagnosis_year_month', + y='condition_cases', + color='condition', pan_zoom=None, height=400) - -con.close() From 20896e56335757a97533757a060699c7e5ded242 Mon Sep 17 00:00:00 2001 From: Jake Cunninghame Date: Mon, 22 May 2023 08:16:50 -0400 Subject: [PATCH 2/7] blurbs --- pages/page_2.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pages/page_2.py b/pages/page_2.py index c6cf1d0..9feb9fc 100644 --- a/pages/page_2.py +++ b/pages/page_2.py @@ -6,9 +6,6 @@ import os import util -st.markdown("# A Further Look") -st.sidebar.markdown("# Drilldown") - conn = util.connection() @st.cache_data @@ -41,6 +38,11 @@ def condition_data(): pmpm_data = pmpm_data() cond_data = condition_data() +st.markdown("# A Further Look") +st.markdown( """The page below offers insight into several key metrics from the data, including PMPM, +pharmacy spend and chronic conditions""") +st.sidebar.markdown("# Drilldown") + st.markdown("### PMPM Breakdown and Pharmacy Spend Trends") start_date, end_date = st.select_slider("Select date range for claims summary", options=pmpm_data['year_month'].sort_values(), @@ -53,15 +55,19 @@ def condition_data(): grouped_pmpm = filtered_pmpm_data.groupby(by='Metric', as_index=False)[pmpm_cats].mean() st.divider() +st.markdown("""Inpatient and Outpatient spend are the largest drivers of PMPM during this time period""") plost.bar_chart(data=grouped_pmpm, bar='Metric', value=pmpm_cats, stack='normalize', - direction='horizontal', legend='top', title='Average PMPM Broken out by Category', - height=200) -st.markdown("**Total Pharmacy Spend Over Claim Period**") + direction='horizontal', legend='top', height=200) +st.markdown("### Total Pharmacy Spend Over Claim Period**") +st.markdown("""Pharmacy Spend appears largely steady between 2016 and 2018, averaging between $100k-$200k a month. +however we do see larger spikes in April 2017 and February 2018""") st.line_chart(data=filtered_pmpm_data, x='year_month', y='pharmacy_spend') st.divider() -st.markdown("**Top 5 Condition Diagnoses Over Claim Period**") +st.markdown("### Top 5 Condition Diagnoses Over Claim Period") +st.markdown("""The chart below shows trends in new cases of the top five chronic conditions during the +claims period selected.""") msk = (cond_data['diagnosis_year_month'] >= str(start_date)) & (cond_data['diagnosis_year_month'] <= str(end_date)) filtered_cond_data = cond_data.loc[msk, :] top5_conditions = filtered_cond_data.groupby('condition')['condition_cases'].sum().nlargest(5) From 6ad0c0ab6f3c6a7817d081eedffc6a3b4d61df29 Mon Sep 17 00:00:00 2001 From: Jake Cunninghame Date: Mon, 22 May 2023 08:38:29 -0400 Subject: [PATCH 3/7] cache and func treatment for data, adding some blurbs --- main_page.py | 113 +++++++++++++++++++++++++++++---------------------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/main_page.py b/main_page.py index 6a6d7a7..85f6521 100644 --- a/main_page.py +++ b/main_page.py @@ -4,40 +4,72 @@ import snowflake.connector as sn from dotenv import load_dotenv import os +import util load_dotenv() -st.markdown("# High Level Summary") -st.sidebar.markdown("# Claims Summary") - # Connect and fetch data -con = sn.connect( - user=os.getenv('SNOWFLAKE_USER'), - password=os.getenv('SNOWFLAKE_PASSWORD'), - account=os.getenv('SNOWFLAKE_ACCOUNT'), - warehouse=os.getenv('SNOWFLAKE_WH'), - role=os.getenv('SNOWFLAKE_ROLE') -) -cs = con.cursor() -cs.execute("""SELECT PT.*, PB.MEMBER_COUNT FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT +conn = util.connection() + +@st.cache_data +def summary_data(): + query = """SELECT PT.*, PB.MEMBER_COUNT FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH, COUNT(*) AS MEMBER_COUNT FROM TUVA_PROJECT_DEMO.PMPM.PMPM_BUILDER GROUP BY YEAR_MONTH) AS PB - ON PT.YEAR_MONTH = PB.YEAR_MONTH;""") + ON PT.YEAR_MONTH = PB.YEAR_MONTH;""" + + data = util.safe_to_pandas(conn, query) + data['year_month'] = pd.to_datetime(data['year_month'], format='%Y-%m').dt.date + return data + +@st.cache_data +def gender_data(): + query = """SELECT GENDER, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;""" + data = util.safe_to_pandas(conn, query) + return data + +@st.cache_data +def race_data(): + query = """SELECT RACE, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;""" + data = util.safe_to_pandas(conn, query) + return data -data = cs.fetch_pandas_all() -data['YEAR_MONTH'] = pd.to_datetime(data['YEAR_MONTH'], format='%Y-%m').dt.date +@st.cache_data +def age_data(): + query = """SELECT CASE + WHEN div0(current_date() - BIRTH_DATE, 365) < 49 THEN '34-48' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 49 AND div0(current_date() - BIRTH_DATE, 365) < 65 THEN '49-64' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 65 AND div0(current_date() - BIRTH_DATE, 365) < 79 THEN '65-78' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 79 AND div0(current_date() - BIRTH_DATE, 365) < 99 THEN '79-98' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 99 THEN '99+' END + AS AGE_GROUP, + COUNT(*) AS COUNT + FROM TUVA_PROJECT_DEMO.CORE.PATIENT + GROUP BY 1 + ORDER BY 1;""" + data = util.safe_to_pandas(conn, query) + return data +data = summary_data() +demo_gender = gender_data() +demo_race = race_data() +demo_age = age_data() -st.markdown("### Summary of Claims") +st.markdown("# Summary of Claims") start_date, end_date = st.select_slider("Select date range for claims summary", - options=data['YEAR_MONTH'].sort_values(), - value=(data['YEAR_MONTH'].min(), data['YEAR_MONTH'].max())) -filtered_data = data.loc[(data['YEAR_MONTH'] >= start_date) & (data['YEAR_MONTH'] <= end_date), :] + options=data['year_month'].sort_values(), + value=(data['year_month'].min(), data['year_month'].max())) +filtered_data = data.loc[(data['year_month'] >= start_date) & (data['year_month'] <= end_date), :] + +st.markdown("### High Level Summary") +st.markdown("""At a glance, see the total medical spend and PMPM for the chosen time period. As well as a trend +graph for other important financial metrics""") +st.sidebar.markdown("# Claims Summary") # Summary Metrics -total_spend = filtered_data['MEDICAL_SPEND'].sum() -total_member_months = filtered_data['MEMBER_MONTH_COUNT'].sum() +total_spend = filtered_data['medical_spend'].sum() +total_member_months = filtered_data['member_month_count'].sum() avg_pmpm = total_spend/total_member_months col1, col2, col3 = st.columns([1.5,1,1]) @@ -46,47 +78,30 @@ col3.metric("Average PMPM", '${:,.2f}'.format(avg_pmpm)) st.divider() -y_axis = st.selectbox('Select Metric for Trend Line', [x for x in data.columns if x != 'YEAR_MONTH']) +y_axis = st.selectbox('Select Metric for Trend Line', [x for x in data.columns if x != 'year_month']) if y_axis: - st.line_chart(filtered_data, x='YEAR_MONTH', y=y_axis) + st.line_chart(filtered_data, x='year_month', y=y_axis) # Patient Demographic Section st.divider() -st.subheader('Patient Demographics') -st.write('Patient data static, not filtered by claims date sliders currently') - -cs.execute("""SELECT GENDER, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;""") -demo_gender = cs.fetch_pandas_all() -cs.execute("""SELECT RACE, COUNT(*) AS COUNT FROM TUVA_PROJECT_DEMO.CORE.PATIENT GROUP BY 1;""") -demo_race = cs.fetch_pandas_all() -cs.execute("""SELECT CASE - WHEN div0(current_date() - BIRTH_DATE, 365) < 49 THEN '34-48' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 49 AND div0(current_date() - BIRTH_DATE, 365) < 65 THEN '49-64' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 65 AND div0(current_date() - BIRTH_DATE, 365) < 79 THEN '65-78' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 79 AND div0(current_date() - BIRTH_DATE, 365) < 99 THEN '79-98' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 99 THEN '99+' END - AS AGE_GROUP, - COUNT(*) AS COUNT - FROM TUVA_PROJECT_DEMO.CORE.PATIENT - GROUP BY 1 - ORDER BY 1;""") -demo_age = cs.fetch_pandas_all() +st.markdown('### Patient Demographics') +st.markdown("""The patient population during this claims period was mostly `female`, `white` and largely +over the age of 65, with nearly half of patients falling into the `65-78` age group""") +st.write(' Please note that patient data is static, and not filtered by claims date sliders currently') demo_col1, demo_col2 = st.columns([1, 2]) with demo_col1: - plost.donut_chart(demo_gender, theta='COUNT', - color=dict(field='GENDER', scale=dict(range=['#F8B7CD', '#67A3D9'])), legend='left', + plost.donut_chart(demo_gender, theta='count', + color=dict(field='gender', scale=dict(range=['#F8B7CD', '#67A3D9'])), legend='left', title='Gender Breakdown') with demo_col2: plost.bar_chart( - demo_age, bar='AGE_GROUP', value='COUNT', legend=None, use_container_width=True, + demo_age, bar='age_group', value='count', legend=None, use_container_width=True, title='Counts by Age Group' ) plost.bar_chart( - demo_race, bar='RACE', value='COUNT', color='RACE', legend='bottom', use_container_width=True, - title='Counts by Race' + demo_race, bar='race', value='count', color='race', legend='bottom', use_container_width=True, + title='Counts by Race', height=400 ) - -con.close() From 6da449adb30183414b6b2e97274a9a40eb23709e Mon Sep 17 00:00:00 2001 From: Jake Cunninghame Date: Mon, 22 May 2023 08:40:59 -0400 Subject: [PATCH 4/7] pretty formating --- main_page.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main_page.py b/main_page.py index 85f6521..0b8f9f4 100644 --- a/main_page.py +++ b/main_page.py @@ -72,10 +72,10 @@ def age_data(): total_member_months = filtered_data['member_month_count'].sum() avg_pmpm = total_spend/total_member_months -col1, col2, col3 = st.columns([1.5,1,1]) -col1.metric("Total Medical Spend", '${:,.2f}'.format(total_spend)) -col2.metric("Total Member Months", total_member_months) -col3.metric("Average PMPM", '${:,.2f}'.format(avg_pmpm)) +col1, col2, col3 = st.columns([1,1,1]) +col1.metric("Total Medical Spend", '${}'.format(util.human_format(total_spend))) +col2.metric("Total Member Months", util.human_format(total_member_months)) +col3.metric("Average PMPM", '${}'.format(util.human_format(avg_pmpm))) st.divider() y_axis = st.selectbox('Select Metric for Trend Line', [x for x in data.columns if x != 'year_month']) From a843a37c1c15a3c893aa41100ae324e9fb656775 Mon Sep 17 00:00:00 2001 From: Jake Cunninghame Date: Mon, 22 May 2023 08:43:05 -0400 Subject: [PATCH 5/7] page rename --- pages/{page_2.py => drilldown.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pages/{page_2.py => drilldown.py} (100%) diff --git a/pages/page_2.py b/pages/drilldown.py similarity index 100% rename from pages/page_2.py rename to pages/drilldown.py From cf464b7c60ce12cc1a34832e6267079c6c5738ec Mon Sep 17 00:00:00 2001 From: Jake Cunninghame Date: Mon, 22 May 2023 09:02:32 -0400 Subject: [PATCH 6/7] small errs --- pages/drilldown.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pages/drilldown.py b/pages/drilldown.py index 9feb9fc..c90debe 100644 --- a/pages/drilldown.py +++ b/pages/drilldown.py @@ -58,8 +58,8 @@ def condition_data(): st.markdown("""Inpatient and Outpatient spend are the largest drivers of PMPM during this time period""") plost.bar_chart(data=grouped_pmpm, bar='Metric', value=pmpm_cats, stack='normalize', direction='horizontal', legend='top', height=200) -st.markdown("### Total Pharmacy Spend Over Claim Period**") -st.markdown("""Pharmacy Spend appears largely steady between 2016 and 2018, averaging between $100k-$200k a month. +st.markdown("### Total Pharmacy Spend Over Claim Period") +st.markdown("""Pharmacy Spend appears largely steady between 2016 and 2018, averaging between `$100k-$200k` a month. however we do see larger spikes in April 2017 and February 2018""") st.line_chart(data=filtered_pmpm_data, x='year_month', y='pharmacy_spend') From d840f941efab68673621b0bb86816865014fcfb7 Mon Sep 17 00:00:00 2001 From: Jake Cunninghame Date: Mon, 22 May 2023 09:35:40 -0400 Subject: [PATCH 7/7] pharm spend --- main_page.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/main_page.py b/main_page.py index 0b8f9f4..635d9c1 100644 --- a/main_page.py +++ b/main_page.py @@ -13,9 +13,10 @@ @st.cache_data def summary_data(): - query = """SELECT PT.*, PB.MEMBER_COUNT FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT + query = """SELECT PT.*, PB.MEMBER_COUNT, PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_TRENDS PT LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH, - COUNT(*) AS MEMBER_COUNT + COUNT(*) AS MEMBER_COUNT, + SUM(PHARMACY_PAID) AS PHARMACY_SPEND FROM TUVA_PROJECT_DEMO.PMPM.PMPM_BUILDER GROUP BY YEAR_MONTH) AS PB ON PT.YEAR_MONTH = PB.YEAR_MONTH;""" @@ -71,11 +72,13 @@ def age_data(): total_spend = filtered_data['medical_spend'].sum() total_member_months = filtered_data['member_month_count'].sum() avg_pmpm = total_spend/total_member_months +total_pharm_spend = filtered_data['pharmacy_spend'].sum() -col1, col2, col3 = st.columns([1,1,1]) +col1, col2, col3, col4 = st.columns([1,1,1,1]) col1.metric("Total Medical Spend", '${}'.format(util.human_format(total_spend))) col2.metric("Total Member Months", util.human_format(total_member_months)) col3.metric("Average PMPM", '${}'.format(util.human_format(avg_pmpm))) +col4.metric('Total Pharmacy Spend', '${}'.format(util.human_format(total_pharm_spend))) st.divider() y_axis = st.selectbox('Select Metric for Trend Line', [x for x in data.columns if x != 'year_month'])