diff --git a/data.py b/data.py new file mode 100644 index 0000000..178c4fb --- /dev/null +++ b/data.py @@ -0,0 +1,307 @@ +import streamlit as st +import pandas as pd +import util + +conn = util.connection(database="dev_lipsa") + + +@st.cache_data +def test_results(): + query = """ + select * from data_profiling.test_result + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def use_case(): + query = """ + select * from data_profiling.use_case + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def cost_summary(): + query = """ + select * + from dbt_lipsa.cost_summary + order by 1, 2, 3 + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def year_months(): + query = """ + select distinct + year(claim_end_date)::text || '-' || + lpad(month(claim_end_date)::text, 2, '0') + as year_month + , sum(paid_amount) + from core.medical_claim + group by 1 + having sum(paid_amount) > 10 + order by 1 + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def summary_stats(): + query = """ + with medical as ( + select distinct + year(claim_end_date)::text year + , sum(paid_amount) as medical_paid_amount + from core.medical_claim + group by 1 + ) + , pharmacy as ( + select + year(dispensing_date)::text year + , sum(paid_amount) as pharmacy_paid_amount + from core.pharmacy_claim + group by 1 + ), elig as ( + select + substr(year_month, 0, 4) as year + , sum(member_month_count) as member_month_count + from pmpm._int_member_month_count + group by 1 + ) + select + year + , lag(year) over(order by year) as prior_year + , medical_paid_amount + pharmacy_paid_amount as current_period_total_paid + , lag(medical_paid_amount + pharmacy_paid_amount) + over(order by year) as prior_period_total_paid + , div0null( + medical_paid_amount + pharmacy_paid_amount + - lag(medical_paid_amount + pharmacy_paid_amount) over(order by year), + lag(medical_paid_amount + pharmacy_paid_amount) over(order by year) + ) as pct_change_total_paid + , medical_paid_amount as current_period_medical_paid + , lag(medical_paid_amount) over(order by year) as prior_period_medical_paid + , div0null( + medical_paid_amount - lag(medical_paid_amount) over(order by year), + lag(medical_paid_amount) over(order by year) + ) as pct_change_medical_paid + , pharmacy_paid_amount as current_period_pharmacy_paid + , lag(pharmacy_paid_amount) over(order by year) as prior_period_pharmacy_paid + , div0null( + pharmacy_paid_amount - lag(pharmacy_paid_amount) over(order by year), + lag(pharmacy_paid_amount) over(order by year) + ) as pct_change_pharmacy_paid + , member_month_count as current_period_member_months + , lag(member_month_count) over(order by year) as prior_period_member_months + , div0null( + member_month_count - lag(member_month_count) over(order by year), + lag(member_month_count) over(order by year) + ) as pct_change_member_months + from medical + join pharmacy using(year) + join elig using(year) + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def pmpm_by_claim_type(): + query = """ + with spend_summary as ( + select + year(claim_end_date)::text || '-' || + lpad(month(claim_end_date)::text, 2, '0') + as year_month + , claim_type + , sum(paid_amount) as paid_amount_sum + from core.medical_claim + group by 1, 2 + having sum(paid_amount) > 0 + order by 1, 2 desc + ), pharmacy_summary as ( + select + year(dispensing_date)::text || '-' || + lpad(month(dispensing_date)::text, 2, '0') + as year_month + , 'pharmacy' as claim_type + , sum(paid_amount) as paid_amount_sum + from core.pharmacy_claim + group by 1 + ), together as ( + select * from spend_summary union all + select * from pharmacy_summary + ) + select + * + , paid_amount_sum / member_month_count as paid_amount_pmpm + from together + join pmpm._int_member_month_count using(year_month) + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def pmpm_by_service_category_1(): + query = """ + with spend_summary as ( + select + year(claim_end_date)::text || '-' || + lpad(month(claim_end_date)::text, 2, '0') + as year_month + , service_category_1 + , sum(paid_amount) as paid_amount_sum + from core.medical_claim + group by 1, 2 + having sum(paid_amount) > 0 + order by 1, 2 desc + ) + select + * + , paid_amount_sum / member_month_count as paid_amount_pmpm + from spend_summary + join pmpm._int_member_month_count using(year_month) + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def pmpm_by_service_category_1_2(): + query = """ + with spend_summary as ( + select + year(claim_end_date)::text || '-' || + lpad(month(claim_end_date)::text, 2, '0') + as year_month + , service_category_1 + , service_category_2 + , sum(paid_amount) as paid_amount_sum + from core.medical_claim + group by 1, 2, 3 + having sum(paid_amount) > 0 + order by 1, 2, 3 desc + ) + select + * + , paid_amount_sum / member_month_count as paid_amount_pmpm + from spend_summary + join pmpm._int_member_month_count using(year_month) + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def pmpm_data(): + query = """SELECT PT.*, PB.MEMBER_COUNT, PHARMACY_SPEND FROM PMPM.PMPM_TRENDS PT + LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH, + COUNT(*) AS MEMBER_COUNT, + SUM(PHARMACY_PAID) AS PHARMACY_SPEND + FROM PMPM.PMPM_BUILDER + GROUP BY YEAR_MONTH) AS PB + ON PT.YEAR_MONTH = PB.YEAR_MONTH;""" + + data = util.safe_to_pandas(conn, query) + data["year_month"] = pd.to_datetime(data["year_month"], format="%Y-%m").dt.date + data["year"] = pd.to_datetime(data["year_month"], format="%Y-%m").dt.year + data["pharmacy_spend"] = data["pharmacy_spend"].astype(float) + + return data + + +@st.cache_data +def gender_data(): + query = """SELECT GENDER, COUNT(*) AS COUNT FROM CORE.PATIENT GROUP BY 1;""" + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def race_data(): + query = """SELECT RACE, COUNT(*) AS COUNT FROM CORE.PATIENT GROUP BY 1;""" + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def age_data(): + query = """SELECT CASE + WHEN div0(current_date() - BIRTH_DATE, 365) < 49 THEN '34-48' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 49 AND div0(current_date() - BIRTH_DATE, 365) < 65 THEN '49-64' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 65 AND div0(current_date() - BIRTH_DATE, 365) < 79 THEN '65-78' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 79 AND div0(current_date() - BIRTH_DATE, 365) < 99 THEN '79-98' + WHEN div0(current_date() - BIRTH_DATE, 365) >= 99 THEN '99+' END + AS AGE_GROUP, + COUNT(*) AS COUNT + FROM CORE.PATIENT + GROUP BY 1 + ORDER BY 1;""" + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def pmpm_by_chronic_condition(): + query = """ + with conditions as ( + select distinct + year(condition_date)::text || '-' || lpad(month(condition_date)::text, 2, '0') as year_month + , claim_id + , patient_id + , code + , condition + , condition_family + from core.condition + inner join chronic_conditions._value_set_tuva_chronic_conditions_hierarchy vs on condition.code = vs.icd_10_cm_code + where code_type = 'icd-10-cm' + ) + , medical_spend as ( + select + year(claim_start_date)::text || '-' || lpad(month(claim_start_date)::text, 2, '0') as year_month + , claim_id + , patient_id + , sum(paid_amount) as medical_paid_amount + from core.medical_claim + group by 1, 2, 3 + ), merged as ( + select + year_month + , condition_family + , sum(medical_paid_amount) as medical_paid_amount_sum + from conditions + join medical_spend using(patient_id, claim_id, year_month) + group by 1, 2 + ) + select + * + from merged + join pmpm._int_member_month_count using(year_month) + order by 2, 1 + """ + data = util.safe_to_pandas(conn, query) + return data + + +@st.cache_data +def condition_data(): + query = """SELECT + CONCAT(date_part(year, FIRST_DIAGNOSIS_DATE), '-', lpad(date_part(month, FIRST_DIAGNOSIS_DATE), 2, 0)) AS DIAGNOSIS_YEAR_MONTH, + CONDITION, + COUNT(*) AS CONDITION_CASES, + AVG(LAST_DIAGNOSIS_DATE + 1 - FIRST_DIAGNOSIS_DATE) AS DIAGNOSIS_DURATION + FROM CHRONIC_CONDITIONS.TUVA_CHRONIC_CONDITIONS_LONG + GROUP BY 1,2 + ORDER BY 3 DESC;""" + data = util.safe_to_pandas(conn, query) + data["diagnosis_year"] = pd.to_datetime( + data["diagnosis_year_month"] + ).dt.year.astype(str) + return data diff --git a/main_page.py b/main_page.py index 64e620f..37f03f7 100644 --- a/main_page.py +++ b/main_page.py @@ -1,132 +1,25 @@ import streamlit as st -import pandas as pd import plost -import util import components as comp +import data - -# Connect and fetch data -conn = util.connection(database="dev_lipsa") - - -@st.cache_data -def summary_stats(): - query = """ - with medical as ( - select distinct - year(claim_end_date)::text year - , sum(paid_amount) as medical_paid_amount - from core.medical_claim - group by 1 - ) - , pharmacy as ( - select - year(dispensing_date)::text year - , sum(paid_amount) as pharmacy_paid_amount - from core.pharmacy_claim - group by 1 - ), elig as ( - select - substr(year_month, 0, 4) as year - , sum(member_month_count) as member_month_count - from pmpm._int_member_month_count - group by 1 - ) - select - year - , lag(year) over(order by year) as prior_year - , medical_paid_amount as current_period_medical_paid - , lag(medical_paid_amount) over(order by year) as prior_period_medical_paid - , div0null( - medical_paid_amount - lag(medical_paid_amount) over(order by year), - lag(medical_paid_amount) over(order by year) - ) as pct_change_medical_paid - , pharmacy_paid_amount as current_period_pharmacy_paid - , lag(pharmacy_paid_amount) over(order by year) as prior_period_pharmacy_paid - , div0null( - pharmacy_paid_amount - lag(pharmacy_paid_amount) over(order by year), - lag(pharmacy_paid_amount) over(order by year) - ) as pct_change_pharmacy_paid - , member_month_count as current_period_member_months - , lag(member_month_count) over(order by year) as prior_period_member_months - , div0null( - member_month_count - lag(member_month_count) over(order by year), - lag(member_month_count) over(order by year) - ) as pct_change_member_months - from medical - join pharmacy using(year) - join elig using(year) - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def pmpm_data(): - query = """SELECT PT.*, PB.MEMBER_COUNT, PHARMACY_SPEND FROM PMPM.PMPM_TRENDS PT - LEFT JOIN (SELECT CONCAT(LEFT(YEAR_MONTH, 4), '-', RIGHT(YEAR_MONTH, 2)) AS YEAR_MONTH, - COUNT(*) AS MEMBER_COUNT, - SUM(PHARMACY_PAID) AS PHARMACY_SPEND - FROM PMPM.PMPM_BUILDER - GROUP BY YEAR_MONTH) AS PB - ON PT.YEAR_MONTH = PB.YEAR_MONTH;""" - - data = util.safe_to_pandas(conn, query) - data["year_month"] = pd.to_datetime(data["year_month"], format="%Y-%m").dt.date - data["year"] = pd.to_datetime(data["year_month"], format="%Y-%m").dt.year - data["pharmacy_spend"] = data["pharmacy_spend"].astype(float) - - return data - - -@st.cache_data -def gender_data(): - query = """SELECT GENDER, COUNT(*) AS COUNT FROM CORE.PATIENT GROUP BY 1;""" - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def race_data(): - query = """SELECT RACE, COUNT(*) AS COUNT FROM CORE.PATIENT GROUP BY 1;""" - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def age_data(): - query = """SELECT CASE - WHEN div0(current_date() - BIRTH_DATE, 365) < 49 THEN '34-48' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 49 AND div0(current_date() - BIRTH_DATE, 365) < 65 THEN '49-64' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 65 AND div0(current_date() - BIRTH_DATE, 365) < 79 THEN '65-78' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 79 AND div0(current_date() - BIRTH_DATE, 365) < 99 THEN '79-98' - WHEN div0(current_date() - BIRTH_DATE, 365) >= 99 THEN '99+' END - AS AGE_GROUP, - COUNT(*) AS COUNT - FROM CORE.PATIENT - GROUP BY 1 - ORDER BY 1;""" - data = util.safe_to_pandas(conn, query) - return data - - -cost_data = summary_stats() -data = pmpm_data() -demo_gender = gender_data() -demo_race = race_data() -demo_age = age_data() +cost_data = data.summary_stats() +pmpm_data = data.pmpm_data() +demo_gender = data.gender_data() +demo_race = data.race_data() +demo_age = data.age_data() st.markdown("# Summary of Claims") start_year, end_year = st.select_slider( "Select date range for claims summary", - options=sorted(list(set(data["year"]))), - value=(data["year"].min(), data["year"].max()), + options=sorted(list(set(pmpm_data["year"]))), + value=(pmpm_data["year"].min(), pmpm_data["year"].max()), ) filtered_cost_data = cost_data.loc[ (cost_data["year"] >= str(start_year)) & (cost_data["year"] <= str(end_year)), : ] -filtered_pmpm_data = data.loc[ - (data["year"] >= start_year) & (data["year"] <= end_year), : +filtered_pmpm_data = pmpm_data.loc[ + (pmpm_data["year"] >= start_year) & (pmpm_data["year"] <= end_year), : ] @@ -140,7 +33,7 @@ def age_data(): st.divider() y_axis = st.selectbox( - "Select Metric for Trend Line", [x for x in data.columns if "year" not in x] + "Select Metric for Trend Line", [x for x in pmpm_data.columns if "year" not in x] ) if y_axis: diff --git a/pages/02_financial_summary.py b/pages/02_financial_summary.py index 9054b05..37077d8 100644 --- a/pages/02_financial_summary.py +++ b/pages/02_financial_summary.py @@ -3,206 +3,11 @@ import plost import util import components as comp +import data from streamlit_echarts import st_echarts import time import pandas as pd -conn = util.connection(database="dev_lipsa") - - -@st.cache_data -def test_results(): - query = """ - select * from data_profiling.test_result - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def use_case(): - query = """ - select * from data_profiling.use_case - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def cost_summary(): - query = """ - select * - from dbt_lipsa.cost_summary - order by 1, 2, 3 - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def year_months(): - query = """ - select distinct - year(claim_end_date)::text || '-' || - lpad(month(claim_end_date)::text, 2, '0') - as year_month - , sum(paid_amount) - from core.medical_claim - group by 1 - having sum(paid_amount) > 10 - order by 1 - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def summary_stats(): - query = """ - with medical as ( - select distinct - year(claim_end_date)::text year - , sum(paid_amount) as medical_paid_amount - from core.medical_claim - group by 1 - ) - , pharmacy as ( - select - year(dispensing_date)::text year - , sum(paid_amount) as pharmacy_paid_amount - from core.pharmacy_claim - group by 1 - ), elig as ( - select - substr(year_month, 0, 4) as year - , sum(member_month_count) as member_month_count - from pmpm._int_member_month_count - group by 1 - ) - select - year - , lag(year) over(order by year) as prior_year - , medical_paid_amount + pharmacy_paid_amount as current_period_total_paid - , lag(medical_paid_amount + pharmacy_paid_amount) - over(order by year) as prior_period_total_paid - , div0null( - medical_paid_amount + pharmacy_paid_amount - - lag(medical_paid_amount + pharmacy_paid_amount) over(order by year), - lag(medical_paid_amount + pharmacy_paid_amount) over(order by year) - ) as pct_change_total_paid - , medical_paid_amount as current_period_medical_paid - , lag(medical_paid_amount) over(order by year) as prior_period_medical_paid - , div0null( - medical_paid_amount - lag(medical_paid_amount) over(order by year), - lag(medical_paid_amount) over(order by year) - ) as pct_change_medical_paid - , pharmacy_paid_amount as current_period_pharmacy_paid - , lag(pharmacy_paid_amount) over(order by year) as prior_period_pharmacy_paid - , div0null( - pharmacy_paid_amount - lag(pharmacy_paid_amount) over(order by year), - lag(pharmacy_paid_amount) over(order by year) - ) as pct_change_pharmacy_paid - , member_month_count as current_period_member_months - , lag(member_month_count) over(order by year) as prior_period_member_months - , div0null( - member_month_count - lag(member_month_count) over(order by year), - lag(member_month_count) over(order by year) - ) as pct_change_member_months - from medical - join pharmacy using(year) - join elig using(year) - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def pmpm_by_claim_type(): - query = """ - with spend_summary as ( - select - year(claim_end_date)::text || '-' || - lpad(month(claim_end_date)::text, 2, '0') - as year_month - , claim_type - , sum(paid_amount) as paid_amount_sum - from core.medical_claim - group by 1, 2 - having sum(paid_amount) > 0 - order by 1, 2 desc - ), pharmacy_summary as ( - select - year(dispensing_date)::text || '-' || - lpad(month(dispensing_date)::text, 2, '0') - as year_month - , 'pharmacy' as claim_type - , sum(paid_amount) as paid_amount_sum - from core.pharmacy_claim - group by 1 - ), together as ( - select * from spend_summary union all - select * from pharmacy_summary - ) - select - * - , paid_amount_sum / member_month_count as paid_amount_pmpm - from together - join pmpm._int_member_month_count using(year_month) - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def pmpm_by_service_category_1(): - query = """ - with spend_summary as ( - select - year(claim_end_date)::text || '-' || - lpad(month(claim_end_date)::text, 2, '0') - as year_month - , service_category_1 - , sum(paid_amount) as paid_amount_sum - from core.medical_claim - group by 1, 2 - having sum(paid_amount) > 0 - order by 1, 2 desc - ) - select - * - , paid_amount_sum / member_month_count as paid_amount_pmpm - from spend_summary - join pmpm._int_member_month_count using(year_month) - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def pmpm_by_service_category_1_2(): - query = """ - with spend_summary as ( - select - year(claim_end_date)::text || '-' || - lpad(month(claim_end_date)::text, 2, '0') - as year_month - , service_category_1 - , service_category_2 - , sum(paid_amount) as paid_amount_sum - from core.medical_claim - group by 1, 2, 3 - having sum(paid_amount) > 0 - order by 1, 2, 3 desc - ) - select - * - , paid_amount_sum / member_month_count as paid_amount_pmpm - from spend_summary - join pmpm._int_member_month_count using(year_month) - """ - data = util.safe_to_pandas(conn, query) - return data - def claim_type_line_chart(df, animated=True): if animated: @@ -259,12 +64,12 @@ def claim_type_line_chart(df, animated=True): st_echarts(options=option, height="400px", key="chart") -year_month_values = sorted(list(set(year_months()["year_month"]))) +year_month_values = sorted(list(set(data.year_months()["year_month"]))) year_values = sorted(list(set([x[:4] for x in year_month_values]))) ## --------------------------------- ## ## --- --- ## ## --------------------------------- ## -pmpm_claim_type_data = pmpm_by_claim_type() +pmpm_claim_type_data = data.pmpm_by_claim_type() pmpm_claim_type_data.sort_values(by="year_month", inplace=True) st.markdown("## Claim Type") st.markdown( @@ -301,7 +106,7 @@ def claim_type_line_chart(df, animated=True): paid amount and PMPM.""" ) st.markdown(f"### Spend Summary in {year_string}") -summary_stats_data = summary_stats() +summary_stats_data = data.summary_stats() summary_stats_data = summary_stats_data.loc[ summary_stats_data["year"].isin(selected_range) ] @@ -331,19 +136,26 @@ def claim_type_line_chart(df, animated=True): ## Spend Change ## --------------------------------- ## -for ctype in ['medical', 'pharmacy', 'total']: - summary_stats_data[f'current_period_{ctype}_pmpm'] = ( - summary_stats_data[f'current_period_{ctype}_paid'].astype(float).div( - summary_stats_data['current_period_member_months'].astype(float), fill_value=0) - ) - summary_stats_data[f'prior_period_{ctype}_pmpm'] = ( - summary_stats_data[f'prior_period_{ctype}_paid'].astype(float).div( - summary_stats_data['prior_period_member_months'].astype(float), fill_value=0) +for ctype in ["medical", "pharmacy", "total"]: + summary_stats_data[f"current_period_{ctype}_pmpm"] = ( + summary_stats_data[f"current_period_{ctype}_paid"] + .astype(float) + .div( + summary_stats_data["current_period_member_months"].astype(float), + fill_value=0, + ) ) - summary_stats_data[f'pct_change_{ctype}_pmpm'] = ( - (summary_stats_data[f'current_period_{ctype}_pmpm'] - summary_stats_data[f'prior_period_{ctype}_pmpm']) - .div(summary_stats_data[f'prior_period_{ctype}_pmpm'], fill_value=0) + summary_stats_data[f"prior_period_{ctype}_pmpm"] = ( + summary_stats_data[f"prior_period_{ctype}_paid"] + .astype(float) + .div( + summary_stats_data["prior_period_member_months"].astype(float), fill_value=0 + ) ) + summary_stats_data[f"pct_change_{ctype}_pmpm"] = ( + summary_stats_data[f"current_period_{ctype}_pmpm"] + - summary_stats_data[f"prior_period_{ctype}_pmpm"] + ).div(summary_stats_data[f"prior_period_{ctype}_pmpm"], fill_value=0) # CSS to inject contained in a string @@ -356,25 +168,31 @@ def claim_type_line_chart(df, animated=True): # Inject CSS with Markdown st.markdown(hide_table_row_index, unsafe_allow_html=True) -st.markdown(""" +st.markdown( + """ Get a better sense of the change over time using this table. -""") -test = pd.concat([ - summary_stats_data.assign( - category = lambda x: ctype.title() - )[[ - 'year', - 'category', - f'prior_period_{ctype}_pmpm', - f'current_period_{ctype}_pmpm', - f'pct_change_{ctype}_pmpm' - ]].rename(columns={ - f'current_period_{ctype}_pmpm': 'current_period_pmpm', - f'prior_period_{ctype}_pmpm': 'prior_period_pmpm', - f'pct_change_{ctype}_pmpm': 'pct_change_pmpm' - }) - for ctype in ['medical', 'pharmacy', 'total'] -]) +""" +) +test = pd.concat( + [ + summary_stats_data.assign(category=lambda x: ctype.title())[ + [ + "year", + "category", + f"prior_period_{ctype}_pmpm", + f"current_period_{ctype}_pmpm", + f"pct_change_{ctype}_pmpm", + ] + ].rename( + columns={ + f"current_period_{ctype}_pmpm": "current_period_pmpm", + f"prior_period_{ctype}_pmpm": "prior_period_pmpm", + f"pct_change_{ctype}_pmpm": "pct_change_pmpm", + } + ) + for ctype in ["medical", "pharmacy", "total"] + ] +) test = test.loc[test.year != year_values[0]] st.table(util.format_df(test)) @@ -392,7 +210,7 @@ def claim_type_line_chart(df, animated=True): spend the next. """ ) -service_1_data = pmpm_by_service_category_1() +service_1_data = data.pmpm_by_service_category_1() service_1_data = service_1_data.loc[ service_1_data["year_month"].str[:4].isin(selected_range) ] @@ -459,7 +277,7 @@ def claim_type_line_chart(df, animated=True): label_visibility="collapsed", ) -service_2_data = pmpm_by_service_category_1_2() +service_2_data = data.pmpm_by_service_category_1_2() service_2_data = ( service_2_data.loc[ service_2_data["year_month"].str[:4].isin(selected_range) @@ -502,7 +320,7 @@ def claim_type_line_chart(df, animated=True): A look at pharmacy spend over time during the claims period selected. """ ) -pharm_pmpm = pmpm_by_claim_type() +pharm_pmpm = data.pmpm_by_claim_type() pharm_pmpm = pharm_pmpm.loc[pharm_pmpm["claim_type"] == "pharmacy", :] pharm_pmpm = pharm_pmpm.loc[pharm_pmpm["year_month"].str[:4].isin(selected_range)] st.line_chart(data=pharm_pmpm, x="year_month", y="paid_amount_sum") @@ -516,7 +334,7 @@ def claim_type_line_chart(df, animated=True): failed our tests. It is recommended that you look into these test results to improve the quality of your data and thus, trust in the analysis above. """ -use_case_data = use_case() +use_case_data = data.use_case() st.dataframe(use_case_data, use_container_width=True) st.markdown( @@ -525,7 +343,7 @@ def claim_type_line_chart(df, animated=True): all the checks that failed. """ ) -test_result_data = test_results() +test_result_data = data.test_results() st.dataframe(test_result_data, use_container_width=True) st.markdown( @@ -533,5 +351,5 @@ def claim_type_line_chart(df, animated=True): Then check out the distribution of cost for the spend variables. """ ) -cost_summary_data = cost_summary() +cost_summary_data = data.cost_summary() st.dataframe(cost_summary_data, use_container_width=True) diff --git a/pages/03_chronic_conditions.py b/pages/03_chronic_conditions.py index bcb6819..fb301a6 100644 --- a/pages/03_chronic_conditions.py +++ b/pages/03_chronic_conditions.py @@ -1,86 +1,7 @@ import streamlit as st -import pandas as pd import plost -import util import components - -conn = util.connection(database="dev_lipsa") - - -@st.cache_data -def year_months(): - query = """ - select distinct - year(claim_end_date)::text || '-' || - lpad(month(claim_end_date)::text, 2, '0') - as year_month - , sum(paid_amount) - from core.medical_claim - group by 1 - having sum(paid_amount) > 10 - order by 1 - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def pmpm_by_chronic_condition(): - query = """ - with conditions as ( - select distinct - year(condition_date)::text || '-' || lpad(month(condition_date)::text, 2, '0') as year_month - , claim_id - , patient_id - , code - , condition - , condition_family - from core.condition - inner join chronic_conditions._value_set_tuva_chronic_conditions_hierarchy vs on condition.code = vs.icd_10_cm_code - where code_type = 'icd-10-cm' - ) - , medical_spend as ( - select - year(claim_start_date)::text || '-' || lpad(month(claim_start_date)::text, 2, '0') as year_month - , claim_id - , patient_id - , sum(paid_amount) as medical_paid_amount - from core.medical_claim - group by 1, 2, 3 - ), merged as ( - select - year_month - , condition_family - , sum(medical_paid_amount) as medical_paid_amount_sum - from conditions - join medical_spend using(patient_id, claim_id, year_month) - group by 1, 2 - ) - select - * - from merged - join pmpm._int_member_month_count using(year_month) - order by 2, 1 - """ - data = util.safe_to_pandas(conn, query) - return data - - -@st.cache_data -def condition_data(): - query = """SELECT - CONCAT(date_part(year, FIRST_DIAGNOSIS_DATE), '-', lpad(date_part(month, FIRST_DIAGNOSIS_DATE), 2, 0)) AS DIAGNOSIS_YEAR_MONTH, - CONDITION, - COUNT(*) AS CONDITION_CASES, - AVG(LAST_DIAGNOSIS_DATE + 1 - FIRST_DIAGNOSIS_DATE) AS DIAGNOSIS_DURATION - FROM CHRONIC_CONDITIONS.TUVA_CHRONIC_CONDITIONS_LONG - GROUP BY 1,2 - ORDER BY 3 DESC;""" - data = util.safe_to_pandas(conn, query) - data["diagnosis_year"] = pd.to_datetime( - data["diagnosis_year_month"] - ).dt.year.astype(str) - return data +import data ## --------------------------------- ## @@ -94,13 +15,13 @@ def condition_data(): """ ) -year_month_values = year_months() +year_month_values = data.year_months() year_values = sorted(list(set([x[:4] for x in year_month_values["year_month"]]))) selected_range = components.year_slider(year_values) -chronic_condition_counts = condition_data() -chronic_condition_data = pmpm_by_chronic_condition() +chronic_condition_counts = data.condition_data() +chronic_condition_data = data.pmpm_by_chronic_condition() chronic_condition_data = chronic_condition_data.loc[ chronic_condition_data["year_month"].str[:4].isin(selected_range) ]