jcunninghame · jcunninghame · May 22, 2023 · May 22, 2023 · May 22, 2023
diff --git a/pages/02_financial_summary.py b/pages/02_financial_summary.py
@@ -80,11 +80,23 @@ def pmpm_by_claim_type():
  group by 1, 2
  having sum(paid_amount) > 0
  order by 1, 2 desc
+ ), pharmacy_summary as (
+ select
+ year(dispensing_date)::text || '-' ||
+ lpad(month(dispensing_date)::text, 2, '0')
+ as year_month
+ , 'pharmacy' as claim_type
+ , sum(paid_amount) as paid_amount_sum
+ from core.pharmacy_claim
+ group by 1
+ ), together as (
+ select * from spend_summary union all
+ select * from pharmacy_summary
  )
  select
  *
  , paid_amount_sum / member_month_count as paid_amount_pmpm
- from spend_summary
+ from together
  join pmpm._int_member_month_count using(year_month)
  """
  data = util.safe_to_pandas(conn, query)
@@ -143,8 +155,8 @@ def pmpm_by_service_category_1_2():
  return data
 
 
-year_month_values = year_months()
-year_values = sorted(list(set([x[:4] for x in year_month_values['year_month']])))
+year_month_values = sorted(list(set(year_months()["year_month"])))
+year_values = sorted(list(set([x[:4] for x in year_month_values])))
 
 ## --------------------------------- ##
 ## Header
@@ -190,18 +202,18 @@ def pmpm_by_service_category_1_2():
  [["paid_amount_sum", "member_month_count"]].sum()\
  .assign(paid_amount_pmpm = lambda x: x["paid_amount_sum"] / x["member_month_count"])
 
-st.table(pmpm_claim_type_data)
-
-# plost.bar_chart(
-# data=pmpm_claim_type_data
-# bar='',
-#  value=,
-#  stack='normalize',
-#  direction='horizontal',
-#  legend='top',
-#  height=200
-# )
-
+st.markdown("## Claim Type")
+st.markdown("""
+Explore the per member per month costs across different claim types to gain insights into healthcare expenditure patterns. Inpatient spend will tend to be much higher than professional spend. Dig deeper to find out what is hidden in these costs.
+""")
+plost.bar_chart(
+ data=pmpm_claim_type_data,
+ bar='claim_type',
+ value='paid_amount_pmpm',
+ direction='horizontal',
+ legend='top',
+ height=200
+)
 
 
 ## --------------------------------- ##
@@ -235,7 +247,7 @@ def pmpm_by_service_category_1_2():
 ## Service Category 2
 ## --------------------------------- ##
 service_cat_options = service_1_data["service_category_1"].drop_duplicates().tolist()
-col1, col2 = st.columns(2)
+col1, col2, col3 = st.columns(3)
 with col1:
  st.markdown("""
  Use the following dropdown to get more detail on the service category that interested you.
@@ -246,22 +258,33 @@ def pmpm_by_service_category_1_2():
  options=service_cat_options,
  label_visibility='collapsed',
  )
+with col3:
+ selected_year_month = st.selectbox(
+ label="Select a Year Month",
+ options=["All Time"] + year_month_values,
+ label_visibility='collapsed',
+ )
 
 service_2_data = pmpm_by_service_category_1_2()
 service_2_data = service_2_data.loc[
  service_2_data["year_month"].str[:4].isin(selected_range)
+ & (
+ (service_2_data["year_month"] == selected_year_month) |
+ (selected_year_month == "All Time")
+ )
  & service_2_data["service_category_1"].isin([selected_service_cat])
 ].drop("service_category_1", axis=1).reset_index(drop=True)
 service_2_data = service_2_data\
  .groupby("service_category_2", as_index=False)\
  [["paid_amount_sum", "member_month_count"]].sum()\
  .assign(paid_amount_pmpm = lambda x: x["paid_amount_sum"] / x["member_month_count"])
 
-
 service_2_chart = alt.Chart(service_2_data).mark_bar().encode(
  x="paid_amount_pmpm",
- y=alt.Y("service_category_2", sort="-x"),
- tooltip=["service_category_2", "paid_amount_pmpm"]
+ y=alt.Y("service_category_2", sort="-x", axis=alt.Axis(labelLimit=300)),
+ tooltip=["service_category_2", "paid_amount_pmpm"],
+).properties(
+ height=300
 )
 
 st.altair_chart(service_2_chart, use_container_width=True)
@@ -271,5 +294,8 @@ def pmpm_by_service_category_1_2():
 ## Cost Variables
 ## --------------------------------- ##
 st.markdown("## Cost Variable Quality Summary")
+st.markdown("""
+Explore common descriptive statistics to gain a comprehensive understanding of the quality and distribution of a particular claim cost variable.
+""")
 cost_summary_data = cost_summary()
 st.dataframe(cost_summary_data, use_container_width=True)
diff --git a/pages/03_chronic_conditions.py b/pages/03_chronic_conditions.py
@@ -0,0 +1,97 @@
+import streamlit as st
+import pandas as pd
+import altair as alt
+import plost
+import util
+import components
+
+conn = util.connection(database="dev_lipsa")  # module-level connection passed to the cached query functions below
+
+@st.cache_data
+def year_months():
+ query = """
+ select distinct
+ year(claim_end_date)::text || '-' ||
+ lpad(month(claim_end_date)::text, 2, '0')
+ as year_month
+ , sum(paid_amount)
+ from core.medical_claim
+ group by 1
+ having sum(paid_amount) > 10
+ order by 1
+ """
+ data = util.safe_to_pandas(conn, query)
+ return data
+
+
+@st.cache_data
+def pmpm_by_chronic_condition():
+ query = """
+ with conditions as (
+ select distinct
+ year(condition_date)::text || '-' || lpad(month(condition_date)::text, 2, '0') as year_month
+ , claim_id
+ , patient_id
+ , code
+ , condition
+ , condition_family
+ from core.condition
+ inner join chronic_conditions._value_set_tuva_chronic_conditions_hierarchy vs on condition.code = vs.icd_10_cm_code
+ where code_type = 'icd-10-cm'
+ )
+ , medical_spend as (
+ select
+ year(claim_start_date)::text || '-' || lpad(month(claim_start_date)::text, 2, '0') as year_month
+ , claim_id
+ , patient_id
+ , sum(paid_amount) as medical_paid_amount
+ from core.medical_claim
+ group by 1, 2, 3
+ ), merged as (
+ select
+ year_month
+ , condition_family
+ , sum(medical_paid_amount) as medical_paid_amount_sum
+ from conditions
+ join medical_spend using(patient_id, claim_id, year_month)
+ group by 1, 2
+ )
+ select
+ *
+ from merged
+ join pmpm._int_member_month_count using(year_month)
+ order by 2, 1
+ """
+ data = util.safe_to_pandas(conn, query)
+ return data
+
+
+## --------------------------------- ##
+## Chronic Condition
+## --------------------------------- ##
+st.markdown("## Chronic Condition")
+st.markdown("""
+Certain conditions will be more expensive in your population. Here we see that the top driver of spend
+is `Cardiovascular disease`. The second highest driver is `Metabolic Disease`.
+""")
+
+year_month_values = year_months()
+year_values = sorted(list(set([x[:4] for x in year_month_values['year_month']])))
+selected_range = components.year_slider(year_values)
+
+
+chronic_condition_data = pmpm_by_chronic_condition()
+chronic_condition_data = chronic_condition_data.loc[
+ chronic_condition_data["year_month"].str[:4].isin(selected_range)
+]
+chronic_condition_data = chronic_condition_data\
+ .groupby("condition_family", as_index=False)\
+ [["medical_paid_amount_sum", "member_month_count"]].sum()\
+ .assign(medical_paid_amount_pmpm = lambda x: x["medical_paid_amount_sum"] / x["member_month_count"])\
+ .round()\
+ .sort_values("medical_paid_amount_pmpm", ascending=False)
+
+st.dataframe(
+ chronic_condition_data[["condition_family", "medical_paid_amount_pmpm"]],
+ use_container_width=True
+)