update_hrh_data_script (#1516)
* update repo path to Imperial OneDrive

* modify functions that no longer work (see the migration sketch below the changed-files summary)

* modify remaining broken functions that no longer work

* run for funded_plus

* Revert "run for funded_plus"

This reverts commit 1284d9b.

* non-essential changes of data format, just to keep consistent with the TLO master resource files

* Revert "Revert "run for funded_plus""

This reverts commit e4cf31c.

* save new resource files with only data-format changes (such as int/float, one more decimal digit in the float data)

* restore the path used to read the fixed auxiliary file

* re-comment the lines that apply only to the funded_plus scenario

---------

Co-authored-by: Tim Hallett <39991060+tbhallett@users.noreply.github.com>
BinglingICL and tbhallett authored Dec 13, 2024
1 parent 018af3f commit 926d79f
Showing 5 changed files with 58 additions and 59 deletions.
Git LFS files not shown (4 files)
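The "functions that do not work now" in the message above are mostly pandas calls whose behaviour changed in pandas 2.x, chiefly DataFrame.append, which was removed in pandas 2.0 in favour of pd.concat. A minimal sketch of the recurring migration that the diff below applies many times (the table, record and values here are illustrative, not taken from the script):

import pandas as pd

table = pd.DataFrame({'District_Or_Hospital': ['Lilongwe'], 'M01': [10.0]})
record = pd.Series({'District_Or_Hospital': 'Likoma', 'M01': 2.0})

# pandas < 2.0 idiom, removed in pandas 2.0:
# table = table.append(record, ignore_index=True)

# pandas >= 2.0 replacement: turn the Series into a one-row frame and concatenate.
row = record.to_frame().T                      # the Series index becomes the columns
assert (row.columns == table.columns).all()    # same ordering check the script now makes
table = pd.concat([table, row], ignore_index=True)

The to_frame().T step is what lets a plain Series be appended as a single row with its labels aligned to the existing columns.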
@@ -44,18 +44,18 @@

resourcefilepath = Path('./resources')

-path_to_dropbox = Path(
-'/Users/jdbb1/Dropbox/Thanzi La Onse') # <-- point to the TLO dropbox locally
+path_to_onedrive = Path(
+'/Users/jdbb1/Imperial College London/TLOModel - WP - Documents') # <-- point to the TLO onedrive locally

-workingfile = (path_to_dropbox /
+workingfile = (path_to_onedrive /
'05 - Resources' / 'Module-healthsystem' / 'chai ehp resource use data' / 'ORIGINAL' /
'Malawi optimization model import_2022-02-11.xlsx')

-working_file_old = (path_to_dropbox /
+working_file_old = (path_to_onedrive /
'05 - Resources' / 'Module-healthsystem' / 'chai ehp resource use data' / 'ORIGINAL' /
'Optimization model import_Malawi_20180315 v10.xlsx')

-path_to_auxiliaryfiles = (path_to_dropbox /
+path_to_auxiliaryfiles = (path_to_onedrive /
'05 - Resources' /
'Module-healthsystem' /
'chai ehp resource use data' /
@@ -283,8 +283,9 @@

# --- Generate assumptions of established/funded staff distribution at facility levels 0&1a&1b&2
# Read 2018-03-09 Facility-level establishment MOH & CHAM from CHAI auxiliary datasets
-fund_staff_2018_raw = pd.read_excel(path_to_auxiliaryfiles / '2018-03-09 Facility-level establishment MOH & CHAM.xlsx',
-sheet_name='Establishment listing')
+fund_staff_2018_raw = pd.read_csv(path_to_auxiliaryfiles / '2018-03-09 Facility-level establishment MOH & CHAM.csv')
+fund_staff_2018_raw['Number of positions'] = fund_staff_2018_raw['Number of positions'].fillna(0)
+fund_staff_2018_raw['Number of positions'] = fund_staff_2018_raw['Number of positions'].astype(int)

# Get relevant columns
fund_staff_2018 = fund_staff_2018_raw[['Number of positions', 'Facility', 'Facility Type', 'WFOM Cadre']].copy()
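With the auxiliary establishment file now read from its CSV copy rather than the Excel sheet, blank position counts arrive as NaN and the column is parsed as float, which is presumably why the two added lines fill and re-cast it. A small sketch of that effect (toy data, not the CHAI file):

import io
import pandas as pd

# Toy CSV standing in for the establishment listing; the blank cell mimics a missing count.
csv_text = "Number of positions,Facility\n3,Facility A\n,Facility B\n"
raw = pd.read_csv(io.StringIO(csv_text))

print(raw['Number of positions'].dtype)     # float64, because of the blank cell
raw['Number of positions'] = raw['Number of positions'].fillna(0)
raw['Number of positions'] = raw['Number of positions'].astype(int)
print(raw['Number of positions'].tolist())  # [3, 0]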
@@ -556,7 +557,9 @@
'CenHos'].index, 'Facility_Level'] = 'Facility_Level_3'

# Group staff by levels
-immed_need_distribution = pd.DataFrame(immed_need_distribution.groupby(by=['Facility_Level'], sort=False).sum())
+immed_need_distribution = pd.DataFrame(
+immed_need_distribution.groupby(by=['Facility_Level'], sort=False).sum()
+).drop(columns=['FacilityType', 'FacilityName'])
# Drop level 3
immed_need_distribution.drop(index='Facility_Level_3', inplace=True)
# Reset index
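The extra .drop(columns=['FacilityType', 'FacilityName']) reflects another pandas change: older versions silently excluded non-numeric columns from groupby(...).sum(), whereas pandas 2.x keeps them (string columns end up concatenated group-wise), so columns that are meaningless to sum now have to be dropped explicitly or excluded with numeric_only=True. A minimal sketch with made-up columns:

import pandas as pd

df = pd.DataFrame({
    'Facility_Level': ['Facility_Level_0', 'Facility_Level_0'],
    'FacilityType': ['HP', 'HC'],   # a string column that is meaningless to sum
    'M01': [1.0, 2.0],
})

summed = df.groupby(by=['Facility_Level'], sort=False).sum()
# Under pandas >= 2.0 the string column survives (as 'HPHC' here) instead of being dropped silently.
summed = summed.drop(columns=['FacilityType'])

# An equivalent alternative would be .sum(numeric_only=True).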
@@ -773,7 +776,8 @@
# Group the referral hospitals QECH and ZCH as Referral Hospital_Southern
Is_DistrictLevel = fund_staffing_table['Is_DistrictLevel'].values # Save the column 'Is_DistrictLevel' first
fund_staffing_table = pd.DataFrame(
-fund_staffing_table.groupby(by=['District_Or_Hospital'], sort=False).sum()).reset_index()
+fund_staffing_table.groupby(by=['District_Or_Hospital'], sort=False).sum()
+).reset_index().drop(columns=['Is_DistrictLevel'])
fund_staffing_table.insert(1, 'Is_DistrictLevel', Is_DistrictLevel[:-1]) # Add the column 'Is_DistrictLevel'

# Check that in fund_staffing_table every staff count entry >= 0
@@ -809,7 +813,7 @@
record['Is_DistrictLevel'] = True

# get total staff level from the super districts
-cols = set(fund_staffing_table.columns).intersection(set(officer_types_table.Officer_Type_Code))
+cols = list(set(fund_staffing_table.columns).intersection(set(officer_types_table.Officer_Type_Code)))

total_staff = fund_staffing_table.loc[
fund_staffing_table['District_Or_Hospital'] == super_district, cols].values.squeeze()
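Wrapping the column intersection in list(...) matters because recent pandas versions reject a set as a .loc indexer, so the set produced by intersection() has to be materialised as a list before it is used for selection. A small illustration with a toy frame and hypothetical officer codes:

import pandas as pd

df = pd.DataFrame({'District_Or_Hospital': ['A'], 'M01': [1.0], 'N01': [2.0]})
officer_codes = {'M01', 'N01', 'R04'}   # hypothetical codes

cols = list(set(df.columns).intersection(officer_codes))
total_staff = df.loc[df['District_Or_Hospital'] == 'A', cols].values.squeeze()
# Passing the set itself, e.g. df.loc[:, {'M01', 'N01'}], raises a TypeError in recent pandas.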
@@ -823,7 +827,8 @@

# assign w * 100% staff to the new district
record.loc[cols] = w * total_staff
-fund_staffing_table = fund_staffing_table.append(record).reset_index(drop=True)
+assert (record.to_frame().T.columns == fund_staffing_table.columns).all()
+fund_staffing_table = pd.concat([fund_staffing_table, record.to_frame().T], axis=0).reset_index(drop=True)

# take staff away from the super district
fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == super_district, cols] = \
@@ -907,7 +912,7 @@
'Facility_Level_4']

# Check that in fund_staffing_table every staff count entry >= 0
-assert (fund_staffing_table.loc[:, 'M01':'R04'].values >= 0).all()
+assert (fund_staffing_table.loc[:, 'M01':'R04'] >= 0).all().all()
# fund_staffing_table ready!

# Save the table without column 'Is_DistrictLevel'; staff counts in floats
@@ -960,7 +965,8 @@
# Group the referral hospitals QECH and ZCH as Referral Hospital_Southern
Is_DistrictLevel = curr_staffing_table['Is_DistrictLevel'].values # Save the column 'Is_DistrictLevel' first
curr_staffing_table = pd.DataFrame(
-curr_staffing_table.groupby(by=['District_Or_Hospital'], sort=False).sum()).reset_index()
+curr_staffing_table.groupby(by=['District_Or_Hospital'], sort=False).sum()
+).reset_index().drop(columns='Is_DistrictLevel')
curr_staffing_table.insert(1, 'Is_DistrictLevel', Is_DistrictLevel[:-1]) # Add the column 'Is_DistrictLevel'

# No need to add a row for Zomba Mental Hospital, as the updated CHAI data has this row for ZMH.
@@ -993,7 +999,7 @@
record['Is_DistrictLevel'] = True

# get total staff level from the super districts
-cols = set(curr_staffing_table.columns).intersection(set(officer_types_table.Officer_Type_Code))
+cols = list(set(curr_staffing_table.columns).intersection(set(officer_types_table.Officer_Type_Code)))

total_staff = curr_staffing_table.loc[
curr_staffing_table['District_Or_Hospital'] == super_district, cols].values.squeeze()
@@ -1008,7 +1014,8 @@

# assign w * 100% staff to the new district
record.loc[cols] = w * total_staff
-curr_staffing_table = curr_staffing_table.append(record).reset_index(drop=True)
+assert (record.to_frame().T.columns == curr_staffing_table.columns).all()
+curr_staffing_table = pd.concat([curr_staffing_table, record.to_frame().T], axis=0).reset_index(drop=True)

# take staff away from the super district
curr_staffing_table.loc[curr_staffing_table['District_Or_Hospital'] == super_district, cols] = \
@@ -1105,23 +1112,23 @@
for d in pop_districts:
df = pd.DataFrame({'Facility_Level': Facility_Levels[0:4], 'District': d,
'Region': pop.loc[pop['District'] == d, 'Region'].values[0]})
-mfl = mfl.append(df, ignore_index=True, sort=True)
+mfl = pd.concat([mfl, df], ignore_index=True, sort=True)

# Add in the Referral Hospitals, one for each region
for r in pop_regions:
-mfl = mfl.append(pd.DataFrame({
-'Facility_Level': Facility_Levels[4], 'District': None, 'Region': r
-}, index=[0]), ignore_index=True, sort=True)
+df = pd.DataFrame({
+'Facility_Level': Facility_Levels[4], 'District': None, 'Region': r}, index=[0])
+mfl = pd.concat([mfl, df], ignore_index=True, sort=True)

# Add the ZMH
-mfl = mfl.append(pd.DataFrame({
-'Facility_Level': Facility_Levels[5], 'District': None, 'Region': None
-}, index=[0]), ignore_index=True, sort=True)
+df = pd.DataFrame({
+'Facility_Level': Facility_Levels[5], 'District': None, 'Region': None}, index=[0])
+mfl = pd.concat([mfl, df], ignore_index=True, sort=True)

# Add the HQ
-mfl = mfl.append(pd.DataFrame({
-'Facility_Level': Facility_Levels[6], 'District': None, 'Region': None
-}, index=[0]), ignore_index=True, sort=True)
+df = pd.DataFrame({
+'Facility_Level': Facility_Levels[6], 'District': None, 'Region': None}, index=[0])
+mfl = pd.concat([mfl, df], ignore_index=True, sort=True)

# Create the Facility_ID
mfl.loc[:, 'Facility_ID'] = mfl.index
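Each mfl.append(...) in this loop becomes a pd.concat([mfl, df], ...), which copies the accumulated frame on every iteration. An equally valid pattern, shown here only as an alternative sketch and not what the commit does, is to collect the pieces in a list and concatenate once at the end (district and level names below are illustrative):

import pandas as pd

pop_districts = ['Balaka', 'Blantyre']                      # illustrative names
facility_levels = ['Facility_Level_0', 'Facility_Level_1a']

pieces = []
for d in pop_districts:
    pieces.append(pd.DataFrame({'Facility_Level': facility_levels,
                                'District': d, 'Region': 'Southern'}))
pieces.append(pd.DataFrame({'Facility_Level': 'Facility_Level_5',
                            'District': None, 'Region': None}, index=[0]))

mfl = pd.concat(pieces, ignore_index=True, sort=True)
mfl.loc[:, 'Facility_ID'] = mfl.index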
@@ -1409,7 +1416,7 @@
# Generate appt_time_table_coarse with officer_category, instead of officer_type
appt_time_table_coarse = pd.DataFrame(
ApptTimeTable.groupby(['Appt_Type_Code', 'Facility_Level', 'Officer_Category']).sum()
-).reset_index()
+).reset_index().drop(columns=['Officer_Type', 'Officer_Type_Code'])

# Save
# ApptTimeTable.to_csv(
@@ -1475,19 +1482,14 @@

if len(block) == 0:
# no requirement expressed => The appt is not possible at this location
-Officers_Need_For_Appt = Officers_Need_For_Appt.append(
-{'Facility_Level': f,
-'Appt_Type_Code': a,
-'Officer_Type_Codes': False
-}, ignore_index=True)
+df = pd.DataFrame({'Facility_Level': f, 'Appt_Type_Code': a, 'Officer_Type_Codes': False}, index=[0])
+Officers_Need_For_Appt = pd.concat([Officers_Need_For_Appt, df], ignore_index=True)

else:
need_officer_types = list(block['Officer_Type_Code'])
-Officers_Need_For_Appt = Officers_Need_For_Appt.append(
-{'Facility_Level': f,
-'Appt_Type_Code': a,
-'Officer_Type_Codes': need_officer_types
-}, ignore_index=True)
+df = pd.DataFrame({'Facility_Level': f, 'Appt_Type_Code': a, 'Officer_Type_Codes': need_officer_types},
+index=range(len(block)))
+Officers_Need_For_Appt = pd.concat([Officers_Need_For_Appt, df], ignore_index=True)

# Turn this into the set of staff that are required for each type of appointment
FacLevel_By_Officer = pd.DataFrame(columns=Facility_Levels,
@@ -1675,7 +1677,8 @@
HosHC_patient_facing_time_old.iloc[:, 1:].values) /
HosHC_patient_facing_time_old.iloc[:, 1:].values
)
-HosHC_pft_diff = HosHC_pft_diff.append(HosHC_pft_diff.iloc[:, 1:].mean(axis=0), ignore_index=True)
+df = HosHC_pft_diff.iloc[:, 1:].mean(axis=0).to_frame().T
+HosHC_pft_diff = pd.concat([HosHC_pft_diff, df], ignore_index=True)

# save
# HosHC_pft_diff.to_csv(
@@ -1746,13 +1749,8 @@
funded_daily_capability.drop(columns='District_Or_Hospital', inplace=True)

# Add info from mfl: Region and Facility ID
-for i in funded_daily_capability.index:
-the_facility_name = funded_daily_capability.loc[i, 'Facility_Name']
-the_ID = mfl.loc[mfl['Facility_Name'] == the_facility_name, 'Facility_ID']
-the_region = mfl.loc[mfl['Facility_Name'] == the_facility_name, 'Region']
-
-funded_daily_capability.loc[i, 'Facility_ID'] = the_ID.values
-funded_daily_capability.loc[i, 'Region'] = the_region.values
+funded_daily_capability = funded_daily_capability.merge(
+mfl[['Facility_Name', 'Facility_ID', 'Region']], on='Facility_Name', how='left')

# Add 'officer_category' info
funded_daily_capability = funded_daily_capability.merge(officer_types_table, on='Officer_Type_Code', how='left')
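Replacing the row-by-row lookup loop with a single merge on Facility_Name attaches Facility_ID and Region from the master facility list in one vectorised join; how='left' keeps every capability row even if a name were missing from mfl. A minimal sketch with toy tables and hypothetical facility names:

import pandas as pd

capability = pd.DataFrame({'Facility_Name': ['Facility_A', 'Facility_B'],   # hypothetical names
                           'Total_Mins_Per_Day': [480.0, 960.0]})
mfl = pd.DataFrame({'Facility_Name': ['Facility_A', 'Facility_B'],
                    'Facility_ID': [0, 1],
                    'Region': ['Northern', 'Southern']})

capability = capability.merge(
    mfl[['Facility_Name', 'Facility_ID', 'Region']], on='Facility_Name', how='left')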
@@ -1763,6 +1761,9 @@
['Facility_ID', 'Facility_Name', 'Facility_Level', 'District', 'Region', 'Officer_Category'],
dropna=False)[['Total_Mins_Per_Day', 'Staff_Count']].sum()
).reset_index()
+# Non-essential changes of data format, just to keep consistent with the TLO master resource files
+funded_daily_capability_coarse['Staff_Count'] = funded_daily_capability_coarse['Staff_Count'].astype(float)
+funded_daily_capability_coarse['Facility_ID'] = funded_daily_capability_coarse['Facility_ID'].astype(float)

# Since not dropped zero-minute rows in lines 1717-1718,
# check that there are entries for all coarse cadres and all facility id
@@ -1825,13 +1826,8 @@
curr_daily_capability.drop(columns='District_Or_Hospital', inplace=True)

# Add info from mfl: Region and Facility ID
-for i in curr_daily_capability.index:
-the_facility_name = curr_daily_capability.loc[i, 'Facility_Name']
-the_ID = mfl.loc[mfl['Facility_Name'] == the_facility_name, 'Facility_ID']
-the_region = mfl.loc[mfl['Facility_Name'] == the_facility_name, 'Region']
-
-curr_daily_capability.loc[i, 'Facility_ID'] = the_ID.values
-curr_daily_capability.loc[i, 'Region'] = the_region.values
+curr_daily_capability = curr_daily_capability.merge(
+mfl[['Facility_Name', 'Facility_ID', 'Region']], on='Facility_Name', how='left')

# Add 'officer_category' info
curr_daily_capability = curr_daily_capability.merge(officer_types_table, on='Officer_Type_Code', how='left')
@@ -1842,6 +1838,9 @@
['Facility_ID', 'Facility_Name', 'Facility_Level', 'District', 'Region', 'Officer_Category'],
dropna=False)[['Total_Mins_Per_Day', 'Staff_Count']].sum()
).reset_index()
+# Non-essential changes of data format, just to keep consistent with the TLO master resource files
+curr_daily_capability_coarse['Staff_Count'] = curr_daily_capability_coarse['Staff_Count'].astype(float)
+curr_daily_capability_coarse['Facility_ID'] = curr_daily_capability_coarse['Facility_ID'].astype(float)

# Since not dropped zero-minute rows in lines 1797-1798,
# check that there are entries for all coarse cadres and all facility id
