Skip to content

Commit

Permalink
codes_to_items_list: script generalised + PEP 8
Browse files Browse the repository at this point in the history
  • Loading branch information
EvaJanouskova committed Nov 15, 2023
1 parent b6cc514 commit a30017a
Showing 1 changed file with 18 additions and 13 deletions.
31 changes: 18 additions & 13 deletions src/tlo/analysis/codes_to_items_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,39 +18,44 @@
------
"""

import pandas as pd
from pathlib import Path


# ## CHANGE THIS IF YOU WANT TO USE A DIFFERENT FILE AS INPUT
# Name of the input csv file, without the '.csv' extension (the extension is
# appended where the file is read and where the result is saved).
# NOTE(review): the same name is assigned again, with the same value, in the
# configuration section further down; this line looks like the pre-change
# side of the rendered diff -- confirm which assignment should remain.
csv_file_to_update_name = 'ResourceFile_Equipment_withoutEquipmentCodes'
import pandas as pd

# Location of this script; the resource folder below is resolved relative to it
script_path = Path(__file__)
print(script_path)

# #############################
# ## CHANGE THIS FOR YOUR FILE
# Names of the columns holding the item names and the item codes
item_col_name, code_col_name = 'Equip_Item', 'Equip_Code'
# Folder containing the csv file, resolved from the fourth parent of the
# script path
file_path = script_path.parent.parent.parent.parent.joinpath(
    'resources/healthsystem/infrastructure_and_equipment'
)
# Name of the csv file, without the '.csv' extension
csv_file_to_update_name = 'ResourceFile_Equipment_withoutEquipmentCodes'
# #############################

# Load the CSV RF into a DataFrame
df = pd.read_csv(Path(file_path) / (csv_file_to_update_name + '.csv'))

# Find the unique items that have no code and are not None or empty.
# The configurable column names are used throughout, so pointing
# item_col_name / code_col_name at other columns is honoured here as well.
needs_code = (
    df[code_col_name].isna()
    & df[item_col_name].notna()
    & (df[item_col_name] != '')
)
unique_values = df.loc[needs_code, item_col_name].unique()

# Create a mapping of unique values to codes
value_to_code = {}
# Initialize the starting code value: one past the largest code already in
# use, or 0 when the code column holds no codes at all
if not df[code_col_name].isna().all():
    next_code = int(df[code_col_name].max()) + 1
else:
    next_code = 0

# Iterate through unique values
for value in unique_values:
    # Rows holding this item; computed once and reused for both the lookup
    # and the update below
    item_rows = df[item_col_name] == value
    # Check if there is at least one existing code for this value
    matching_rows = df.loc[item_rows, code_col_name].dropna()
    if not matching_rows.empty:
        # Use the existing code for this value
        existing_code = int(matching_rows.iloc[0])
    else:
        # No code exists yet for this item: take the next unused code.
        # NOTE(review): the header of this branch was hidden in the collapsed
        # diff context and is reconstructed from the surrounding logic --
        # confirm against the repository file.
        existing_code = next_code
        next_code += 1
    value_to_code[value] = existing_code
    # Update the code_col_name column for matching rows
    df.loc[item_rows, code_col_name] = existing_code

# Convert the code_col_name column to the nullable integer type, so codes are
# stored as integers even if some rows still have no code. Only the
# configurable column name is used; a hard-coded 'Equip_Code' lookup would
# fail as soon as code_col_name pointed at a differently named column.
df[code_col_name] = df[code_col_name].astype('Int64')

# Save CSV with equipment codes next to the input file, under the input name
# with '_new' appended
df.to_csv(Path(file_path) / (csv_file_to_update_name + '_new.csv'), index=False)

0 comments on commit a30017a

Please sign in to comment.