Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

773 FutureWarnings in pycode #824

Merged
merged 7 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions pycode/memilio-epidata/memilio/epidata/getCaseData.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,47 +265,47 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
# dict for all files
# filename -> [groupby_list, .agg({}), groupby_index, groupby_cols, mod_cols]
dict_files = {
'infected': [dateToUse, {AnzahlFall: sum}, None, {}, ['Confirmed']],
'deaths': [dateToUse, {AnzahlTodesfall: sum}, None, {}, ['Deaths']],
'all_germany': [dateToUse, {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
'infected': [dateToUse, {AnzahlFall: "sum"}, None, {}, ['Confirmed']],
'deaths': [dateToUse, {AnzahlTodesfall: "sum"}, None, {}, ['Deaths']],
'all_germany': [dateToUse, {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
None, {}, ['Confirmed', 'Deaths', 'Recovered']],
'infected_state': [[dateToUse, IdBundesland], {AnzahlFall: sum}, [IdBundesland],
'infected_state': [[dateToUse, IdBundesland], {AnzahlFall: "sum"}, [IdBundesland],
{dd.EngEng["idState"]: geoger.get_state_ids()}, ['Confirmed']],
'all_state': [[dateToUse, IdBundesland], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
'all_state': [[dateToUse, IdBundesland], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
[IdBundesland], {dd.EngEng["idState"]: geoger.get_state_ids()},
['Confirmed', 'Deaths', 'Recovered']],
'infected_county': [[dateToUse, IdLandkreis], {AnzahlFall: sum}, [IdLandkreis],
'infected_county': [[dateToUse, IdLandkreis], {AnzahlFall: "sum"}, [IdLandkreis],
{dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique()}, ['Confirmed']],
'all_county': [[dateToUse, IdLandkreis], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
'all_county': [[dateToUse, IdLandkreis], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
[IdLandkreis], {dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique()},
['Confirmed', 'Deaths', 'Recovered']],
'all_gender': [[dateToUse, Geschlecht], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
'all_gender': [[dateToUse, Geschlecht], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
[Geschlecht], {dd.EngEng["gender"]: list(
df[dd.EngEng["gender"]].unique())},
['Confirmed', 'Deaths', 'Recovered']],
'all_state_gender': [[dateToUse, IdBundesland, Geschlecht],
{AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}, [
{AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}, [
IdBundesland, Geschlecht],
{dd.EngEng["idState"]: geoger.get_state_ids(), dd.EngEng["gender"]: list(
df[dd.EngEng["gender"]].unique())},
['Confirmed', 'Deaths', 'Recovered']],
'all_county_gender': [[dateToUse, IdLandkreis, Geschlecht],
{AnzahlFall: sum, AnzahlTodesfall: sum,
AnzahlGenesen: sum}, [IdLandkreis, Geschlecht],
{AnzahlFall: "sum", AnzahlTodesfall: "sum",
AnzahlGenesen: "sum"}, [IdLandkreis, Geschlecht],
{dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique(
), dd.EngEng["gender"]: list(df[dd.EngEng["gender"]].unique())},
['Confirmed', 'Deaths', 'Recovered']],
'all_age': [[dateToUse, Altersgruppe], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
'all_age': [[dateToUse, Altersgruppe], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
[Altersgruppe], {dd.EngEng["ageRKI"]: df[dd.EngEng["ageRKI"]].unique()},
['Confirmed', 'Deaths', 'Recovered']],
'all_state_age': [[dateToUse, IdBundesland, Altersgruppe],
{AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}, [
{AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}, [
IdBundesland, Altersgruppe],
{dd.EngEng["idState"]: geoger.get_state_ids(
), dd.EngEng["ageRKI"]: df[dd.EngEng["ageRKI"]].unique()},
['Confirmed', 'Deaths', 'Recovered']],
'all_county_age': [[dateToUse, IdLandkreis, Altersgruppe],
{AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}, [
{AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}, [
IdLandkreis, Altersgruppe],
{dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique(),
dd.EngEng["ageRKI"]: df[dd.EngEng["ageRKI"]].unique()},
Expand Down Expand Up @@ -364,32 +364,32 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
plt.tight_layout()
plt.show()

df.agg({AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}) \
df.agg({AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}) \
.plot(title='COVID-19 infections, deaths, recovered', grid=True,
kind='bar')
plt.tight_layout()
plt.show()

if file == 'all_gender':
df.groupby(Geschlecht).agg(
{AnzahlFall: sum, AnzahlTodesfall: sum,
AnzahlGenesen: sum}).plot(
{AnzahlFall: "sum", AnzahlTodesfall: "sum",
AnzahlGenesen: "sum"}).plot(
title='COVID-19 infections, deaths, recovered',
grid=True, kind='bar')
plt.tight_layout()
plt.show()

if file == 'all_age':
df.groupby(Altersgruppe).agg(
{AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}).plot(
{AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}).plot(
title='COVID-19 infections, deaths, recovered for diff ages',
grid=True, kind='bar')
plt.tight_layout()
plt.show()

# Deaths by "Altersgruppe" (age group):
df_local = df.groupby(Altersgruppe).agg(
{AnzahlTodesfall: sum})
{AnzahlTodesfall: "sum"})

df_local.plot(title='COVID-19 deaths', grid=True,
kind='bar')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ def compare_estimated_and_rki_deathsnumbers(
df_cases["deaths_estimated_daily"] = df_cases['Deaths_estimated'] - \
df_cases['Deaths_estimated'].shift(periods=1, fill_value=0)
df_cases_week = df_cases.groupby("week").agg(
{"deaths_daily": sum, "deaths_estimated_daily": sum}).reset_index()
df_jh_week = df_jh.groupby("week").agg({"deaths_daily": sum}).reset_index()
{"deaths_daily": "sum", "deaths_estimated_daily": "sum"}).reset_index()
df_jh_week = df_jh.groupby("week").agg(
{"deaths_daily": "sum"}).reset_index()
df_cases_week.rename(
columns={'deaths_daily': 'Deaths_weekly',
'deaths_estimated_daily': 'Deaths_estimated_weekly'},
Expand Down
14 changes: 7 additions & 7 deletions pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,10 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
np.zeros(len(gov_county_table[gov_region])))

# merge Eisenach and Wartburgkreis
commuter_migration_file.iloc[:, 2].replace(
'16056', '16063', inplace=True)
commuter_migration_file.iloc[:, 0].replace(
'16056', '16063', inplace=True)
commuter_migration_file.replace({commuter_migration_file.columns[2]:
{'16056': '16063'}}, inplace=True)
commuter_migration_file.replace({commuter_migration_file.columns[0]:
{'16056': '16063'}}, inplace=True)

current_col = countykey2numlist[commuter_migration_file.iloc[i, 0]]
curr_county_migratedto = commuter_migration_file.iloc[i, 1]
Expand Down Expand Up @@ -378,13 +378,13 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
elif ((str(commuter_migration_file.iloc[i, 3]) == 'Übrige Regierungsbezirke (Bundesland)' and str(
commuter_migration_file.iloc[i, 4]).isdigit())
or ((commuter_migration_file.iloc[i, 2]).isdigit() and str(
commuter_migration_file.iloc[i - 1][2]).startswith('nan'))
commuter_migration_file.iloc[i - 1, 2]).startswith('nan'))
or (len(str(commuter_migration_file.iloc[i, 2])) == 2 and
abs(float(commuter_migration_file.iloc[i, 2]) - float(
commuter_migration_file.iloc[i - 1][2])) == 1)
commuter_migration_file.iloc[i - 1, 2])) == 1)
or (len(str(commuter_migration_file.iloc[i, 2])) == 2 and
abs(float(commuter_migration_file.iloc[i, 2]) - float(
commuter_migration_file.iloc[i - 1][2])) == 2)):
commuter_migration_file.iloc[i - 1, 2])) == 2)):

# auxiliary key of Bundesland (key translated to int starting at zero)
dummy_key = int(
Expand Down
4 changes: 2 additions & 2 deletions pycode/memilio-epidata/memilio/epidata/getDIVIData.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def get_divi_data(read_data=dd.defaultDict['read_data'],
[dd.EngEng["idState"],
dd.EngEng["state"],
dd.EngEng["date"]]).agg(
{dd.EngEng["ICU"]: sum, dd.EngEng["ICU_ventilated"]: sum})
{dd.EngEng["ICU"]: "sum", dd.EngEng["ICU_ventilated"]: "sum"})
df_states.reset_index(inplace=True)
df_states.sort_index(axis=1, inplace=True)

Expand All @@ -170,7 +170,7 @@ def get_divi_data(read_data=dd.defaultDict['read_data'],
gd.write_dataframe(df_states, directory, filename, file_format)

# write data for germany to file
df_ger = df.groupby(["Date"]).agg({"ICU": sum, "ICU_ventilated": sum})
df_ger = df.groupby(["Date"]).agg({"ICU": "sum", "ICU_ventilated": "sum"})
df_ger.reset_index(inplace=True)
df_ger.sort_index(axis=1, inplace=True)

Expand Down
4 changes: 2 additions & 2 deletions pycode/memilio-epidata/memilio/epidata/getJHData.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def get_jh_data(read_data=dd.defaultDict['read_data'],
########### Countries ##########################

gb = df.groupby(['CountryRegion', 'Date']).agg(
{"Confirmed": sum, "Recovered": sum, "Deaths": sum})
{"Confirmed": "sum", "Recovered": "sum", "Deaths": "sum"})

gd.write_dataframe(gb.reset_index(), out_folder,
"all_countries_jh", file_format)
Expand All @@ -133,7 +133,7 @@ def get_jh_data(read_data=dd.defaultDict['read_data'],
dfD = df[~df["ProvinceState"].isnull()]

gb = dfD.groupby(['CountryRegion', 'ProvinceState', 'Date']).agg(
{"Confirmed": sum, "Recovered": sum, "Deaths": sum})
{"Confirmed": "sum", "Recovered": "sum", "Deaths": "sum"})

gd.write_dataframe(gb.reset_index(), out_folder,
"all_provincestate_jh", file_format)
Expand Down
28 changes: 14 additions & 14 deletions pycode/memilio-epidata/memilio/epidata/getVaccinationData.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def sanitizing_extrapolation_mobility(
max_sanit_threshold_arr = np.zeros(len(age_groups))

# compute average vaccination ratio per age group for full vaccinations
aver_ratio = df.groupby(dd.EngEng['ageRKI']).agg({column_names[1]: sum})[
aver_ratio = df.groupby(dd.EngEng['ageRKI']).agg({column_names[1]: "sum"})[
column_names[1]].values/age_population[age_groups].sum().values

# compute maximum_sanitizing threshold per age group as maximum of country-wide ratio + 10%
Expand All @@ -186,7 +186,7 @@ def sanitizing_extrapolation_mobility(
vacc_sums_nonsanit = df.groupby(
[dd.EngEng['idCounty'],
dd.EngEng['ageRKI']]).agg(
{column_names[1]: sum}).reset_index()
{column_names[1]: "sum"}).reset_index()
# create new data frame and reshape it
df_fullsum = compute_vaccination_ratios(
age_groups, vacc_sums_nonsanit, column_names[1],
Expand Down Expand Up @@ -354,7 +354,7 @@ def sanitizing_extrapolation_mobility(
dd.EngEng['idCounty'], dd.EngEng['ageRKI']]
df = df.groupby(
groupby_list).agg(
{column_new: sum for column_new in column_names})
{column_new: "sum" for column_new in column_names})
df = df.groupby(
level=[groupby_list.index(dd.EngEng['idCounty']),
groupby_list.index(dd.EngEng['ageRKI'])]).cumsum().reset_index()
Expand Down Expand Up @@ -550,8 +550,8 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
# remove unknown locations if only modest number (i.e. less than 0.1%)
if df_data[
df_data[dd.EngEng['idCounty']] == 'u'].agg(
{'Number': sum}).Number / df_data.agg(
{'Number': sum}).Number < 0.001:
{'Number': "sum"}).Number / df_data.agg(
{'Number': "sum"}).Number < 0.001:
df_data = df_data[df_data[dd.EngEng['idCounty']] != 'u']
else:
raise gd.DataError(
Expand All @@ -560,8 +560,8 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],

if df_data[
df_data[dd.EngEng['ageRKI']] == 'u'].agg(
{'Number': sum}).Number / df_data.agg(
{'Number': sum}).Number < 0.001:
{'Number': "sum"}).Number / df_data.agg(
{'Number': "sum"}).Number < 0.001:
df_data = df_data[df_data[dd.EngEng['ageRKI']] != 'u']
else:
raise gd.DataError(
Expand Down Expand Up @@ -638,8 +638,8 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
min_age_new = [0, 5, 15, 35, 60, 80, max_age_all]

# combine all age group breaks
min_all_ages = sorted(pd.unique(list(itertools.chain(
min_age_old, min_age_pop, min_age_new))))
min_all_ages = sorted(pd.unique(np.array(list(
itertools.chain(min_age_old, min_age_pop, min_age_new)))))

# check if the vaccinated age groups in old age groups start at zero
if min_age_old[0] == 0:
Expand Down Expand Up @@ -873,7 +873,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
[dd.EngEng['date'],
dd.EngEng['idState'],
dd.EngEng['ageRKI']]).agg(
{column: sum for column in vacc_column_names}).reset_index()
{column: "sum" for column in vacc_column_names}).reset_index()
gd.write_dataframe(df_data_agevacc_state_cs,
directory, filename, file_format)

Expand Down Expand Up @@ -918,7 +918,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
[dd.EngEng['date'],
dd.EngEng['idState'],
dd.EngEng['idCounty']]).agg(
{col_new: sum for col_new in vacc_column_names}).reset_index()
{col_new: "sum" for col_new in vacc_column_names}).reset_index()

# store data for all counties
filename = 'vacc_county'
Expand All @@ -932,7 +932,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
df_data_state_cs = df_data_county_cs.groupby(
[dd.EngEng['date'],
dd.EngEng['idState']]).agg(
{column: sum for column in vacc_column_names}).reset_index()
{column: "sum" for column in vacc_column_names}).reset_index()
gd.write_dataframe(df_data_state_cs,
directory, filename, file_format)

Expand Down Expand Up @@ -964,7 +964,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
[dd.EngEng['date'],
dd.EngEng['idState'],
dd.EngEng['ageRKI']]).agg(
{column: sum for column in vacc_column_names}).reset_index()
{column: "sum" for column in vacc_column_names}).reset_index()
gd.write_dataframe(df_data_ageinf_state_cs,
directory, filename, file_format)

Expand Down Expand Up @@ -1009,7 +1009,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
population_new_ages.loc[population_new_ages[dd.EngEng["idState"]]
== countyid, dd.EngEng["idState"]] = county_to_state[countyid]
population_new_ages_states = population_new_ages.groupby(dd.EngEng['idState']).agg(
{age_group: sum for age_group in unique_age_groups_new}).reset_index()
{age_group: "sum" for age_group in unique_age_groups_new}).reset_index()

df_fullsum_state = compute_vaccination_ratios(
unique_age_groups_new, vacc_sums, vacc_column_names[1],
Expand Down
19 changes: 4 additions & 15 deletions pycode/memilio-epidata/memilio/epidata/modifyDataframeSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,6 @@ def impute_and_reduce_df(
df_old[dd.EngEng['date']] = pd.to_datetime(df_old[dd.EngEng['date']])
df_old.Date = df_old.Date.dt.date.astype(df_old.dtypes.Date)

# create empty copy of the df
df_new = pd.DataFrame(columns=df_old.columns)
# make pandas use the same data types....
df_new = df_new.astype(dtype=dict(zip(df_old.columns, df_old.dtypes)))

# remove 'index' column if available
try:
df_new.drop(columns='index', inplace=True)
except KeyError:
pass

# range of dates which should be filled
if min_date == '':
min_date = min(df_old[dd.EngEng['date']])
Expand Down Expand Up @@ -96,9 +85,9 @@ def impute_and_reduce_df(
unique_ids_comb = list(itertools.product(*unique_ids))
# create list of keys/group_by column names
group_by = list(group_by_cols.keys())
# create to store DataFrames in to be concatenated.
# create list to store DataFrames in to be concatenated.
# pd.concat is not called inside the loop for performance reasons.
df_list = [df_new]
df_list = []
# loop over all items in columns that are given to group by (i.e. regions/ages/gender)
for ids in unique_ids_comb:
# filter df
Expand All @@ -116,7 +105,7 @@ def impute_and_reduce_df(

if len(df_local) > 0:
# create values for first date
values = {column: df_local[column][0]
values = {column: df_local[column].iloc[0]
for column in df_local.columns}
# depending on 'start_w_firstval', missing values at the beginning
# of the frame will either be set to zero or to the first available
Expand Down Expand Up @@ -222,7 +211,7 @@ def split_column_based_on_values(
df_reduced = df_to_split[df_to_split[column_to_split] == column_identifiers[i]].rename(
columns={column_vals_name: new_column_labels[i]}).drop(columns=column_to_split)
df_reduced = df_reduced.groupby(
groupby_list).agg({new_column_labels[i]: sum})
groupby_list).agg({new_column_labels[i]: "sum"})
if compute_cumsum:
# compute cumulative sum over level index of ID_County and level
# index of Age_RKI
Expand Down
4 changes: 2 additions & 2 deletions pycode/memilio-plot/memilio/plot/plotMap.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def extract_data(
df.rename(columns={column: 'Count'}, inplace=True)
if output == 'sum':
return df[dffilter].groupby(region_spec).agg(
{'Count': sum}).reset_index()
{'Count': "sum"}).reset_index()
elif output == 'matrix':
if filters != None:
return df[dffilter].loc[:, [region_spec] +
Expand Down Expand Up @@ -197,7 +197,7 @@ def extract_data(

# Aggregated or matrix output.
if output == 'sum':
return df.groupby('Region').agg({'Count': sum}).reset_index()
return df.groupby('Region').agg({'Count': "sum"}).reset_index()
elif output == 'matrix':
return df
else:
Expand Down