SciCompMod · xsaschako · Nov 27, 2023 · Nov 8, 2023 · Nov 8, 2023 · Nov 8, 2023
diff --git a/pycode/memilio-epidata/memilio/epidata/getCaseData.py b/pycode/memilio-epidata/memilio/epidata/getCaseData.py
@@ -265,47 +265,47 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
     # dict for all files
     # filename -> [groupby_list, .agg({}), groupby_index, groupby_cols, mod_cols]
     dict_files = {
-        'infected': [dateToUse, {AnzahlFall: sum}, None, {}, ['Confirmed']],
-        'deaths': [dateToUse, {AnzahlTodesfall: sum}, None, {}, ['Deaths']],
-        'all_germany': [dateToUse, {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
+        'infected': [dateToUse, {AnzahlFall: "sum"}, None, {}, ['Confirmed']],
+        'deaths': [dateToUse, {AnzahlTodesfall: "sum"}, None, {}, ['Deaths']],
+        'all_germany': [dateToUse, {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
                         None, {}, ['Confirmed', 'Deaths', 'Recovered']],
-        'infected_state': [[dateToUse, IdBundesland], {AnzahlFall: sum}, [IdBundesland],
+        'infected_state': [[dateToUse, IdBundesland], {AnzahlFall: "sum"}, [IdBundesland],
                            {dd.EngEng["idState"]: geoger.get_state_ids()}, ['Confirmed']],
-        'all_state': [[dateToUse, IdBundesland], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
+        'all_state': [[dateToUse, IdBundesland], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
                       [IdBundesland], {dd.EngEng["idState"]: geoger.get_state_ids()},
                       ['Confirmed', 'Deaths', 'Recovered']],
-        'infected_county': [[dateToUse, IdLandkreis], {AnzahlFall: sum}, [IdLandkreis],
+        'infected_county': [[dateToUse, IdLandkreis], {AnzahlFall: "sum"}, [IdLandkreis],
                             {dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique()}, ['Confirmed']],
-        'all_county': [[dateToUse, IdLandkreis], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
+        'all_county': [[dateToUse, IdLandkreis], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
                        [IdLandkreis], {dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique()},
                        ['Confirmed', 'Deaths', 'Recovered']],
-        'all_gender': [[dateToUse, Geschlecht], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
+        'all_gender': [[dateToUse, Geschlecht], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
                        [Geschlecht], {dd.EngEng["gender"]: list(
                            df[dd.EngEng["gender"]].unique())},
                        ['Confirmed', 'Deaths', 'Recovered']],
         'all_state_gender': [[dateToUse, IdBundesland, Geschlecht],
-                             {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}, [
+                             {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}, [
                                  IdBundesland, Geschlecht],
                              {dd.EngEng["idState"]: geoger.get_state_ids(), dd.EngEng["gender"]: list(
                                  df[dd.EngEng["gender"]].unique())},
                              ['Confirmed', 'Deaths', 'Recovered']],
         'all_county_gender': [[dateToUse, IdLandkreis, Geschlecht],
-                              {AnzahlFall: sum, AnzahlTodesfall: sum,
-                                  AnzahlGenesen: sum}, [IdLandkreis, Geschlecht],
+                              {AnzahlFall: "sum", AnzahlTodesfall: "sum",
+                                  AnzahlGenesen: "sum"}, [IdLandkreis, Geschlecht],
                               {dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique(
                               ), dd.EngEng["gender"]: list(df[dd.EngEng["gender"]].unique())},
                               ['Confirmed', 'Deaths', 'Recovered']],
-        'all_age': [[dateToUse, Altersgruppe], {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum},
+        'all_age': [[dateToUse, Altersgruppe], {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"},
                     [Altersgruppe], {dd.EngEng["ageRKI"]: df[dd.EngEng["ageRKI"]].unique()},
                     ['Confirmed', 'Deaths', 'Recovered']],
         'all_state_age': [[dateToUse, IdBundesland, Altersgruppe],
-                          {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}, [
+                          {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}, [
                               IdBundesland, Altersgruppe],
                           {dd.EngEng["idState"]: geoger.get_state_ids(
                           ), dd.EngEng["ageRKI"]: df[dd.EngEng["ageRKI"]].unique()},
                           ['Confirmed', 'Deaths', 'Recovered']],
         'all_county_age': [[dateToUse, IdLandkreis, Altersgruppe],
-                           {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}, [
+                           {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}, [
                                IdLandkreis, Altersgruppe],
                            {dd.EngEng["idCounty"]: df[dd.EngEng["idCounty"]].unique(),
                            dd.EngEng["ageRKI"]: df[dd.EngEng["ageRKI"]].unique()},
@@ -364,32 +364,32 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
                     plt.tight_layout()
                     plt.show()
 
-                    df.agg({AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}) \
+                    df.agg({AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}) \
                         .plot(title='COVID-19 infections, deaths, recovered', grid=True,
                               kind='bar')
                     plt.tight_layout()
                     plt.show()
 
                 if file == 'all_gender':
                     df.groupby(Geschlecht).agg(
-                        {AnzahlFall: sum, AnzahlTodesfall: sum,
-                         AnzahlGenesen: sum}).plot(
+                        {AnzahlFall: "sum", AnzahlTodesfall: "sum",
+                         AnzahlGenesen: "sum"}).plot(
                         title='COVID-19 infections, deaths, recovered',
                         grid=True, kind='bar')
                     plt.tight_layout()
                     plt.show()
 
                 if file == 'all_age':
                     df.groupby(Altersgruppe).agg(
-                        {AnzahlFall: sum, AnzahlTodesfall: sum, AnzahlGenesen: sum}).plot(
+                        {AnzahlFall: "sum", AnzahlTodesfall: "sum", AnzahlGenesen: "sum"}).plot(
                         title='COVID-19 infections, deaths, recovered for diff ages',
                         grid=True, kind='bar')
                     plt.tight_layout()
                     plt.show()
 
                     # Dead by "Altersgruppe":
                     df_local = df.groupby(Altersgruppe).agg(
-                        {AnzahlTodesfall: sum})
+                        {AnzahlTodesfall: "sum"})
 
                     df_local.plot(title='COVID-19 deaths', grid=True,
                                   kind='bar')

diff --git a/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py b/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py
@@ -236,8 +236,9 @@ def compare_estimated_and_rki_deathsnumbers(
     df_cases["deaths_estimated_daily"] = df_cases['Deaths_estimated'] - \
         df_cases['Deaths_estimated'].shift(periods=1, fill_value=0)
     df_cases_week = df_cases.groupby("week").agg(
-        {"deaths_daily": sum, "deaths_estimated_daily": sum}).reset_index()
-    df_jh_week = df_jh.groupby("week").agg({"deaths_daily": sum}).reset_index()
+        {"deaths_daily": "sum", "deaths_estimated_daily": "sum"}).reset_index()
+    df_jh_week = df_jh.groupby("week").agg(
+        {"deaths_daily": "sum"}).reset_index()
     df_cases_week.rename(
         columns={'deaths_daily': 'Deaths_weekly',
                  'deaths_estimated_daily': 'Deaths_estimated_weekly'},

diff --git a/pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py b/pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py
@@ -250,10 +250,10 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
                             np.zeros(len(gov_county_table[gov_region])))
 
                     # merge eisenach and wartburgkreis
-                    commuter_migration_file.iloc[:, 2].replace(
-                        '16056', '16063', inplace=True)
-                    commuter_migration_file.iloc[:, 0].replace(
-                        '16056', '16063', inplace=True)
+                    commuter_migration_file.replace({commuter_migration_file.columns[2]:
+                                                     {'16056': '16063'}}, inplace=True)
+                    commuter_migration_file.replace({commuter_migration_file.columns[0]:
+                                                     {'16056': '16063'}}, inplace=True)
 
                     current_col = countykey2numlist[commuter_migration_file.iloc[i, 0]]
                     curr_county_migratedto = commuter_migration_file.iloc[i, 1]
@@ -378,13 +378,13 @@ def get_commuter_data(read_data=dd.defaultDict['read_data'],
                         elif ((str(commuter_migration_file.iloc[i, 3]) == 'Übrige Regierungsbezirke (Bundesland)' and str(
                                 commuter_migration_file.iloc[i, 4]).isdigit())
                               or ((commuter_migration_file.iloc[i, 2]).isdigit() and str(
-                                commuter_migration_file.iloc[i - 1][2]).startswith('nan'))
+                                commuter_migration_file.iloc[i - 1, 2]).startswith('nan'))
                               or (len(str(commuter_migration_file.iloc[i, 2])) == 2 and
                                   abs(float(commuter_migration_file.iloc[i, 2]) - float(
-                                      commuter_migration_file.iloc[i - 1][2])) == 1)
+                                      commuter_migration_file.iloc[i - 1, 2])) == 1)
                               or (len(str(commuter_migration_file.iloc[i, 2])) == 2 and
                                   abs(float(commuter_migration_file.iloc[i, 2]) - float(
-                                      commuter_migration_file.iloc[i - 1][2])) == 2)):
+                                      commuter_migration_file.iloc[i - 1, 2])) == 2)):
 
                             # auxiliary key of Bundesland (key translated to int starting at zero)
                             dummy_key = int(

diff --git a/pycode/memilio-epidata/memilio/epidata/getDIVIData.py b/pycode/memilio-epidata/memilio/epidata/getDIVIData.py
@@ -161,7 +161,7 @@ def get_divi_data(read_data=dd.defaultDict['read_data'],
         [dd.EngEng["idState"],
          dd.EngEng["state"],
          dd.EngEng["date"]]).agg(
-        {dd.EngEng["ICU"]: sum, dd.EngEng["ICU_ventilated"]: sum})
+        {dd.EngEng["ICU"]: "sum", dd.EngEng["ICU_ventilated"]: "sum"})
     df_states.reset_index(inplace=True)
     df_states.sort_index(axis=1, inplace=True)
 
@@ -170,7 +170,7 @@ def get_divi_data(read_data=dd.defaultDict['read_data'],
     gd.write_dataframe(df_states, directory, filename, file_format)
 
     # write data for germany to file
-    df_ger = df.groupby(["Date"]).agg({"ICU": sum, "ICU_ventilated": sum})
+    df_ger = df.groupby(["Date"]).agg({"ICU": "sum", "ICU_ventilated": "sum"})
     df_ger.reset_index(inplace=True)
     df_ger.sort_index(axis=1, inplace=True)
 

diff --git a/pycode/memilio-epidata/memilio/epidata/getJHData.py b/pycode/memilio-epidata/memilio/epidata/getJHData.py
@@ -112,7 +112,7 @@ def get_jh_data(read_data=dd.defaultDict['read_data'],
     ########### Countries ##########################
 
     gb = df.groupby(['CountryRegion', 'Date']).agg(
-        {"Confirmed": sum, "Recovered": sum, "Deaths": sum})
+        {"Confirmed": "sum", "Recovered": "sum", "Deaths": "sum"})
 
     gd.write_dataframe(gb.reset_index(), out_folder,
                        "all_countries_jh", file_format)
@@ -133,7 +133,7 @@ def get_jh_data(read_data=dd.defaultDict['read_data'],
     dfD = df[~df["ProvinceState"].isnull()]
 
     gb = dfD.groupby(['CountryRegion', 'ProvinceState', 'Date']).agg(
-        {"Confirmed": sum, "Recovered": sum, "Deaths": sum})
+        {"Confirmed": "sum", "Recovered": "sum", "Deaths": "sum"})
 
     gd.write_dataframe(gb.reset_index(), out_folder,
                        "all_provincestate_jh", file_format)

diff --git a/pycode/memilio-epidata/memilio/epidata/getVaccinationData.py b/pycode/memilio-epidata/memilio/epidata/getVaccinationData.py
@@ -171,7 +171,7 @@ def sanitizing_extrapolation_mobility(
     max_sanit_threshold_arr = np.zeros(len(age_groups))
 
     # compute average vaccination ratio per age group for full vaccinations
-    aver_ratio = df.groupby(dd.EngEng['ageRKI']).agg({column_names[1]: sum})[
+    aver_ratio = df.groupby(dd.EngEng['ageRKI']).agg({column_names[1]: "sum"})[
         column_names[1]].values/age_population[age_groups].sum().values
 
     # compute maximum_sanitizing threshold per age group as maxmimum of country-wide ratio + 10%
@@ -186,7 +186,7 @@ def sanitizing_extrapolation_mobility(
     vacc_sums_nonsanit = df.groupby(
         [dd.EngEng['idCounty'],
             dd.EngEng['ageRKI']]).agg(
-        {column_names[1]: sum}).reset_index()
+        {column_names[1]: "sum"}).reset_index()
     # create new data frame and reshape it
     df_fullsum = compute_vaccination_ratios(
         age_groups, vacc_sums_nonsanit, column_names[1],
@@ -354,7 +354,7 @@ def sanitizing_extrapolation_mobility(
                     dd.EngEng['idCounty'], dd.EngEng['ageRKI']]
     df = df.groupby(
         groupby_list).agg(
-        {column_new: sum for column_new in column_names})
+        {column_new: "sum" for column_new in column_names})
     df = df.groupby(
         level=[groupby_list.index(dd.EngEng['idCounty']),
                groupby_list.index(dd.EngEng['ageRKI'])]).cumsum().reset_index()
@@ -550,8 +550,8 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
         # remove unknown locations if only modest number (i.e. less than 0.1%)
         if df_data[
                 df_data[dd.EngEng['idCounty']] == 'u'].agg(
-                {'Number': sum}).Number / df_data.agg(
-                {'Number': sum}).Number < 0.001:
+                {'Number': "sum"}).Number / df_data.agg(
+                {'Number': "sum"}).Number < 0.001:
             df_data = df_data[df_data[dd.EngEng['idCounty']] != 'u']
         else:
             raise gd.DataError(
@@ -560,8 +560,8 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
 
         if df_data[
                 df_data[dd.EngEng['ageRKI']] == 'u'].agg(
-                {'Number': sum}).Number / df_data.agg(
-                {'Number': sum}).Number < 0.001:
+                {'Number': "sum"}).Number / df_data.agg(
+                {'Number': "sum"}).Number < 0.001:
             df_data = df_data[df_data[dd.EngEng['ageRKI']] != 'u']
         else:
             raise gd.DataError(
@@ -638,8 +638,8 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
         min_age_new = [0, 5, 15, 35, 60, 80, max_age_all]
 
         # combine all age group breaks
-        min_all_ages = sorted(pd.unique(list(itertools.chain(
-            min_age_old, min_age_pop, min_age_new))))
+        min_all_ages = sorted(pd.unique(np.array(list(
+            itertools.chain(min_age_old, min_age_pop, min_age_new)))))
 
         # check if the vaccinated age groups in old age groups start at zero
         if min_age_old[0] == 0:
@@ -873,7 +873,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
         [dd.EngEng['date'],
          dd.EngEng['idState'],
          dd.EngEng['ageRKI']]).agg(
-        {column: sum for column in vacc_column_names}).reset_index()
+        {column: "sum" for column in vacc_column_names}).reset_index()
     gd.write_dataframe(df_data_agevacc_state_cs,
                        directory, filename, file_format)
 
@@ -918,7 +918,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
         [dd.EngEng['date'],
          dd.EngEng['idState'],
          dd.EngEng['idCounty']]).agg(
-        {col_new: sum for col_new in vacc_column_names}).reset_index()
+        {col_new: "sum" for col_new in vacc_column_names}).reset_index()
 
     # store data for all counties
     filename = 'vacc_county'
@@ -932,7 +932,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
     df_data_state_cs = df_data_county_cs.groupby(
         [dd.EngEng['date'],
          dd.EngEng['idState']]).agg(
-        {column: sum for column in vacc_column_names}).reset_index()
+        {column: "sum" for column in vacc_column_names}).reset_index()
     gd.write_dataframe(df_data_state_cs,
                        directory, filename, file_format)
 
@@ -964,7 +964,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
         [dd.EngEng['date'],
          dd.EngEng['idState'],
          dd.EngEng['ageRKI']]).agg(
-        {column: sum for column in vacc_column_names}).reset_index()
+        {column: "sum" for column in vacc_column_names}).reset_index()
     gd.write_dataframe(df_data_ageinf_state_cs,
                        directory, filename, file_format)
 
@@ -1009,7 +1009,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'],
             population_new_ages.loc[population_new_ages[dd.EngEng["idState"]]
                                     == countyid, dd.EngEng["idState"]] = county_to_state[countyid]
         population_new_ages_states = population_new_ages.groupby(dd.EngEng['idState']).agg(
-            {age_group: sum for age_group in unique_age_groups_new}).reset_index()
+            {age_group: "sum" for age_group in unique_age_groups_new}).reset_index()
 
         df_fullsum_state = compute_vaccination_ratios(
             unique_age_groups_new, vacc_sums, vacc_column_names[1],

diff --git a/pycode/memilio-epidata/memilio/epidata/modifyDataframeSeries.py b/pycode/memilio-epidata/memilio/epidata/modifyDataframeSeries.py
@@ -58,17 +58,6 @@ def impute_and_reduce_df(
         df_old[dd.EngEng['date']] = pd.to_datetime(df_old[dd.EngEng['date']])
         df_old.Date = df_old.Date.dt.date.astype(df_old.dtypes.Date)
 
-    # create empty copy of the df
-    df_new = pd.DataFrame(columns=df_old.columns)
-    # make pandas use the same data types....
-    df_new = df_new.astype(dtype=dict(zip(df_old.columns, df_old.dtypes)))
-
-    # remove 'index' column if available
-    try:
-        df_new.drop(columns='index', inplace=True)
-    except KeyError:
-        pass
-
     # range of dates which should be filled
     if min_date == '':
         min_date = min(df_old[dd.EngEng['date']])
@@ -96,9 +85,9 @@ def impute_and_reduce_df(
     unique_ids_comb = list(itertools.product(*unique_ids))
     # create list of keys/group_by column names
     group_by = list(group_by_cols.keys())
-    # create to store DataFrames in to be concatenated.
+    # create list to collect the DataFrames that will be concatenated.
     # pd.concat is not called inside the loop for performance reasons.
-    df_list = [df_new]
+    df_list = []
     # loop over all items in columns that are given to group by (i.e. regions/ages/gender)
     for ids in unique_ids_comb:
         # filter df
@@ -116,7 +105,7 @@ def impute_and_reduce_df(
 
         if len(df_local) > 0:
             # create values for first date
-            values = {column: df_local[column][0]
+            values = {column: df_local[column].iloc[0]
                       for column in df_local.columns}
             # depending on 'start_w_firstval', missing values at the beginning
             # of the frame will either be set to zero or to the first available
@@ -222,7 +211,7 @@ def split_column_based_on_values(
         df_reduced = df_to_split[df_to_split[column_to_split] == column_identifiers[i]].rename(
             columns={column_vals_name: new_column_labels[i]}).drop(columns=column_to_split)
         df_reduced = df_reduced.groupby(
-            groupby_list).agg({new_column_labels[i]: sum})
+            groupby_list).agg({new_column_labels[i]: "sum"})
         if compute_cumsum:
             # compute cummulative sum over level index of ID_County and level
             # index of Age_RKI

diff --git a/pycode/memilio-plot/memilio/plot/plotMap.py b/pycode/memilio-plot/memilio/plot/plotMap.py
@@ -121,7 +121,7 @@ def extract_data(
         df.rename(columns={column: 'Count'}, inplace=True)
         if output == 'sum':
             return df[dffilter].groupby(region_spec).agg(
-                {'Count': sum}).reset_index()
+                {'Count': "sum"}).reset_index()
         elif output == 'matrix':
             if filters != None:
                 return df[dffilter].loc[:, [region_spec] +
@@ -197,7 +197,7 @@ def extract_data(
 
         # Aggregated or matrix output.
         if output == 'sum':
-            return df.groupby('Region').agg({'Count': sum}).reset_index()
+            return df.groupby('Region').agg({'Count': "sum"}).reset_index()
         elif output == 'matrix':
             return df
         else: