diff --git a/pandas_questions.py b/pandas_questions.py index 262ad29..741a904 100644 --- a/pandas_questions.py +++ b/pandas_questions.py @@ -1,6 +1,6 @@ """Plotting referendum results in pandas. -In short, we want to make beautiful map to report results of a referendum. In +In short, we want to make a beautiful map to report results of a referendum. In some way, we would like to depict results with something similar to the maps that you can find here: https://github.com/x-datascience-datacamp/datacamp-assignment-pandas/blob/main/example_map.png @@ -8,16 +8,17 @@ To do that, you will load the data as pandas.DataFrame, merge the info and aggregate them by regions and finally plot them on a map using `geopandas`. """ + import pandas as pd import geopandas as gpd import matplotlib.pyplot as plt def load_data(): - """Load data from the CSV files referundum/regions/departments.""" - referendum = pd.DataFrame({}) - regions = pd.DataFrame({}) - departments = pd.DataFrame({}) + """Load data from the CSV files referendum/regions/departments.""" + referendum = pd.read_csv('data/referendum.csv', sep=';') + regions = pd.read_csv('data/regions.csv') + departments = pd.read_csv('data/departments.csv') return referendum, regions, departments @@ -28,18 +29,39 @@ def merge_regions_and_departments(regions, departments): The columns in the final DataFrame should be: ['code_reg', 'name_reg', 'code_dep', 'name_dep'] """ + merged_df = pd.merge( + departments, regions, + left_on='region_code', right_on='code', + suffixes=('_dep', '_reg') + ) + regions_and_departments = merged_df[ + ['code_reg', 'name_reg', 'code_dep', 'name_dep'] + ] + regions_and_departments.columns = [ + 'code_reg', 'name_reg', 'code_dep', 'name_dep' + ] - return pd.DataFrame({}) + return regions_and_departments def merge_referendum_and_areas(referendum, regions_and_departments): """Merge referendum and regions_and_departments in one DataFrame. You can drop the lines relative to DOM-TOM-COM departments, and the - french living abroad. + French living abroad. """ + regions_and_departments['code_dep'] = regions_and_departments[ + 'code_dep'].apply(lambda x: str(x).lstrip('0')) + indices_to_drop = referendum[ + referendum['Department code'].str.startswith('Z') + ].index + filtered_referendum = referendum.drop(indices_to_drop) + referendum_and_areas = pd.merge( + regions_and_departments, filtered_referendum, + right_on='Department code', left_on='code_dep', how='right' + ) - return pd.DataFrame({}) + return referendum_and_areas def compute_referendum_result_by_regions(referendum_and_areas): @@ -48,8 +70,20 @@ def compute_referendum_result_by_regions(referendum_and_areas): The return DataFrame should be indexed by `code_reg` and have columns: ['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B'] """ - - return pd.DataFrame({}) + grouped = referendum_and_areas.groupby( + ['code_reg', 'name_reg'] + ).sum().reset_index() + + return grouped.set_index('code_reg')[ + [ + 'name_reg', + 'Registered', + 'Abstentions', + 'Null', + 'Choice A', + 'Choice B' + ] + ] def plot_referendum_map(referendum_result_by_regions): @@ -61,8 +95,19 @@ def plot_referendum_map(referendum_result_by_regions): should display the rate of 'Choice A' over all expressed ballots. * Return a gpd.GeoDataFrame with a column 'ratio' containing the results. """ + regions_geo = gpd.read_file('data/regions.geojson') + merged = pd.merge( + regions_geo, referendum_result_by_regions, + left_on='code', right_on='code_reg' + ) + merged['ratio'] = merged['Choice A'] / ( + merged['Choice A'] + merged['Choice B'] + ) + merged.plot( + column='ratio', legend=True, cmap='coolwarm' + ) - return gpd.GeoDataFrame({}) + return merged if __name__ == "__main__":