diff --git a/pandas_questions.py b/pandas_questions.py index 262ad29..85bd651 100644 --- a/pandas_questions.py +++ b/pandas_questions.py @@ -15,9 +15,9 @@ def load_data(): """Load data from the CSV files referundum/regions/departments.""" - referendum = pd.DataFrame({}) - regions = pd.DataFrame({}) - departments = pd.DataFrame({}) + referendum = pd.read_csv("data/referendum.csv", sep=";") + regions = pd.read_csv("data/regions.csv", sep=",") + departments = pd.read_csv("data/departments.csv", sep=",") return referendum, regions, departments @@ -28,18 +28,37 @@ def merge_regions_and_departments(regions, departments): The columns in the final DataFrame should be: ['code_reg', 'name_reg', 'code_dep', 'name_dep'] """ + merged_df = pd.merge( + departments, + regions, + left_on='region_code', + right_on='code', + suffixes=('_dep', '_reg') + ) + result = merged_df[['code_reg', 'name_reg', 'code_dep', 'name_dep']] - return pd.DataFrame({}) + return result def merge_referendum_and_areas(referendum, regions_and_departments): """Merge referendum and regions_and_departments in one DataFrame. You can drop the lines relative to DOM-TOM-COM departments, and the - french living abroad. + French living abroad. """ + referendum = referendum[~referendum['Department code'].str.startswith('Z')] + referendum.loc[:, 'Department code'] = ( + referendum['Department code'].astype(str).str.zfill(2) + ) - return pd.DataFrame({}) + merged_df = pd.merge( + referendum, + regions_and_departments, + left_on='Department code', + right_on='code_dep' + ) + + return merged_df def compute_referendum_result_by_regions(referendum_and_areas): @@ -48,8 +67,16 @@ def compute_referendum_result_by_regions(referendum_and_areas): The return DataFrame should be indexed by `code_reg` and have columns: ['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B'] """ + region_results = referendum_and_areas.groupby('code_reg').agg({ + 'name_reg': 'first', + 'Registered': 'sum', + 'Abstentions': 'sum', + 'Null': 'sum', + 'Choice A': 'sum', + 'Choice B': 'sum' + }) - return pd.DataFrame({}) + return region_results def plot_referendum_map(referendum_result_by_regions): @@ -61,8 +88,29 @@ def plot_referendum_map(referendum_result_by_regions): should display the rate of 'Choice A' over all expressed ballots. * Return a gpd.GeoDataFrame with a column 'ratio' containing the results. """ + gdf = gpd.read_file("data/regions.geojson") + + merged = gdf.merge( + referendum_result_by_regions, + left_on='code', + right_on='code_reg' + ) + + merged['expressed_ballots'] = ( + merged['Registered'] - merged['Abstentions'] - merged['Null'] + ) + merged['ratio'] = merged['Choice A'] / merged['expressed_ballots'] + + merged.plot( + column='ratio', + cmap='coolwarm', + legend=True, + figsize=(10, 10), + legend_kwds={'label': "Choice A (%)"} + ) - return gpd.GeoDataFrame({}) + plt.title("Referendum Results: Choice A Ratio by Region") + return merged if __name__ == "__main__":