Skip to content

Commit

Permalink
UP my solution
Browse files Browse the repository at this point in the history
  • Loading branch information
Rania Mani authored and Rania Mani committed Dec 20, 2024
1 parent a18c91e commit ac81be9
Showing 1 changed file with 80 additions and 25 deletions.
105 changes: 80 additions & 25 deletions pandas_questions.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,14 @@
"""Plotting referendum results in pandas.
In short, we want to make a beautiful map to report the results of a
referendum. We would like to depict the results with something similar to the
maps that you can find here:
https://github.com/x-datascience-datacamp/datacamp-assignment-pandas/blob/main/example_map.png
To do that, you will load the data as pandas.DataFrame, merge the info and
aggregate them by regions and finally plot them on a map using `geopandas`.
"""
import pandas as pd
# -*- coding: utf-8 -*-
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd


def load_data():
    """Load the referendum, regions and departments CSV files.

    The three files are read from the ``data/`` directory, relative to the
    current working directory. ``referendum.csv`` is semicolon-separated;
    the other two use the default comma separator.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(referendum, regions, departments)``, in that order.
    """
    referendum = pd.read_csv("data/referendum.csv", delimiter=";")
    regions = pd.read_csv("data/regions.csv")
    departments = pd.read_csv("data/departments.csv")

    return referendum, regions, departments

def merge_regions_and_departments(regions, departments):
    """Merge regions and departments in one DataFrame.

    ``regions`` must provide the columns ``code`` and ``name``;
    ``departments`` must provide ``region_code``, ``code`` and ``name``.
    Neither input is modified.

    The columns in the final DataFrame are:
    ['code_reg', 'name_reg', 'code_dep', 'name_dep']
    """
    # Keep only the columns that are needed before merging, so that any other
    # columns the two tables share are not suffixed and carried along.
    regions = regions.rename(columns={"code": "code_reg", "name": "name_reg"})[
        ["code_reg", "name_reg"]
    ]
    departments = departments.rename(
        columns={
            "region_code": "code_reg",
            "code": "code_dep",
            "name": "name_dep",
        }
    )[["code_reg", "code_dep", "name_dep"]]

    # A right join keeps every region, even one without any department.
    merged = departments.merge(regions, on="code_reg", how="right")
    return merged[["code_reg", "name_reg", "code_dep", "name_dep"]]


def merge_referendum_and_areas(referendum, regions_and_departments):
    """Merge referendum and regions_and_departments in one DataFrame.

    Purely numeric values of ``referendum["Department code"]`` are
    zero-padded to two characters (``"1"`` -> ``"01"``) and then matched
    against ``regions_and_departments["code_dep"]`` with an inner join.
    Referendum rows whose code has no matching ``code_dep`` are therefore
    dropped; this is how the lines relative to DOM-TOM-COM departments and
    French living abroad are expected to be discarded (assuming their codes
    are absent from ``regions_and_departments``).

    The input DataFrames are left unmodified.
    """
    # Work on string codes so integer-typed columns are accepted as well.
    codes = referendum["Department code"].astype(str)
    # Pad only the purely numeric codes; other codes are kept as they are.
    padded = codes.where(~codes.str.isnumeric(), codes.str.zfill(2))

    # ``assign`` returns a new frame, so the caller's DataFrame is untouched.
    normalized = referendum.assign(**{"Department code": padded})
    return normalized.merge(
        regions_and_departments,
        left_on="Department code",
        right_on="code_dep",
        how="inner",
    )


def compute_referendum_result_by_regions(referendum_and_areas):
    """Return a table with the absolute counts summed for each region.

    The return DataFrame is indexed by `code_reg` and has the columns:
    ['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B']

    Regions appear in the order in which they are first encountered in
    ``referendum_and_areas``. The input DataFrame is not modified.
    """
    count_columns = ["Registered", "Abstentions", "Null", "Choice A", "Choice B"]

    # Every (code_reg, name_reg) pair present in the input forms a group, so
    # no further join is needed to "add back" regions. sort=False keeps the
    # first-appearance order of the regions.
    totals = (
        referendum_and_areas.groupby(["code_reg", "name_reg"], sort=False)[
            count_columns
        ]
        .sum(numeric_only=True)
        .reset_index(level="name_reg")
    )
    return totals[["name_reg", *count_columns]]


def plot_referendum_map(referendum_result_by_regions):
    """Plot a map with the results from the referendum.

    * Loads the geographic data from ``data/regions.geojson``.
    * Merges it with ``referendum_result_by_regions`` on ``code_reg``.
    * Plots the rate of 'Choice A' over all expressed ballots, i.e.
      ``Choice A / (Choice A + Choice B)``.
    * Returns a gpd.GeoDataFrame with a column 'ratio' containing the results.

    The input DataFrame is not modified: the ratio is computed on a copy.
    """
    regions_geo = gpd.read_file("data/regions.geojson").rename(
        columns={"code": "code_reg"}
    )

    # Expressed ballots are the ones cast for either choice.
    expressed = (
        referendum_result_by_regions["Choice A"]
        + referendum_result_by_regions["Choice B"]
    )
    # ``assign`` returns a new frame, so no 'ratio' column leaks to the caller.
    results = referendum_result_by_regions.assign(
        ratio=referendum_result_by_regions["Choice A"] / expressed
    )

    merged = regions_geo.merge(results, on="code_reg")

    merged.plot(column="ratio", legend=True, cmap="coolwarm")
    plt.title("Referendum Results: Choice A Ratio by Region")

    return merged

if __name__ == "__main__":

if __name__ == "__main__":
referendum, df_reg, df_dep = load_data()
regions_and_departments = merge_regions_and_departments(
df_reg, df_dep
)
regions_and_departments = merge_regions_and_departments(df_reg, df_dep)
referendum_and_areas = merge_referendum_and_areas(
referendum, regions_and_departments
)
Expand Down

0 comments on commit ac81be9

Please sign in to comment.