Skip to content

Commit

Permalink
Migrate to polars
Browse files Browse the repository at this point in the history
  • Loading branch information
gutzbenj committed Mar 26, 2023
1 parent 6f99618 commit 32b81b6
Show file tree
Hide file tree
Showing 75 changed files with 2,386 additions and 3,208 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ jobs:
runs-on: ubuntu-latest
name: Check code coverage
env:
OS: "ubuntu-latest"
OS: ubuntu-latest
PYTHON: "3.11"
steps:
- name: Acquire sources
uses: actions/checkout@v3

- name: Install Poetry
run: pipx install poetry
run: pipx install poetry==1.4.0

- name: Setup Python
uses: actions/setup-python@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
uses: actions/checkout@v3

- name: Install Poetry
run: pipx install poetry
run: pipx install poetry==1.4.0

- name: Setup Python
id: python-setup
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
uses: actions/checkout@v3

- name: Install Poetry
run: pipx install poetry
run: pipx install poetry==1.4.0

- name: Setup Python
uses: actions/setup-python@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
uses: actions/checkout@v3

- name: Install Poetry
run: pipx install poetry
run: pipx install poetry==1.4.0

- name: Setup Python
uses: actions/setup-python@v4
Expand Down
19 changes: 8 additions & 11 deletions benchmarks/interpolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@
pl.Config.set_tbl_width_chars(400)
pl.Config.set_tbl_cols(None)

# pl.set_option("display.width", 400)
# pd.set_option("display.max_columns", None)

"""
example:
Expand Down Expand Up @@ -63,9 +60,9 @@ def request_weather_data(
# request the nearest weather stations
request = stations.filter_by_distance(latlon=(lat, lon), distance=distance)
print(request.df)
station_ids = request.df["station_id"].values.tolist()
latitudes = request.df["latitude"].values.tolist()
longitudes = request.df["longitude"].values.tolist()
station_ids = request.df.get_column("station_id").to_list()
latitudes = request.df.get_column("latitude").to_list()
longitudes = request.df.get_column("longitude").to_list()

utm_x = []
utm_y = []
Expand All @@ -75,13 +72,13 @@ def request_weather_data(
utm_y.append(y)

# request parameter from weather stations
df = request.values.all().df.dropna()
df = request.values.all().df.drop_nulls()

# filters by one exact time and saves the given parameter per station at this time
day_time = start_date + timedelta(days=1)
filtered_df = df[df["date"].astype(str).str[:] == day_time.strftime("%Y-%m-%d %H:%M:%S+00:00")]
filtered_df = df.filter(pl.col("date").eq(day_time))
print(filtered_df)
values = filtered_df["value"].values.tolist()
values = filtered_df.get_column("value").to_list()

return Data(
station_ids=station_ids,
Expand All @@ -94,15 +91,15 @@ def request_weather_data(

def interpolate_data(latitude: float, longitude: float, data: Data):
# function for linear interpolation over the scattered station points
f = interpolate.interp2d(data.utm_x, data.utm_y, data.values, kind="linear")
f = interpolate.LinearNDInterpolator(points=list(zip(data.utm_x, data.utm_y)), values=data.values)
x, y, _, _ = utm.from_latlon(latitude, longitude)
interpolated = f(x, y)
print(f"{interpolated=}")

# append interpolated value to the list to visualize the points later on
data.utm_x.append(x)
data.utm_y.append(y)
data.values.append(interpolated[0])
data.values.append(interpolated)
data.station_ids.append("interpolated")
data.colors.append("red")

Expand Down
16 changes: 12 additions & 4 deletions benchmarks/interpolation_over_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,31 @@ def get_regular_df(start_date: datetime, end_date: datetime, exclude_stations: l
)
request = stations.filter_by_distance(latlon=(50.0, 8.9), distance=30)
df = request.values.all().df.drop_nulls()
station_ids = df.get_column("station_id").to_series().tolist()
station_ids = df.get_column("station_id").to_list()
first_station_id = set(station_ids).difference(set(exclude_stations)).pop()
return df.filter(pl.col("station_id").eq(first_station_id))


def get_rmse(regular_values: pl.Series, interpolated_values: pl.Series):
n = regular_values.size
n = regular_values.len()
return (((regular_values - interpolated_values).drop_nulls() ** 2).sum() / n) ** 0.5


def get_corr(regular_values: pl.Series, interpolated_values: pl.Series):
    """Return the correlation coefficient between regular and interpolated values.

    Both series are converted to plain Python lists and handed to
    ``np.corrcoef``; the off-diagonal entry of the resulting matrix is
    returned as a Python scalar.
    """
    observed = regular_values.to_list()
    estimated = interpolated_values.to_list()
    correlation_matrix = np.corrcoef(observed, estimated)
    return correlation_matrix[0][1].item()


def visualize(regular_df: pl.DataFrame, interpolated_df: pl.DataFrame):
rmse = get_rmse(regular_df.get_column("value"), interpolated_df.get_column("value"))
corr = get_corr(regular_df.get_column("value"), interpolated_df.get_column("value"))
plt.plot_date(regular_df.get_column("date"), regular_df.get_column("value"), fmt="red", label="regular")
plt.plot_date(
interpolated_df.get_column("date"), interpolated_df.get_column("value"), fmt="black", label="interpolated"
)
title = f"RMSE: {np.round(rmse, 2)}"
title = (
f"RMSE: {np.round(rmse, 2)}, CORR: {np.round(corr, 2)}, "
f"STATION_IDS: {interpolated_df.get_column('station_ids').to_list()[0]}"
)
plt.title(title)
plt.legend()
plt.tight_layout()
Expand All @@ -59,7 +67,7 @@ def main():
start_date = datetime(2022, 1, 1)
end_date = datetime(2022, 2, 24)
interpolated_df = get_interpolated_df(start_date, end_date)
exclude_stations = interpolated_df.station_ids[0]
exclude_stations = interpolated_df.get_column("station_ids")[0]
regular_df = get_regular_df(start_date, end_date, exclude_stations)
visualize(regular_df, interpolated_df)

Expand Down
7 changes: 1 addition & 6 deletions docs/img/readme_img.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@
from matplotlib.patches import Rectangle

from wetterdienst.provider.dwd.observation import (
DwdObservationDataset,
DwdObservationPeriod,
DwdObservationRequest,
DwdObservationResolution,
)

plt.style.use("ggplot")
Expand Down Expand Up @@ -88,9 +85,7 @@ def create_hohenpeissenberg_warming_stripes():
"""Create warming stripes for Hohenpeissenberg
Source: https://matplotlib.org/matplotblog/posts/warming-stripes/
"""
request = DwdObservationRequest(
"temperature_air_200", DwdObservationResolution.ANNUAL, DwdObservationPeriod.HISTORICAL
).filter_by_name("Hohenpeissenberg")
request = DwdObservationRequest("temperature_air_200", "annual", "historical").filter_by_name("Hohenpeissenberg")

values_df = request.values.all().df

Expand Down
24 changes: 17 additions & 7 deletions example/observations_station_gaussian_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@ def __init__(self, station_data: StationsResult):

valid_data = self.get_valid_data(result_values)

valid_data = valid_data.with_row_count("rc")

model, pars = self.make_composite_yearly_model(valid_data)

x = valid_data.with_row_count("rc").get_column("rc").to_numpy()
x = valid_data.get_column("rc").to_numpy()
y = valid_data.get_column("value").to_numpy()

out = model.fit(y, pars, x=x)
Expand Down Expand Up @@ -101,13 +103,13 @@ def make_composite_yearly_model(self, valid_data: pl.DataFrame) -> Tuple[Gaussia
https://lmfit.github.io/lmfit-py/model.html#composite-models-adding-or-multiplying-models"""
number_of_years = valid_data.get_column("date").dt.year().n_unique()

x = valid_data.with_row_count("rc").get_column("rc").to_numpy()
x = valid_data.get_column("rc").to_numpy()
y = valid_data.get_column("value").to_numpy()

index_per_year = x.max() / number_of_years

pars, composite_model = None, None
for year, group in valid_data.groupby(pl.col("date").dt.year()):
for year, group in valid_data.groupby(pl.col("date").dt.year(), maintain_order=True):
gmod = GaussianModel(prefix=f"g{year}_")
if pars is None:
pars = gmod.make_params()
Expand All @@ -121,9 +123,11 @@ def make_composite_yearly_model(self, valid_data: pl.DataFrame) -> Tuple[Gaussia
return composite_model, pars

@staticmethod
def model_pars_update(year: int, group: pl.DataFrame, pars: Parameters, index_per_year: float, y_max: float) -> Parameters:
def model_pars_update(
year: int, group: pl.DataFrame, pars: Parameters, index_per_year: float, y_max: float
) -> Parameters:
"""updates the initial values of the model parameters"""
idx = group.with_row_count("rc").get_column("rc").to_numpy()
idx = group.get_column("rc").to_numpy()
mean_index = idx.mean()

pars[f"g{year}_center"].set(value=mean_index, min=0.75 * mean_index, max=1.25 * mean_index)
Expand All @@ -135,8 +139,14 @@ def model_pars_update(year: int, group: pl.DataFrame, pars: Parameters, index_pe
def plot_data_and_model(self, valid_data: pl.DataFrame, out: ModelResult, savefig_to_file=True) -> None:
"""plots the data and the model"""
if savefig_to_file:
fig, ax = fig, ax = plt.subplots(figsize=(12, 12))
df = pl.DataFrame({"year": valid_data.get_column("date"), "value": valid_data.get_column("value").to_numpy(), "model": out.best_fit})
_ = plt.subplots(figsize=(12, 12))
df = pl.DataFrame(
{
"year": valid_data.get_column("date"),
"value": valid_data.get_column("value").to_numpy(),
"model": out.best_fit,
}
)
title = valid_data.get_column("parameter").unique()[0]
df.to_pandas().plot(x="year", y=["value", "model"], title=title)
if savefig_to_file:
Expand Down
1,591 changes: 345 additions & 1,246 deletions example/wetterdienst_notebook.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 32b81b6

Please sign in to comment.