-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstatistical_distribution_aep.R
548 lines (467 loc) · 26.9 KB
/
statistical_distribution_aep.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
library(tidyverse)
conflicted::conflict_prefer(name = "select", winner = "dplyr")
conflicted::conflict_prefer(name = "filter", winner = "dplyr")
library(wesanderson)
# Avoid the scientific notation
options(scipen = 999)
# Download NREL sample onshore wind turbine's power curve which has 3.3 MWs of
# rated power and 148 meter rotor diameter
nrel_pc_url = "https://raw.githubusercontent.com/NREL/turbine-models/master/Onshore/2023NREL_Bespoke_3.3MW_148.csv"
nrel_pc = read.csv(nrel_pc_url)
head(nrel_pc, n = 15)
# Select only wind speed and power. Rename them and subset only power values
# other than 0.
nrel_pc = nrel_pc %>%
select(1,2) %>%
rename(WindSpeed = 1, Power = 2) %>%
filter(Power != 0)
head(nrel_pc, n = 15)
# Since we will discuss the statistical distributions and their limitations in
# the wind energy applications, we should have a wind speed data. For this
# purpose, we've decided to use Riso met mast located at the DTU Riso Campus,
# Denmark. This met mast has measurements period includes 12 years which starts
# from 1995 and extends up to 2007. Wind speed has been measured at 44, 77, 125
# meter heights above from the ground. We will use the top anemometers wind
# speed measurements in this study. Here you can see the dataset below;
"https://gitlab.windenergy.dtu.dk/fair-data/winddata-revamp/winddata-documentation/-/blob/master/risoe_m.md"
# To proceed to this script, you should download the data named, risose_m_all.nc
library(ncdf4)
dtu_riso_metmast = nc_open("risoe_m_all.nc")
#Let's inspect the variables and the dimensions found in the netcdf file.
names(dtu_riso_metmast[["var"]])
names(dtu_riso_metmast[["dim"]])
# Let's extract the wind speed at 125 meter height.
ws125 = ncvar_get(dtu_riso_metmast, "ws125")
# Let's extract the time and covert it into human readable format.
# Metadata about the time variable states that it is stored as
# "minutes since 1995-11-20 16:25:00"
time = as.POSIXct(ncvar_get(dtu_riso_metmast, "time")*60,
origin = "1995-11-20 16:25:00", tz = "Europe/Copenhagen")
# Although it's a fact that one year data is not enough for the wind resource
# studies, this study aims to show the limitations of the parametric statistical
# distributions hence, we have chosen the complete year 1997 and taken the
# subset of the data.
ws125_dtu = data.frame(time, ws125) %>%
filter(year(time) == 1997) %>%
rename(DateTime = 1,
WindSpeed = 2)
summary(ws125_dtu)
#We have 22 NA values and we have to omit the NA values before proceeding. Our
# recovery rate is
1-(sum(is.na(ws125_dtu$WindSpeed))/nrow(ws125_dtu))
# very close to the 1 which is 0.9995814. So it's okay to omit these values and
# proceed to the calculations.
ws125_dtu = na.omit(ws125_dtu)
# Since one of the main goals of this document is investigating the comparison
# between non-parametric and parametric estimation in the annual energy
# production calculation of a site, we should have a power curve function as
# well. For this purpose, loess function of R has been used. Span and degree
# parameters of the loess model function have also been searched over a grid.
#Let's create the hyperparameter grid.
grid_search = expand.grid(degree = 0:2,
span = c(seq(0.05, 0.1, 0.01),
seq(0.15, 0.5, 0.05))) %>%
mutate(RMSE = NA_real_)
for (i in 1:nrow(grid_search)) {
nrel_model = loess(Power ~ WindSpeed,
data = nrel_pc,
degree = grid_search[i,1],
span = grid_search[i,2])
rmse = nrel_pc %>%
mutate(Power_P = predict(nrel_model, newdata = nrel_pc)) %>%
summarise(RMSE = sqrt(mean((Power_P - Power)^2))) %>%
pull()
grid_search[i, 3] = rmse
}
loess_best_parameters = grid_search[which.min(grid_search$RMSE),]
nrel_model = loess(Power ~ WindSpeed,
data = nrel_pc,
degree = loess_best_parameters$degree,
span = loess_best_parameters$span)
nrel_pc %>%
mutate(Power_P = predict(nrel_model, newdata = nrel_pc)) %>%
ggplot(aes(x = WindSpeed)) +
geom_point(aes(y = Power, color = "Observation"), alpha = 0.75) +
geom_line(aes(y = Power_P, color = "LOESS Fit"), alpha = 0.75) +
scale_x_continuous(name = "Wind Speed (m/s)", breaks = seq(0, 25, 1)) +
scale_y_continuous(name = "Power (kW)", breaks = seq(0, 3400, 200)) +
scale_color_manual(values = c("Observation" = "cyan4",
"LOESS Fit" = "tomato")) +
guides(color = guide_legend(override.aes = list(shape = c(NA, 19), linetype = c(1,NA)))) +
ggtitle(label = "NREL Wind Turbine - 3.3 MW with 148m rotor dimater") +
theme_bw() +
theme(legend.title = element_blank())
# While the cyan colored points represents the real data taken from NREL wind
# power curve, tomato line indicates the curve fitted to the predictions.
# Since we have a power curve model that overfits the real data (taken from
# NREL), it's time to calculate the annual energy production of the virtual wind
# turbine if it would be erected directly on the location and the height (125 m)
# of the met mast found at the DTU Riso Campus.
ws125_dtu = ws125_dtu %>%
mutate(Power_P = predict(nrel_model, newdata = WindSpeed)) %>%
# Since the nrel_model will create NA values for the wind speed values outside
# the input range (WindSpeed<3.25 & WindSpeed >25), let's replace them with 0.
mutate(Power_P = replace_na(Power_P, 0))
# Let's calculate the annual energy production (AEP) in MWh.
# It should'nt be forgotten that the data has 10 minutes time resolution
# That's why we're dividing the result to 6.
(aep_pc = sum(ws125_dtu$Power_P)/1000/6)
# Let's calculate the capacity factor of this virtually erected 3.3 MW WTG
(cf_pc = aep_pc/nrow(ws125_dtu)/3.3*6)
# We've calculated the AEP by applying the power curve model we've created to
# the each wind speed in each row in time series. We'll call it power curve
# approach since then. It could be assumed as the grand truth by neglecting the
# losses and uncertainties. Now let's calculate the AEP with the most popular
# statistical distribution in wind energy sector as well. We are going to fit
# Weibull distribution with two different fitting methods. While the first one
# is maximum likelihood estimation, second one is moment matching estimation.
# The Weibull distribution is commonly used in the wind energy sector due to its
# simplicity and historical precedence. It provides a convenient and relatively
# straightforward way to represent wind speed distributions for resource
# assessment and energy production calculations. Many commercial software
# packages also incorporate the Weibull distribution because it allows for quick
# and efficient calculations, making it easier to perform wind energy
# assessments on a large scale.
# Weibull can be described with the two parameters named as shape (k) and
# scale (a). Since it is a parametric distribution and quite good at
# representing the nature of the wind speed over a time for a location, it
# provides a common framework for wind resource assessment and energy production
# calculations. This standardization allows for easier comparison of results
# across different projects, and locations. For instance, anyone can calculate
# the AEP of a point by just only knowing the parameters of the weibull and
# the power curve function of the desired wind turbine for that location.
# For the statistical distribution fitting we'll use fitdistrplus library.
library(fitdistrplus)
# First method for fitting - Maximum likelihood estimation
weibull_mle = fitdist(data = ws125_dtu$WindSpeed,
distr = "weibull",
method = "mle")
# Second method for fitting - Maximum goodness of fit estimation
weibull_mge = fitdist(data = ws125_dtu$WindSpeed,
distr = "weibull",
method = "mge",
gof = "KS")
# Since we fit the weibull distributions with different methods, and calculated
# the shape and scale parameters of the weibull distributions with different
# methods it's time to evaluate those fits by comparing them with the empirical
# distribution.
# Empirical distribution is derived from the observed data, and it represents
# the frequencies or proportions of different wind speeds in the dataset. So,
# it provides and actual representation of the data collected. It can be
# directly calculated with the ecdf() function found in base R, but we will
# do it step by step to better understand what it actually is.
empirical_df = ws125_dtu %>%
mutate(Bin = cut(WindSpeed, breaks = seq(0, max(WindSpeed) + 0.5, by = 0.5))) %>%
group_by(Bin) %>%
summarise(Freq = n()) %>%
ungroup() %>%
mutate(
#Probabilities can be calculated by dividing each freq with the total freq
Prob = Freq / sum(Freq),
#Cumulative probabilities can be calculated by cumulative sum
CDF = cumsum(Prob),
Bin = as.character(Bin),
#Lower bin values as numeric with the help of some gibberish REGEX
BinLower = as.numeric(str_extract(Bin, "\\d+\\.?\\d*(?=,)")),
#Upper bin values as numeric with the help of some gibberish REGEX
BinUpper = as.numeric(str_extract(Bin, "(?<=,)\\s*\\d+\\.?\\d*")),
#Middle points of the bins
WindSpeed = (BinLower+BinUpper)/2)
# Now we have empirical distributions' PDF and CDF values for the bins that have
# 0.5 m/s width.
# Now let's evaluate if we've calculated it correct. Let's compare the
# calculation above with the R's base ecdf() function. To be able to
# calculate the CDF after applying the ecdf function to the data, WindSpeed
# values that separates the bins should be used, not the WindSpeed values at
# the middle points of the bins
ws125_dtu_ecdf = ecdf(ws125_dtu$WindSpeed)
empirical_df = empirical_df %>%
mutate(CDF_R = ws125_dtu_ecdf(BinUpper))
nrow(empirical_df) == sum(round(empirical_df$CDF, 3) == round(empirical_df$CDF_R, 3))
# All the CDF and CDF_R values are equal to each other! So that we've calculated
# the cdf for empirical distribution correct.
# Now let's plot the probability density functions calculated with the weibull
# distributions and compare them with the empirical distribution. First,
# we need to calculate the densities/probabilities for the desired wind speeds
# for each weibull distribution we've calculated above. In empirical
# distribution, we have used a dataset from 0.5 to 18.5 wind speed with 0.5 m/s
# step. Now with the dweibull function of base R, we can calculate the density
# for the desired wind speeds and we would like to calculate the probabilities
# for each bin for visualization. To do this, we have to use the wind speed
# that represents each bin. Therefore, the middle points of each bin can be used
# for a better visualization.
empirical_df_weibull = empirical_df %>%
mutate(Density_Weibull_MLE = dweibull(empirical_df$WindSpeed,
shape = weibull_mle$estimate["shape"],
scale = weibull_mle$estimate["scale"]),
PDF_Weibull_MLE = Density_Weibull_MLE/sum(Density_Weibull_MLE),
Density_Weibull_MGE = dweibull(empirical_df$WindSpeed,
shape = weibull_mge$estimate["shape"],
scale = weibull_mge$estimate["scale"]),
PDF_Weibull_MGE = Density_Weibull_MGE/sum(Density_Weibull_MGE))
empirical_df_weibull %>%
select(WindSpeed, starts_with("PDF"), Prob) %>%
pivot_longer(names_to = "PDF_Type", values_to = "Value", starts_with("PDF")) %>%
ggplot(.) +
geom_col(aes(x = WindSpeed, y = Prob, fill = "Empirical"), position = position_dodge(width =0.8)) +
geom_line(aes(x = WindSpeed, y = Value, color = PDF_Type, linetype = PDF_Type), lwd = 0.8) +
scale_color_manual(values = wes_palette("Darjeeling1", 3)[c(2,3)]) +
scale_fill_manual(values = "royalblue") +
scale_linetype_manual(values = c(2,4)) +
scale_y_continuous(name = "Probability (%)",
n.breaks = 20,
labels = scales::percent_format(accuracy = 0.1)) +
scale_x_continuous(name = "Wind Speed (m/s)", breaks = seq(0, 26, 1)) +
ggtitle(label = "Comparison of Weibull with the Empirical Distribution") +
theme_bw() +
theme(legend.title = element_blank())
# As it can be clearly seen, all the weibull distributions fitted with different
# methods have similar curves. But, the empirical wind speed distribution has
# multimodality (multiple peaks) between 6.5 - 7 m/s and 9 - 10 m/s wind speeds
# and it seems like the weibull distributions doesn't represent these peaks.
# So do you think that they represent empirical distribution (observed data)
# well? It's obvious to state that the distributions don't fit well by just
# observing the plot. So we can state that weibull distribution has some
# limitations and these limitations can be expressed as:
# 1. Simplicity: The Weibull distribution assumes a single-peaked, symmetric
# distribution, which might not always match real-world wind data.
# 2.Lack of flexibility: The Weibull distribution has limited flexibility in
# modeling different shapes of wind speed distributions. It cannot capture
# characteristics like multimodality (multiple peaks), skewness, or heavy tails
# that might be present in actual wind data.
# 3. Data Fit Issues: Fitting a Weibull distribution to observed wind data can
# be challenging. Real-world wind data often deviates from the Weibull
# assumption, leading to poor fits and inaccurate representations of the wind
# speed distribution. This can result in significant errors when estimating
# energy production.
# 4. Overestimation of low wind speeds: The Weibull distribution tends to
# overestimate the frequency of low wind speeds. This can lead to overly
# optimistic energy production estimates, which can be problematic for project
# planning and financing.
# 5. Underestimation of high wind speeds: Conversely, the Weibull distribution
# may underestimate the frequency of high wind speeds. This can lead to
# conservative energy production estimates, potentially causing project
# developers to oversize equipment or miss out on revenue.
# On the other hand, for all of the limitations mentioned above about the
# weibull distribution which is a parametric distribution, let's focus on the
# non-parametric estimations For this purpose kernel density estimation (KDE)
# algorithm has been chosen. KDE calculates the PDF by placing a kernel (a
# smooth, continuous function, usually a Gaussian) on each data point and
# summing these kernels to create a smooth, continuous density estimate. The
# bandwidth parameter determines the width of the kernels, affecting the
# smoothness of the estimate. Now let's talk about the advantages of KDE over
# a parametric distribution:
# 1. Flexibility: It can handle wind speed data with complex and non-standard
# distributions.
# 2. No Distribution Assumptions: The Weibull distribution makes specific
# assumptions about the shape of the wind speed distribution, which may not
# always hold true.
# 3. Accuracy in energy production: By providing a more accurate representation
# of the wind speed distribution, KDE can lead to more precise estimates of
# annual energy production for wind turbines.
# For KDE, we'll use density() function of base R. We'll use n, width, from and
# to arguments of this function. These arguments is configured based on the
# WindSpeed variable of the empirical_df dataframe created before on this study.
# It should be highlighted that the WindSpeed variable consists of the middle
# points of the each bin created before. So it will begin with the 0.25 and will
# continue through the 18.25. This function directly calculates the frequencies
# (densities) for the wind speeds we've provided.
kde_method = density(x = ws125_dtu$WindSpeed,
n = nrow(empirical_df_weibull),
width = 0.5,
# From the middlepoint of the first bin
from = empirical_df$WindSpeed[1],
# To the middlepoint of the last bin
to = empirical_df$WindSpeed[nrow(empirical_df)])
# Once again, after calculating the densities, now it's time to calculate the
# Probabilities.
empirical_df_kde = empirical_df_weibull %>%
mutate(Density_KDE = kde_method$y,
PDF_KDE = Density_KDE/sum(Density_KDE))
#Let's plot the previous PDF graph with KDE as well.
empirical_df_kde %>%
select(WindSpeed, starts_with("PDF"), Prob) %>%
pivot_longer(names_to = "PDF_Type",
values_to = "Value",
starts_with("PDF")) %>%
ggplot(.) +
geom_col(aes(x = WindSpeed,
y = Prob,
fill = "Empirical"),
position = position_dodge(width =0.8)) +
geom_line(aes(x = WindSpeed,
y = Value,
color = PDF_Type,
linetype = PDF_Type)) +
scale_color_manual(values = c(
#For KDE color (Red)
wes_palette("Darjeeling1", 2)[1],
#For Weibull colors (cyan and orange)
wes_palette("Darjeeling1", 3)[c(2,3)])) +
scale_fill_manual(values = "royalblue") +
scale_linetype_manual(values = c(1,2,4)) +
scale_y_continuous(name = "Probability (%)",
n.breaks = 20,
labels = scales::percent_format(accuracy = 0.1)) +
scale_x_continuous(name = "Wind Speed (m/s)", breaks = seq(0, 26, 1)) +
ggtitle(label = "Comparison of Weibull and KDE with the Empirical Distribution") +
theme_bw() +
theme(legend.title = element_blank())
# As it can be seen clearly seen in the plot, KDE non-parametric distribution
# represents empirical distribution much more better compared to other weibull
# distribution fits. Let's prove that with a statistical performance metric.
# Kolmogorov-Smirnov (KV) test assesses the goodness of fit between the observed
# data and the expected/empirical distribution (usually a theoretical or
# hypothesized distribution). Now, we can use KS test for evaluating whether
# weibull or KDE distributions represent the empirical distribution or not. For
# this, ks.test() function of base R will be used. This test is calculated by
# using the Cumulative Distribution Functions so that, CDF's have to be
# calculated. For weibull distributions, pweibull() functions calculates the
# CDF values. The KS test is a non-parametric test that compares the empirical
# distribution to the expected distribution. The more data points you have,
# the more sensitive the test can be to detecting differences between the
# distributions. So, CDF values will be kept in another dataset.
windspeed_to_calc = seq(0,26.5,0.1)
cdf_to_ks = data.frame(WindSpeed = windspeed_to_calc,
CDF_Weibull_MLE = pweibull(windspeed_to_calc,
shape = weibull_mle$estimate["shape"],
scale = weibull_mle$estimate["scale"]),
CDF_Weibull_MGE = pweibull(windspeed_to_calc,
shape = weibull_mge$estimate["shape"],
scale = weibull_mge$estimate["scale"]))
#Let's make a quick function that scales data between 0 and 1.
scaling = function(x) {
x/sum(x)
}
cdf_to_ks = cdf_to_ks %>%
mutate(CDF_KDE = cumsum(scaling(density(x = ws125_dtu$WindSpeed,
width = 0.1,
from = 0,
to = 26.5,
n = nrow(cdf_to_ks))$y)),
CDF_Empirical = ws125_dtu_ecdf(seq(0,26.5,0.1)))
cdf_to_ks %>%
select(WindSpeed, starts_with("CDF")) %>%
pivot_longer(names_to = "Cesit", values_to = "Deger", -WindSpeed) %>%
ggplot(., aes(x = WindSpeed, y = Deger, color = Cesit, linetype = Cesit)) +
geom_line() +
scale_linetype_manual(values = c(1, 1, 2, 4)) +
scale_color_manual(values = c(
#For Empirical CDF color
"black",
#For KDE CDF color (Red)
wes_palette("Darjeeling1", 2)[1],
#For Weibull CDF colors (cyan and orange)
wes_palette("Darjeeling1", 3)[c(2,3)])) +
scale_y_continuous(name = "CDF", labels = scales::percent, breaks = seq(0,1,0.1)) +
scale_x_continuous(name = "Wind Speed (m/s)", breaks = seq(0,26,2)) +
ggtitle("Comparison Between Empirical and KDE, Different Weibull CDFs") +
theme_bw() +
theme(legend.title = element_blank())
# It's obvious to state that all three CDF's are not quite different from each
# other. But, the differences can be seen especially in the ranges from 0 to 4
# m/s and especially from 10 to 15 m/s which has crucial effect on the AEP
# calculation. It should also be noted that, Empirical CDF (black line) is
# followed closely by the CDF of the KDE (red line).
# Here KS test results can be seen;
(ks_weibull_mle = ks.test(x = cdf_to_ks$CDF_Weibull_MLE,y = cdf_to_ks$CDF_Empirical))
(ks_weibull_mge = ks.test(x = cdf_to_ks$CDF_Weibull_MGE,y = cdf_to_ks$CDF_Empirical))
(ks_kde = ks.test(x = cdf_to_ks$CDF_KDE,y = cdf_to_ks$CDF_Empirical))
# All three p-values are relatively large, with the smallest being 0.8549. Large
# p-values suggest that your data is consistent with the specified distributions.
# The D statistic for each test is quite small, indicating that the EDF and CDF
# are close in shape. Besides, D statistic that represents the maximum absolute
# difference between the EDF of the observed data and the CDF of the
# distribution we are testing and it has the smallest value in the KDE
# distribution. Since a smaller D value indicates a better fit because it means
# the EDF and CDF are closer in shape, we prove that KDE represents our data
# better.
# Now we can calculate the AEP based on these distributions and compare the
# results with the power curve approach which will be served as grand truth in
# our study.
# To be able to calculate the AEP from these distributions, we have to integrate
# the area resulted by multiplying the wind speed PDF and power curve function.
# So we have to first calculate the energy equivalent of each wind speed in
# the PDF's by using the loess model of the power curve we've introduced above.
empirical_df_kde
aep_dists = empirical_df_kde %>%
mutate(Power = predict(nrel_model, newdata = data.frame(WindSpeed = WindSpeed)),
Power = replace_na(Power, 0))
# Now we've calculated the power equivalent of each wind speed bin in the PDFs
# It's time to calculate areas covered by each bin and sum all of them (integration)
# After this integration we will have calculated the hourly mean power generation
# of the virtual wind turbine for the location of the met mast. If we would have
# full complete year data and this study wouldn't be about the direct comparison
# of the distributions with the power curve approach introduced above, we would
# multiply this with the 365.25 and 24 to be able to calculate the annual energy
# production. But we want to compare the results and evaluate which distribution
# does have more realistic results. So, NA values in the data and the row number
# have crucial impacts on the result as well. Let's consider all of them, and
# instead of multiplying the integrated area with the 8760 (total hours in a
# year), multiply this area with a realistic hour value taken from our data.
# Since our data has 10 minutes of time resolution and has nrow(ws125_dtu),
# 52535 row after na.omit, we can directly calculate how many hours does it have
# by dividing the total row number to 6. It is 8755.833 hours.
(data_hours = nrow(ws125_dtu)/6)
(aep_weibull_mle = sum(aep_dists$Power*aep_dists$PDF_Weibull_MLE)/1000*data_hours)
(aep_weibull_mge = sum(aep_dists$Power*aep_dists$PDF_Weibull_MGE)/1000*data_hours)
(aep_kde = sum(aep_dists$Power*aep_dists$PDF_KDE)/1000*data_hours)
(aep_powercurve = sum(ws125_dtu$Power_P)/1000/6)
data.frame(PowerCurve = aep_powercurve,
Diff_Weibull_MLE = aep_weibull_mle-aep_powercurve,
Diff_Weibull_MGE = aep_weibull_mge-aep_powercurve,
Diff_KDE = aep_kde-aep_powercurve,
Ratio_Weibull_MLE = aep_weibull_mle/aep_powercurve,
Ratio_Weibull_MGE = aep_weibull_mge/aep_powercurve,
Ratio_KDE = aep_kde/aep_powercurve)
# As it can be clearly seen, best method is calculated as the KDE non-parametric
# approach with an about 16.55 MWh error for AEP calculation although the
# differences are quite small. Since the surrounding of the met mast is can be
# classified as a flat(flat landscape) coastal(water and land) site according to
# the DTU's website, it's clear to state that in more complex terrains there
# will be very different wind speed distributions and it will be very hard to
# fit weibull distributions to these observations. In my experience through
# working as wind resource asessment engineer in different companies, I have
# faced lots of complex sites located in Turkey and the weibull fit unfortunately
# doesn't represent the empirical distribution. That's why it's very important
# to implement other fitting methodologies or non-parametric distributions to
# the worldwide known softwares like WAsP, windPRO or Meteodyn.
# Final plot!
empirical_df_kde %>%
select(WindSpeed, starts_with("PDF"), Prob) %>%
pivot_longer(names_to = "PDF_Type",
values_to = "Value",
starts_with("PDF")) %>%
ggplot(.) +
geom_col(aes(x = WindSpeed,
y = Prob,
fill = "Empirical"),
position = position_dodge(width =0.8)) +
geom_line(aes(x = WindSpeed,
y = Value,
color = PDF_Type,
linetype = PDF_Type), lwd = 0.9) +
scale_color_manual(values = c(
#For KDE color (Red)
wes_palette("Darjeeling1", 2)[1],
#For Weibull colors (cyan and orange)
wes_palette("Darjeeling1", 3)[c(2,3)])) +
scale_fill_manual(values = "royalblue") +
scale_linetype_manual(values = c(1,2,4)) +
scale_y_continuous(name = "Probability (%)",
n.breaks = 20,
labels = scales::percent_format(accuracy = 0.1)) +
geom_text(aes(x = 20, y = 0.05), label = paste0("KS Stat KDE: ", round(ks_kde$statistic,5)), color = "red3") +
geom_text(aes(x = 20, y = 0.046), label = paste0("KS Stat Weibull-MLE: ", round(ks_weibull_mle$statistic,5))) +
geom_text(aes(x = 20, y = 0.042), label = paste0("KS Stat Weibull-MGE: ", round(ks_weibull_mge$statistic,5))) +
geom_text(aes(x = 20, y = 0.03), label = paste0("AEP PC Assumpt. (Truth): ", round(aep_pc, 1), " MWh")) +
geom_text(aes(x = 20, y = 0.026), label = paste0("AEP KDE: ", round(aep_kde,1), " MWh"), color = "red3") +
geom_text(aes(x = 20, y = 0.022), label = paste0("AEP Weib MLE: ", round(aep_weibull_mle,1), " MWh")) +
geom_text(aes(x = 20, y = 0.018), label = paste0("AEP Weib MGE: ", round(aep_weibull_mge,1), " MWh")) +
scale_x_continuous(name = "Wind Speed (m/s)", breaks = seq(0, 26, 1)) +
ggtitle(label = "PDF's of Weibull and KDE with the Empirical Distribution",
subtitle = "Data: Met Mast Located at DTU Riso Campus @125m height (1997)") +
theme_bw() +
theme(legend.title = element_blank(),
legend.position = "top")
ggsave("final_plot.jpeg", dpi = 300, height = 9/1.25, width = 16/1.25)