-
Notifications
You must be signed in to change notification settings - Fork 0
/
wunderClean.R
34 lines (25 loc) · 938 Bytes
/
wunderClean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Weather Underground basic data cleaning
#
library(data.table)
library(lubridate)
# Work relative to the project data directory; the relative paths used in this
# script ("./wunder_raw/", "./wunder/wunder.csv") resolve against it.
setwd("D:/data/taxi/nyctaxi//")

# Read every file found in ./wunder_raw/ into a list with one data.table per
# file (fread returns a data.table).
raw_files <- list.files("./wunder_raw/", full.names = TRUE)
if (length(raw_files) == 0L) {
  # Fail here with a clear message rather than later on an empty table.
  stop("No input files found in ./wunder_raw/", call. = FALSE)
}
# NOTE(review): `all` shares its name with base::all(); the name is kept
# because the later statements of this script refer to it.
all <- lapply(raw_files, fread)
# Coerce the weather columns of one table to working types, by reference.
#
# Args:
#   dt: a data.table with columns `precipitation`, `temperature`, `windspeed`
#       and `time`. `time` is parsed with the format "%Y-%m-%d %I:%M %p"
#       (12-hour clock with AM/PM); values that do not match become NA.
#
# Side effects:
#   Modifies `dt` in place via `:=`: the three measurement columns are
#   converted with as.numeric() (values that cannot be converted become NA),
#   and two columns are added: `datetime` (POSIXct) and `hour`.
#
# Returns:
#   `dt`, invisibly.
clean <- function(dt) {
  dt[, precipitation := as.numeric(precipitation)]
  dt[, temperature := as.numeric(temperature)]
  dt[, windspeed := as.numeric(windspeed)]
  # `format` is named explicitly: the second formal of as.POSIXct() is `tz`,
  # so a positional format string only works by being forwarded through `...`.
  # NOTE(review): "EST" is a fixed-offset zone without daylight saving --
  # confirm this is the intended zone for these observations.
  dt[, datetime := as.POSIXct(time, format = "%Y-%m-%d %I:%M %p", tz = "EST")]
  # Both data.table and lubridate export hour(); lubridate is attached last in
  # this script, so it is the one named here to keep the call unambiguous.
  dt[, hour := lubridate::hour(datetime)]
  invisible(dt)
}
# Clean every table. clean() updates its argument by reference through `:=`,
# so the elements of `all` are modified without reassigning the list;
# invisible() keeps the returned list from being auto-printed at top level.
invisible(lapply(all, clean))

# Bind/save
wunder <- rbindlist(all)

# Missing = 0 precip
wunder[is.na(precipitation), precipitation := 0]

# Multiple measurements for some hours: collapse to one row per
# year/month/day/hour, averaging temperature and wind speed and keeping the
# largest precipitation reading.
# NOTE(review): `year`, `month` and `day` are not created anywhere in this
# script; presumably they are columns of the raw files -- confirm.
cleaned <- wunder[, list(temperature = mean(temperature, na.rm = TRUE),
                         precipitation = max(precipitation, na.rm = TRUE),
                         windspeed = mean(windspeed, na.rm = TRUE)),
                  by = list(year, month, day, hour)]

write.csv(cleaned, "./wunder/wunder.csv", row.names = FALSE)