-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdistribution_of_destinations.R
85 lines (67 loc) · 3.42 KB
/
distribution_of_destinations.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
library(dplyr)
## Load the data.
## NOTE(review): the .Rdata file presumably provides `taxi_clean`, which the
## code below uses without defining it -- confirm against the file contents.
## The path is relative, so it resolves against the current working directory.
load("../Rdata/one_month_taxi.Rdata")
# Derive the destination cell and a weekend flag for every trip.
#
# dropoff_lat / dropoff_lng: dropoff coordinates rounded to 2 decimal places,
#   except that airport trips are snapped to one fixed point per airport
#   (JFK: 40.64, -73.78; LaGuardia: 40.77, -73.87). A trip is treated as a JFK
#   trip when its dropoff neighborhood is JFK or its rate_code is 2.
# is_weekend: TRUE when day_of_the_week is "Sat" or "Sun".
#
# The airport tests use %in% rather than ==. `==` yields NA when
# dropoff_neighborhood (or rate_code) is missing, and ifelse() turns an NA test
# into an NA result, so such trips ended up with NA coordinates instead of
# reaching the rounded-coordinate fallback. %in% never returns NA.
taxi_clean <- taxi_clean %>%
  mutate(
    dropoff_lat = ifelse(
      dropoff_neighborhood %in% "John F. Kennedy International Airport" |
        rate_code %in% 2,
      40.64,
      ifelse(
        dropoff_neighborhood %in% "LaGuardia Airport",
        40.77,
        round(dropoff_latitude, 2)
      )
    ),
    dropoff_lng = ifelse(
      dropoff_neighborhood %in% "John F. Kennedy International Airport" |
        rate_code %in% 2,
      -73.78,
      ifelse(
        dropoff_neighborhood %in% "LaGuardia Airport",
        -73.87,
        round(dropoff_longitude, 2)
      )
    ),
    # Plain comparison (no ifelse(cond, T, F) wrapper): already a logical
    # vector, and a missing day stays NA exactly as before.
    is_weekend = day_of_the_week == "Sun" | day_of_the_week == "Sat"
  )
# Overall destination distribution for each (pickup_hour, is_weekend):
#   overall_n           trips ending in a given destination cell
#   overall_total       all trips in that pickup_hour / is_weekend group
#   overall_probability overall_n / overall_total
# The result stays grouped by pickup_hour and is_weekend.
probs_dst_overall <- taxi_clean %>%
  group_by(dropoff_lat, dropoff_lng, pickup_hour, is_weekend) %>%
  summarise(overall_n = n()) %>%
  group_by(pickup_hour, is_weekend) %>%
  mutate(overall_total = sum(overall_n)) %>%
  mutate(overall_probability = overall_n / overall_total)
# Destination distribution conditioned on pickup neighborhood, pickup hour and
# weekend flag (destinations are the dropoff_lat / dropoff_lng cells):
#   n           trips from the neighborhood to a given destination cell
#   total       all trips in that neighborhood / hour / is_weekend group
#   probability n / total
# The result stays grouped by pickup_neighborhood, pickup_hour and is_weekend.
probs_dst_given_src_and_time <- taxi_clean %>%
  group_by(
    pickup_neighborhood,
    pickup_hour,
    dropoff_lat,
    dropoff_lng,
    is_weekend
  ) %>%
  summarise(n = n()) %>%
  # Drop rows with no pickup neighborhood (per the original author's note,
  # these are trips that go out of the city).
  filter(!is.na(pickup_neighborhood)) %>%
  group_by(pickup_neighborhood, pickup_hour, is_weekend) %>%
  mutate(total = sum(n)) %>%
  mutate(probability = n / total)
# Attach the overall distribution to the conditional one (matching on hour,
# destination cell and weekend flag) and compare the two probabilities:
#   a  ratio of probabilities
#   b  difference of odds, p / (1 - p)
#   c  difference of log-odds
# b and c are not finite when either probability equals 1 (division by zero).
probability <- probs_dst_given_src_and_time %>%
  left_join(
    probs_dst_overall,
    by = c("pickup_hour", "dropoff_lat", "dropoff_lng", "is_weekend")
  ) %>%
  mutate(
    a = probability / overall_probability,
    b = probability / (1 - probability) -
      overall_probability / (1 - overall_probability),
    c = log(probability / (1 - probability)) -
      log(overall_probability / (1 - overall_probability))
  )
### Without hour: same analysis, ignoring pickup_hour

# Overall destination distribution for each is_weekend value:
#   overall_n           trips ending in a given destination cell
#   overall_total       all trips with that is_weekend value
#   overall_probability overall_n / overall_total
# The result stays grouped by is_weekend.
probs_dst_overall_without_hour <- taxi_clean %>%
  group_by(dropoff_lat, dropoff_lng, is_weekend) %>%
  summarise(overall_n = n()) %>%
  group_by(is_weekend) %>%
  mutate(overall_total = sum(overall_n)) %>%
  mutate(overall_probability = overall_n / overall_total)
# Destination distribution conditioned on pickup neighborhood and weekend flag
# only (destinations are the dropoff_lat / dropoff_lng cells):
#   n           trips from the neighborhood to a given destination cell
#   total       all trips in that neighborhood / is_weekend group
#   probability n / total
# The result stays grouped by pickup_neighborhood and is_weekend.
probs_dst_given_src_and_time_without_hour <- taxi_clean %>%
  group_by(
    pickup_neighborhood,
    dropoff_lat,
    dropoff_lng,
    is_weekend
  ) %>%
  summarise(n = n()) %>%
  # Drop rows with no pickup neighborhood (per the original author's note,
  # these are trips that go out of the city).
  filter(!is.na(pickup_neighborhood)) %>%
  group_by(pickup_neighborhood, is_weekend) %>%
  mutate(total = sum(n)) %>%
  mutate(probability = n / total)
# Attach the hour-independent overall distribution to the hour-independent
# conditional one (matching on destination cell and weekend flag) and compare
# the two probabilities:
#   a  ratio of probabilities
#   b  difference of odds, p / (1 - p)
#   c  difference of log-odds
# b and c are not finite when either probability equals 1 (division by zero).
probability_without_hour <- probs_dst_given_src_and_time_without_hour %>%
  left_join(
    probs_dst_overall_without_hour,
    by = c("dropoff_lat", "dropoff_lng", "is_weekend")
  ) %>%
  mutate(
    a = probability / overall_probability,
    b = probability / (1 - probability) -
      overall_probability / (1 - overall_probability),
    c = log(probability / (1 - probability)) -
      log(overall_probability / (1 - overall_probability))
  )