-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added New York City's Taxi and Limousine Commission high volume for hire vehicle trip support
- Loading branch information
Showing
4 changed files
with
259 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/usr/bin/env ruby

require "datasets-parquet"

# Select the January 2022 high volume for-hire vehicle trip records.
trips = Datasets::TLC::HighVolumeFHVTrip.new(year: 2022, month: 1)

# The whole month as an Arrow table; column values are the raw ones
# stored in the Parquet file (license codes, "Y"/"N" flags).
p trips.to_arrow
#<Arrow::Table:0x13f920640 ptr=0x13f180160>
# hvfhs_license_num dispatching_base_num originating_base_num request_datetime on_scene_datetime pickup_datetime dropoff_datetime PULocationID DOLocationID trip_miles trip_time base_passenger_fare tolls bcf sales_tax congestion_surcharge airport_fee tips driver_pay shared_request_flag shared_match_flag access_a_ride_flag wav_request_flag wav_match_flag
# 0 HV0003 B03404 B03404 2022-01-01T09:05:31+09:00 2022-01-01T09:05:40+09:00 2022-01-01T09:07:24+09:00 2022-01-01T09:18:28+09:00 170 161 1.180000 664 24.900000 0.000000 0.750000 2.210000 2.750000 0.000000 0.000000 23.030000 N N N N
# 1 HV0003 B03404 B03404 2022-01-01T09:19:27+09:00 2022-01-01T09:22:08+09:00 2022-01-01T09:22:32+09:00 2022-01-01T09:30:12+09:00 237 161 0.820000 460 11.970000 0.000000 0.360000 1.060000 2.750000 0.000000 0.000000 12.320000 N N N N
# ...

# Record-by-record access: the license code is exposed as a company
# symbol and the "Y"/"N" flags as booleans via the `?` readers.
trips.each do |trip|
  p [
    trip.hvfhs_license_num,
    trip.dispatching_base_num,
    trip.originating_base_num,
    trip.request_datetime,
    trip.on_scene_datetime,
    trip.pickup_datetime,
    trip.dropoff_datetime,
    trip.pu_locationID,
    trip.do_locationID,
    trip.trip_miles,
    trip.trip_time,
    trip.base_passenger_fare,
    trip.tolls,
    trip.bcf,
    trip.sales_tax,
    trip.congestion_surcharge,
    trip.airport_fee,
    trip.tips,
    trip.driver_pay,
    trip.shared_request_flag?,
    trip.shared_match_flag?,
    trip.access_a_ride_flag?,
    trip.wav_request_flag?,
    trip.wav_match_flag?,
  ]
end
# [:uber, "B03404", "B03404", 2022-01-01 09:05:31 +0900, 2022-01-01 09:05:40 +0900, 2022-01-01 09:07:24 +0900, 2022-01-01 09:18:28 +0900, 170, 161, 1.18, 664, 24.9, 0.0, 0.75, 2.21, 2.75, 0.0, 0.0, 23.03, false, false, false, false, false]
# [:uber, "B03404", "B03404", 2022-01-01 09:19:27 +0900, 2022-01-01 09:22:08 +0900, 2022-01-01 09:22:32 +0900, 2022-01-01 09:30:12 +0900, 237, 161, 0.82, 460, 11.97, 0.0, 0.36, 1.06, 2.75, 0.0, 0.0, 12.32, false, false, false, false, false]
# [:uber, "B03404", "B03404", 2022-01-01 09:43:53 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 10:07:32 +0900, 237, 161, 1.18, 595, 29.82, 0.0, 0.89, 2.65, 2.75, 0.0, 0.0, 23.3, false, false, false, false, false]
# ...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
module Datasets | ||
module TLC | ||
class HighVolumeFHVTrip < Dataset | ||
class Record < Struct.new(:hvfhs_license_num, | ||
:dispatching_base_num, | ||
:originating_base_num, | ||
:request_datetime, | ||
:on_scene_datetime, | ||
:pickup_datetime, | ||
:dropoff_datetime, | ||
:pu_locationID, | ||
:do_locationID, | ||
:trip_miles, | ||
:trip_time, | ||
:base_passenger_fare, | ||
:tolls, | ||
:bcf, | ||
:sales_tax, | ||
:congestion_surcharge, | ||
:airport_fee, | ||
:tips, | ||
:driver_pay, | ||
:shared_request_flag, | ||
:shared_match_flag, | ||
:access_a_ride_flag, | ||
:wav_request_flag, | ||
:wav_match_flag) | ||
alias_method :shared_request_flag?, :shared_request_flag | ||
alias_method :shared_match_flag?, :shared_match_flag | ||
alias_method :access_a_ride_flag?, :access_a_ride_flag | ||
alias_method :wav_request_flag?, :wav_request_flag | ||
alias_method :wav_match_flag?, :wav_match_flag | ||
|
||
def initialize(*values) | ||
super() | ||
members.zip(values) do |member, value| | ||
__send__("#{member}=", value) | ||
end | ||
end | ||
|
||
def hvfhs_license_num=(hvfhs_license_num) | ||
case hvfhs_license_num | ||
when 'HV0002' | ||
super(:juno) | ||
when 'HV0003' | ||
super(:uber) | ||
when 'HV0004' | ||
super(:via) | ||
when 'HV0005' | ||
super(:lyft) | ||
end | ||
end | ||
|
||
def shared_request_flag=(shared_request_flag) | ||
super(shared_request_flag == 'Y') | ||
end | ||
|
||
def shared_match_flag=(shared_match_flag) | ||
super(shared_match_flag == 'Y') | ||
end | ||
|
||
def access_a_ride_flag=(access_a_ride_flag) | ||
super(access_a_ride_flag == 'Y') | ||
end | ||
|
||
def wav_request_flag=(wav_request_flag) | ||
super(wav_request_flag == 'Y') | ||
end | ||
|
||
def wav_match_flag=(wav_match_flag) | ||
super(wav_match_flag == 'Y') | ||
end | ||
end | ||
|
||
def initialize(year: Date.today.year, month: Date.today.month) | ||
super() | ||
@metadata.id = "nyc-taxi-and-limousine-commission-high-volume-for-hire-vehicle-trip" | ||
@metadata.name = "New York city Taxi and Limousine Commission: high volume for hire vehicle trip record dataset" | ||
@metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page" | ||
@metadata.licenses = [ | ||
{ | ||
name: "NYC Open Data Terms of Use", | ||
url: "https://opendata.cityofnewyork.us/overview/#termsofuse", | ||
} | ||
] | ||
@year = year | ||
@month = month | ||
end | ||
|
||
def to_arrow | ||
base_name = "fhvhv_tripdata_%04d-%02d.parquet" % [@year, @month] | ||
data_path = cache_dir_path + base_name | ||
data_url = "https://d37ci6vzurychx.cloudfront.net/trip-data/#{base_name}" | ||
download(data_path, data_url) | ||
Arrow::Table.load(data_path) | ||
end | ||
|
||
def each | ||
return to_enum(__method__) unless block_given? | ||
|
||
to_arrow.raw_records.each do |raw_record| | ||
record = Record.new(*raw_record) | ||
yield(record) | ||
end | ||
end | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
class TLCHighVolumeFHVTripTest < Test::Unit::TestCase | ||
def setup | ||
@default_timezone_env = ENV["TZ"] | ||
ENV["TZ"] = "UTC" | ||
@dataset = Datasets::TLC::HighVolumeFHVTrip.new(year: 2022, month: 1) | ||
end | ||
|
||
def teardown | ||
ENV["TZ"] = @default_timezone_env | ||
end | ||
|
||
test("#to_arrow") do | ||
assert_equal(<<~TABLE, @dataset.to_arrow.to_s) | ||
\thvfhs_license_num\tdispatching_base_num\toriginating_base_num\t request_datetime\t on_scene_datetime\t pickup_datetime\t dropoff_datetime\tPULocationID\tDOLocationID\ttrip_miles\ttrip_time\tbase_passenger_fare\t tolls\t bcf\t sales_tax\tcongestion_surcharge\tairport_fee\t tips\tdriver_pay\tshared_request_flag\tshared_match_flag\taccess_a_ride_flag\twav_request_flag\twav_match_flag | ||
0\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:05:31+00:00\t2022-01-01T00:05:40+00:00\t2022-01-01T00:07:24+00:00\t2022-01-01T00:18:28+00:00\t 170\t 161\t 1.180000\t 664\t 24.900000\t 0.000000\t 0.750000\t 2.210000\t 2.750000\t 0.000000\t 0.000000\t 23.030000\tN \tN \t \tN \tN | ||
1\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:19:27+00:00\t2022-01-01T00:22:08+00:00\t2022-01-01T00:22:32+00:00\t2022-01-01T00:30:12+00:00\t 237\t 161\t 0.820000\t 460\t 11.970000\t 0.000000\t 0.360000\t 1.060000\t 2.750000\t 0.000000\t 0.000000\t 12.320000\tN \tN \t \tN \tN | ||
2\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:43:53+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T01:07:32+00:00\t 237\t 161\t 1.180000\t 595\t 29.820000\t 0.000000\t 0.890000\t 2.650000\t 2.750000\t 0.000000\t 0.000000\t 23.300000\tN \tN \t \tN \tN | ||
3\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:15:36+00:00\t2022-01-01T00:17:08+00:00\t2022-01-01T00:18:02+00:00\t2022-01-01T00:23:05+00:00\t 262\t 229\t 1.650000\t 303\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 2.750000\t 0.000000\t 0.000000\t 6.300000\tN \tN \t \tN \tN | ||
4\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:25:45+00:00\t2022-01-01T00:26:01+00:00\t2022-01-01T00:28:01+00:00\t2022-01-01T00:35:42+00:00\t 229\t 141\t 1.650000\t 461\t 9.440000\t 0.000000\t 0.280000\t 0.840000\t 2.750000\t 0.000000\t 0.000000\t 7.440000\tN \tN \t \tN \tN | ||
5\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:34:44+00:00\t2022-01-01T00:36:52+00:00\t2022-01-01T00:38:50+00:00\t2022-01-01T00:51:32+00:00\t 263\t 79\t 4.510000\t 762\t 17.670000\t 0.000000\t 0.530000\t 1.570000\t 2.750000\t 0.000000\t 0.000000\t 12.250000\tN \tN \t \tN \tN | ||
6\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:47:51+00:00\t2022-01-01T00:52:00+00:00\t2022-01-01T00:53:25+00:00\t2022-01-01T01:08:56+00:00\t 113\t 140\t 3.680000\t 931\t 16.680000\t 0.000000\t 0.500000\t 1.480000\t 2.750000\t 0.000000\t 0.000000\t 12.750000\tN \tN \t \tN \tN | ||
7\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:06:21+00:00\t2022-01-01T00:06:58+00:00\t2022-01-01T00:08:58+00:00\t2022-01-01T00:23:01+00:00\t 151\t 75\t 2.770000\t 843\t 14.410000\t 0.000000\t 0.430000\t 1.280000\t 0.000000\t 0.000000\t 4.000000\t 11.470000\tN \tN \t \tN \tN | ||
8\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:27:54+00:00\t2022-01-01T00:30:26+00:00\t2022-01-01T00:32:25+00:00\t2022-01-01T00:44:15+00:00\t 263\t 229\t 2.040000\t 710\t 10.640000\t 0.000000\t 0.320000\t 0.940000\t 2.750000\t 0.000000\t 0.000000\t 9.550000\tN \tN \t \tN \tN | ||
9\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:44:59+00:00\t2022-01-01T00:48:23+00:00\t2022-01-01T00:50:23+00:00\t2022-01-01T01:15:30+00:00\t 237\t 169\t 8.790000\t 1507\t 107.560000\t 0.000000\t 0.830000\t 2.450000\t 2.750000\t 0.000000\t 0.000000\t 23.670000\tN \tN \t \tN \tN | ||
... | ||
14751581\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:36+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:33:23+00:00\t 163\t 244\t 7.570000\t 858\t 18.460000\t 0.000000\t 0.550000\t 1.640000\t 2.750000\t 0.000000\t 0.000000\t 15.870000\tN \tN \t \tN \tN | ||
14751582\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:34+00:00\t2022-01-31T23:34:20+00:00\t2022-01-31T23:36:02+00:00\t2022-01-31T23:50:15+00:00\t 244\t 47\t 3.050000\t 853\t 16.230000\t 0.000000\t 0.490000\t 1.440000\t 0.000000\t 0.000000\t 0.000000\t 10.850000\tN \tN \t \tN \tN | ||
14751583\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:57:18+00:00\t2022-01-31T23:07:52+00:00\t2022-01-31T23:09:52+00:00\t2022-01-31T23:19:46+00:00\t 86\t 86\t 2.050000\t 594\t 9.630000\t 0.000000\t 0.290000\t 0.850000\t 0.000000\t 0.000000\t 0.000000\t 8.510000\tN \tN \t \tN \tN | ||
14751584\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:23:00+00:00\t2022-01-31T23:24:44+00:00\t2022-01-31T23:26:37+00:00\t2022-01-31T23:34:37+00:00\t 86\t 117\t 1.300000\t 480\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 6.730000\tN \tN \t \tN \tN | ||
14751585\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:19+00:00\t2022-01-31T23:40:56+00:00\t2022-01-31T23:41:58+00:00\t2022-01-31T23:47:44+00:00\t 86\t 86\t 1.530000\t 346\t 7.190000\t 0.000000\t 0.220000\t 0.640000\t 0.000000\t 0.000000\t 0.000000\t 6.680000\tN \tN \t \tN \tN | ||
14751586\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:22:16+00:00\t2022-01-31T23:26:04+00:00\t2022-01-31T23:27:20+00:00\t2022-01-31T23:40:46+00:00\t 77\t 71\t 2.590000\t 806\t 14.280000\t 0.000000\t 0.430000\t 1.270000\t 0.000000\t 0.000000\t 0.000000\t 9.900000\tN \tN \t \tN \tN | ||
14751587\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:42:30+00:00\t2022-01-31T23:45:08+00:00\t2022-01-31T23:45:46+00:00\t2022-01-31T23:59:44+00:00\t 72\t 72\t 1.560000\t 838\t 10.420000\t 0.000000\t 0.310000\t 0.920000\t 0.000000\t 0.000000\t 0.000000\t 9.030000\tN \tN \t \tN \tN | ||
14751588\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:56:50+00:00\t2022-01-31T23:03:17+00:00\t2022-01-31T23:03:25+00:00\t2022-01-31T23:17:17+00:00\t 136\t 20\t 1.230000\t 832\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 8.730000\tN \tN \t \tN \tN | ||
14751589\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:07+00:00\t2022-01-31T23:19:25+00:00\t2022-01-31T23:20:26+00:00\t2022-01-31T23:30:26+00:00\t 20\t 136\t 1.690000\t 600\t 9.320000\t 0.000000\t 0.280000\t 0.830000\t 0.000000\t 0.000000\t 0.000000\t 7.300000\tN \tN \t \tN \tN | ||
14751590\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:24+00:00\t2022-01-31T23:36:13+00:00\t2022-01-31T23:38:13+00:00\t2022-02-01T00:07:24+00:00\t 136\t 82\t 14.700000\t 1751\t 27.340000\t 6.550000\t 1.020000\t 3.010000\t 0.000000\t 0.000000\t 0.000000\t 31.280000\tN \tN \t \tN \tN | ||
TABLE | ||
end | ||
|
||
test("#each") do | ||
omit("Skip test of HighVolumeFHVTrip#each because the size of data is too huge to execute.") | ||
records = @dataset.each.to_a | ||
|
||
assert_equal([ | ||
14751590, | ||
{ | ||
hvfhs_license_num: :uber, | ||
dispatching_base_num: "B03404", | ||
originating_base_num: "B03404", | ||
request_datetime: Time.parse("2022-01-01 00:05:31 +0000"), | ||
on_scene_datetime: Time.parse("2022-01-01 00:05:40 +0000"), | ||
pickup_datetime: Time.parse("2022-01-01 00:07:24 +0000"), | ||
dropoff_datetime: Time.parse("2022-01-01 00:18:28 +0000"), | ||
pu_locationID: 170, | ||
do_locationID: 161, | ||
trip_miles: 1.18, | ||
trip_time: 664, | ||
base_passenger_fare: 24.9, | ||
tolls: 0.0, | ||
bcf: 0.75, | ||
sales_tax: 2.21, | ||
congestion_surcharge: 2.75, | ||
airport_fee: 0.0, | ||
tips: 0.0, | ||
driver_pay: 23.03, | ||
shared_request_flag: false, | ||
shared_match_flag: false, | ||
access_a_ride_flag: false, | ||
wav_request_flag: false, | ||
wav_match_flag: false | ||
}, | ||
{ | ||
hvfhs_license_num: :uber, | ||
dispatching_base_num: "B03404", | ||
originating_base_num: "B03404", | ||
request_datetime: Time.parse("2022-01-31 23:33:24 +00:00"), | ||
on_scene_datetime: Time.parse("2022-01-31 23:36:13 +00:00"), | ||
pickup_datetime: Time.parse("2022-01-31 23:38:13 +00:00"), | ||
dropoff_datetime: Time.parse("2022-02-01 00:07:24 +00:00"), | ||
pu_locationID: 136, | ||
do_locationID: 82, | ||
trip_miles: 14.7, | ||
trip_time: 1751, | ||
base_passenger_fare: 27.34, | ||
tolls: 6.55, | ||
bcf: 1.02, | ||
sales_tax: 3.01, | ||
congestion_surcharge: 0.0, | ||
airport_fee: 0.0, | ||
tips: 0.0, | ||
driver_pay: 31.28, | ||
shared_request_flag: false, | ||
shared_match_flag: false, | ||
access_a_ride_flag: false, | ||
wav_request_flag: false, | ||
wav_match_flag: false | ||
} | ||
], | ||
[ | ||
records.size, | ||
records.first.to_h, | ||
records.last.to_h, | ||
]) | ||
end | ||
end |