Skip to content

Commit

Permalink
Added New York city's Taxi and Limousine Commission high volume for h…
Browse files Browse the repository at this point in the history
…ire vehicle trip support
  • Loading branch information
otegami committed Nov 16, 2022
1 parent 3b7e864 commit e2eb492
Show file tree
Hide file tree
Showing 4 changed files with 259 additions and 0 deletions.
46 changes: 46 additions & 0 deletions example/tlc-high-volume-fhv-trip.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env ruby

require "datasets-parquet"

# Dataset handle for the January 2022 high volume for-hire vehicle trips.
trips = Datasets::TLC::HighVolumeFHVTrip.new(month: 1, year: 2022)

# Print the data as an Arrow table.
arrow_table = trips.to_arrow
p arrow_table
#<Arrow::Table:0x13f920640 ptr=0x13f180160>
# hvfhs_license_num dispatching_base_num originating_base_num request_datetime on_scene_datetime pickup_datetime dropoff_datetime PULocationID DOLocationID trip_miles trip_time base_passenger_fare tolls bcf sales_tax congestion_surcharge airport_fee tips driver_pay shared_request_flag shared_match_flag access_a_ride_flag wav_request_flag wav_match_flag
# 0 HV0003 B03404 B03404 2022-01-01T09:05:31+09:00 2022-01-01T09:05:40+09:00 2022-01-01T09:07:24+09:00 2022-01-01T09:18:28+09:00 170 161 1.180000 664 24.900000 0.000000 0.750000 2.210000 2.750000 0.000000 0.000000 23.030000 N N N N
# 1 HV0003 B03404 B03404 2022-01-01T09:19:27+09:00 2022-01-01T09:22:08+09:00 2022-01-01T09:22:32+09:00 2022-01-01T09:30:12+09:00 237 161 0.820000 460 11.970000 0.000000 0.360000 1.060000 2.750000 0.000000 0.000000 12.320000 N N N N
# ...


# Readers printed for every trip, listed in the same order as the table
# columns above. The five trailing flags are read through their predicate
# ("?") readers.
trip_readers = %i[
  hvfhs_license_num
  dispatching_base_num
  originating_base_num
  request_datetime
  on_scene_datetime
  pickup_datetime
  dropoff_datetime
  pu_locationID
  do_locationID
  trip_miles
  trip_time
  base_passenger_fare
  tolls
  bcf
  sales_tax
  congestion_surcharge
  airport_fee
  tips
  driver_pay
  shared_request_flag?
  shared_match_flag?
  access_a_ride_flag?
  wav_request_flag?
  wav_match_flag?
]

# Print each trip as an array of its attribute values.
trips.each do |record|
  p trip_readers.map { |reader| record.public_send(reader) }
end
# [:uber, "B03404", "B03404", 2022-01-01 09:05:31 +0900, 2022-01-01 09:05:40 +0900, 2022-01-01 09:07:24 +0900, 2022-01-01 09:18:28 +0900, 170, 161, 1.18, 664, 24.9, 0.0, 0.75, 2.21, 2.75, 0.0, 0.0, 23.03, false, false, false, false, false]
# [:uber, "B03404", "B03404", 2022-01-01 09:19:27 +0900, 2022-01-01 09:22:08 +0900, 2022-01-01 09:22:32 +0900, 2022-01-01 09:30:12 +0900, 237, 161, 0.82, 460, 11.97, 0.0, 0.36, 1.06, 2.75, 0.0, 0.0, 12.32, false, false, false, false, false]
# [:uber, "B03404", "B03404", 2022-01-01 09:43:53 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 10:07:32 +0900, 237, 161, 1.18, 595, 29.82, 0.0, 0.89, 2.65, 2.75, 0.0, 0.0, 23.3, false, false, false, false, false]
# ...
1 change: 1 addition & 0 deletions lib/datasets-parquet.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@

require_relative "datasets-parquet/tlc/fhv-trip"
require_relative "datasets-parquet/tlc/green-taxi-trip"
require_relative "datasets-parquet/tlc/high-volume-fhv-trip"
require_relative "datasets-parquet/tlc/yellow-taxi-trip"
108 changes: 108 additions & 0 deletions lib/datasets-parquet/tlc/high-volume-fhv-trip.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
module Datasets
module TLC
class HighVolumeFHVTrip < Dataset
# One high volume for-hire vehicle trip.
#
# Values are assigned positionally, in member order, and converted on
# assignment:
#
# * +hvfhs_license_num+: a recognized license number becomes a company
#   symbol (see HVFHS_LICENSEES); any other value is stored unchanged.
# * the five +*_flag+ members: +true+ when the assigned value is "Y",
#   +false+ for anything else (including +nil+).
#
# Each flag member also has a predicate reader, e.g. +wav_match_flag?+.
class Record < Struct.new(:hvfhs_license_num,
                          :dispatching_base_num,
                          :originating_base_num,
                          :request_datetime,
                          :on_scene_datetime,
                          :pickup_datetime,
                          :dropoff_datetime,
                          :pu_locationID,
                          :do_locationID,
                          :trip_miles,
                          :trip_time,
                          :base_passenger_fare,
                          :tolls,
                          :bcf,
                          :sales_tax,
                          :congestion_surcharge,
                          :airport_fee,
                          :tips,
                          :driver_pay,
                          :shared_request_flag,
                          :shared_match_flag,
                          :access_a_ride_flag,
                          :wav_request_flag,
                          :wav_match_flag)
  # Company symbol for each recognized hvfhs_license_num value.
  HVFHS_LICENSEES = {
    'HV0002' => :juno,
    'HV0003' => :uber,
    'HV0004' => :via,
    'HV0005' => :lyft,
  }.freeze

  # Members whose assigned value is converted to a boolean.
  FLAG_MEMBERS = %i[
    shared_request_flag
    shared_match_flag
    access_a_ride_flag
    wav_request_flag
    wav_match_flag
  ].freeze

  FLAG_MEMBERS.each do |flag|
    # Predicate-style reader returning the stored boolean.
    alias_method :"#{flag}?", flag

    # Stores true only when the assigned value is exactly "Y".
    define_method(:"#{flag}=") do |raw_flag|
      self[flag] = (raw_flag == 'Y')
    end
  end

  # Builds a record by sending every value through its member's writer so
  # that the conversions above are applied.
  #
  # @param values [Array] values in member order. Missing trailing values
  #   are assigned as nil; values beyond the last member are ignored.
  def initialize(*values)
    super()
    members.each_with_index do |member, index|
      public_send("#{member}=", values[index])
    end
  end

  # Stores the company symbol for a recognized license number.
  #
  # An unrecognized value is stored as given. Without this fallback the
  # assignment would be skipped entirely, leaving the previous value (or
  # nil on a new record) in place.
  #
  # @param hvfhs_license_num [String, nil] license number such as "HV0003".
  def hvfhs_license_num=(hvfhs_license_num)
    super(HVFHS_LICENSEES.fetch(hvfhs_license_num, hvfhs_license_num))
  end
end

# Remembers which month of trip records to load and fills in the dataset
# metadata (id, name, url and license).
#
# @param year year of the trip records (e.g. 2022); defaults to the
#   current year.
# @param month month of the trip records (e.g. 1); defaults to the
#   current month.
def initialize(year: Date.today.year, month: Date.today.month)
  super()
  @year = year
  @month = month

  terms_of_use = {
    name: "NYC Open Data Terms of Use",
    url: "https://opendata.cityofnewyork.us/overview/#termsofuse",
  }
  @metadata.id = "nyc-taxi-and-limousine-commission-high-volume-for-hire-vehicle-trip"
  @metadata.name = "New York city Taxi and Limousine Commission: high volume for hire vehicle trip record dataset"
  @metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"
  @metadata.licenses = [terms_of_use]
end

# Loads the trip records of the configured year and month as an Arrow table.
#
# The file name is built from @year and @month, zero-padded to four and two
# digits. #download is asked to place that file under the cache directory
# before it is loaded.
#
# @return the result of Arrow::Table.load for the cached file.
def to_arrow
  file_name = Kernel.format("fhvhv_tripdata_%04d-%02d.parquet", @year, @month)
  local_path = cache_dir_path + file_name
  download(local_path,
           "https://d37ci6vzurychx.cloudfront.net/trip-data/#{file_name}")
  Arrow::Table.load(local_path)
end

# Yields every trip of the configured month as a Record.
#
# Rows are converted to Ruby values one record batch at a time instead of
# calling raw_records on the whole table, so Ruby arrays for every row of
# the month never have to exist at once.
#
# @yieldparam record [Record] one trip, built from a raw row in column order.
# @return [Enumerator] when called without a block.
def each
  return to_enum(__method__) unless block_given?

  to_arrow.each_record_batch do |record_batch|
    record_batch.raw_records.each do |raw_record|
      yield(Record.new(*raw_record))
    end
  end
end
end
end
end
104 changes: 104 additions & 0 deletions test/test-tlc-high-volume-fhv-trip.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
class TLCHighVolumeFHVTripTest < Test::Unit::TestCase
# Remembers the current TZ setting, switches the process to UTC and creates
# the January 2022 dataset used by the tests.
def setup
  @default_timezone_env = ENV.fetch("TZ", nil)
  ENV.store("TZ", "UTC")
  @dataset = Datasets::TLC::HighVolumeFHVTrip.new(month: 1, year: 2022)
end

# Puts TZ back to the value remembered by setup; TZ is removed again when it
# was not set before.
def teardown
  if @default_timezone_env.nil?
    ENV.delete("TZ")
  else
    ENV["TZ"] = @default_timezone_env
  end
end

# Compares the string rendering of the January 2022 table with a fixed
# snapshot showing the first ten and the last ten rows. The snapshot's
# timestamps carry a +00:00 offset; setup sets TZ to "UTC" before this runs.
test("#to_arrow") do
assert_equal(<<~TABLE, @dataset.to_arrow.to_s)
\thvfhs_license_num\tdispatching_base_num\toriginating_base_num\t request_datetime\t on_scene_datetime\t pickup_datetime\t dropoff_datetime\tPULocationID\tDOLocationID\ttrip_miles\ttrip_time\tbase_passenger_fare\t tolls\t bcf\t sales_tax\tcongestion_surcharge\tairport_fee\t tips\tdriver_pay\tshared_request_flag\tshared_match_flag\taccess_a_ride_flag\twav_request_flag\twav_match_flag
0\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:05:31+00:00\t2022-01-01T00:05:40+00:00\t2022-01-01T00:07:24+00:00\t2022-01-01T00:18:28+00:00\t 170\t 161\t 1.180000\t 664\t 24.900000\t 0.000000\t 0.750000\t 2.210000\t 2.750000\t 0.000000\t 0.000000\t 23.030000\tN \tN \t \tN \tN
1\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:19:27+00:00\t2022-01-01T00:22:08+00:00\t2022-01-01T00:22:32+00:00\t2022-01-01T00:30:12+00:00\t 237\t 161\t 0.820000\t 460\t 11.970000\t 0.000000\t 0.360000\t 1.060000\t 2.750000\t 0.000000\t 0.000000\t 12.320000\tN \tN \t \tN \tN
2\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:43:53+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T01:07:32+00:00\t 237\t 161\t 1.180000\t 595\t 29.820000\t 0.000000\t 0.890000\t 2.650000\t 2.750000\t 0.000000\t 0.000000\t 23.300000\tN \tN \t \tN \tN
3\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:15:36+00:00\t2022-01-01T00:17:08+00:00\t2022-01-01T00:18:02+00:00\t2022-01-01T00:23:05+00:00\t 262\t 229\t 1.650000\t 303\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 2.750000\t 0.000000\t 0.000000\t 6.300000\tN \tN \t \tN \tN
4\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:25:45+00:00\t2022-01-01T00:26:01+00:00\t2022-01-01T00:28:01+00:00\t2022-01-01T00:35:42+00:00\t 229\t 141\t 1.650000\t 461\t 9.440000\t 0.000000\t 0.280000\t 0.840000\t 2.750000\t 0.000000\t 0.000000\t 7.440000\tN \tN \t \tN \tN
5\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:34:44+00:00\t2022-01-01T00:36:52+00:00\t2022-01-01T00:38:50+00:00\t2022-01-01T00:51:32+00:00\t 263\t 79\t 4.510000\t 762\t 17.670000\t 0.000000\t 0.530000\t 1.570000\t 2.750000\t 0.000000\t 0.000000\t 12.250000\tN \tN \t \tN \tN
6\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:47:51+00:00\t2022-01-01T00:52:00+00:00\t2022-01-01T00:53:25+00:00\t2022-01-01T01:08:56+00:00\t 113\t 140\t 3.680000\t 931\t 16.680000\t 0.000000\t 0.500000\t 1.480000\t 2.750000\t 0.000000\t 0.000000\t 12.750000\tN \tN \t \tN \tN
7\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:06:21+00:00\t2022-01-01T00:06:58+00:00\t2022-01-01T00:08:58+00:00\t2022-01-01T00:23:01+00:00\t 151\t 75\t 2.770000\t 843\t 14.410000\t 0.000000\t 0.430000\t 1.280000\t 0.000000\t 0.000000\t 4.000000\t 11.470000\tN \tN \t \tN \tN
8\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:27:54+00:00\t2022-01-01T00:30:26+00:00\t2022-01-01T00:32:25+00:00\t2022-01-01T00:44:15+00:00\t 263\t 229\t 2.040000\t 710\t 10.640000\t 0.000000\t 0.320000\t 0.940000\t 2.750000\t 0.000000\t 0.000000\t 9.550000\tN \tN \t \tN \tN
9\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:44:59+00:00\t2022-01-01T00:48:23+00:00\t2022-01-01T00:50:23+00:00\t2022-01-01T01:15:30+00:00\t 237\t 169\t 8.790000\t 1507\t 107.560000\t 0.000000\t 0.830000\t 2.450000\t 2.750000\t 0.000000\t 0.000000\t 23.670000\tN \tN \t \tN \tN
...
14751581\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:36+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:33:23+00:00\t 163\t 244\t 7.570000\t 858\t 18.460000\t 0.000000\t 0.550000\t 1.640000\t 2.750000\t 0.000000\t 0.000000\t 15.870000\tN \tN \t \tN \tN
14751582\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:34+00:00\t2022-01-31T23:34:20+00:00\t2022-01-31T23:36:02+00:00\t2022-01-31T23:50:15+00:00\t 244\t 47\t 3.050000\t 853\t 16.230000\t 0.000000\t 0.490000\t 1.440000\t 0.000000\t 0.000000\t 0.000000\t 10.850000\tN \tN \t \tN \tN
14751583\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:57:18+00:00\t2022-01-31T23:07:52+00:00\t2022-01-31T23:09:52+00:00\t2022-01-31T23:19:46+00:00\t 86\t 86\t 2.050000\t 594\t 9.630000\t 0.000000\t 0.290000\t 0.850000\t 0.000000\t 0.000000\t 0.000000\t 8.510000\tN \tN \t \tN \tN
14751584\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:23:00+00:00\t2022-01-31T23:24:44+00:00\t2022-01-31T23:26:37+00:00\t2022-01-31T23:34:37+00:00\t 86\t 117\t 1.300000\t 480\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 6.730000\tN \tN \t \tN \tN
14751585\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:19+00:00\t2022-01-31T23:40:56+00:00\t2022-01-31T23:41:58+00:00\t2022-01-31T23:47:44+00:00\t 86\t 86\t 1.530000\t 346\t 7.190000\t 0.000000\t 0.220000\t 0.640000\t 0.000000\t 0.000000\t 0.000000\t 6.680000\tN \tN \t \tN \tN
14751586\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:22:16+00:00\t2022-01-31T23:26:04+00:00\t2022-01-31T23:27:20+00:00\t2022-01-31T23:40:46+00:00\t 77\t 71\t 2.590000\t 806\t 14.280000\t 0.000000\t 0.430000\t 1.270000\t 0.000000\t 0.000000\t 0.000000\t 9.900000\tN \tN \t \tN \tN
14751587\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:42:30+00:00\t2022-01-31T23:45:08+00:00\t2022-01-31T23:45:46+00:00\t2022-01-31T23:59:44+00:00\t 72\t 72\t 1.560000\t 838\t 10.420000\t 0.000000\t 0.310000\t 0.920000\t 0.000000\t 0.000000\t 0.000000\t 9.030000\tN \tN \t \tN \tN
14751588\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:56:50+00:00\t2022-01-31T23:03:17+00:00\t2022-01-31T23:03:25+00:00\t2022-01-31T23:17:17+00:00\t 136\t 20\t 1.230000\t 832\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 8.730000\tN \tN \t \tN \tN
14751589\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:07+00:00\t2022-01-31T23:19:25+00:00\t2022-01-31T23:20:26+00:00\t2022-01-31T23:30:26+00:00\t 20\t 136\t 1.690000\t 600\t 9.320000\t 0.000000\t 0.280000\t 0.830000\t 0.000000\t 0.000000\t 0.000000\t 7.300000\tN \tN \t \tN \tN
14751590\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:24+00:00\t2022-01-31T23:36:13+00:00\t2022-01-31T23:38:13+00:00\t2022-02-01T00:07:24+00:00\t 136\t 82\t 14.700000\t 1751\t 27.340000\t 6.550000\t 1.020000\t 3.010000\t 0.000000\t 0.000000\t 0.000000\t 31.280000\tN \tN \t \tN \tN
TABLE
end

# Checks the number of records plus the first and the last record, each
# converted to a hash. The omit call comes first, so the rest of the body is
# presumably never reached; its message says the data is too large.
test("#each") do
  omit("Skip test of HighVolumeFHVTrip#each because the size of data is too huge to execute.")
  records = @dataset.each.to_a

  first_trip = {
    hvfhs_license_num: :uber,
    dispatching_base_num: "B03404",
    originating_base_num: "B03404",
    request_datetime: Time.parse("2022-01-01 00:05:31 +0000"),
    on_scene_datetime: Time.parse("2022-01-01 00:05:40 +0000"),
    pickup_datetime: Time.parse("2022-01-01 00:07:24 +0000"),
    dropoff_datetime: Time.parse("2022-01-01 00:18:28 +0000"),
    pu_locationID: 170,
    do_locationID: 161,
    trip_miles: 1.18,
    trip_time: 664,
    base_passenger_fare: 24.9,
    tolls: 0.0,
    bcf: 0.75,
    sales_tax: 2.21,
    congestion_surcharge: 2.75,
    airport_fee: 0.0,
    tips: 0.0,
    driver_pay: 23.03,
    shared_request_flag: false,
    shared_match_flag: false,
    access_a_ride_flag: false,
    wav_request_flag: false,
    wav_match_flag: false
  }
  last_trip = {
    hvfhs_license_num: :uber,
    dispatching_base_num: "B03404",
    originating_base_num: "B03404",
    request_datetime: Time.parse("2022-01-31 23:33:24 +00:00"),
    on_scene_datetime: Time.parse("2022-01-31 23:36:13 +00:00"),
    pickup_datetime: Time.parse("2022-01-31 23:38:13 +00:00"),
    dropoff_datetime: Time.parse("2022-02-01 00:07:24 +00:00"),
    pu_locationID: 136,
    do_locationID: 82,
    trip_miles: 14.7,
    trip_time: 1751,
    base_passenger_fare: 27.34,
    tolls: 6.55,
    bcf: 1.02,
    sales_tax: 3.01,
    congestion_surcharge: 0.0,
    airport_fee: 0.0,
    tips: 0.0,
    driver_pay: 31.28,
    shared_request_flag: false,
    shared_match_flag: false,
    access_a_ride_flag: false,
    wav_request_flag: false,
    wav_match_flag: false
  }

  expected = [14751590, first_trip, last_trip]
  actual = [records.size, records.first.to_h, records.last.to_h]
  assert_equal(expected, actual)
end
end

0 comments on commit e2eb492

Please sign in to comment.