diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c98cb94..f16b711 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -26,7 +26,7 @@ jobs:
     runs-on: ${{ matrix.runs-on }}
     env:
       # We can invalidate the current cache by updating this.
-      CACHE_VERSION: "2022-08-27"
+      CACHE_VERSION: "2022-11-22"
     steps:
       - uses: actions/checkout@v3
       - uses: ruby/setup-ruby@v1
@@ -68,6 +68,13 @@ jobs:
           wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
           sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
           sudo apt update
+      - name: Prepare Apache Arrow on macOS
+        if: |
+          runner.os == 'macOS'
+        run: |
+          brew install apache-arrow
+          brew install gobject-introspection
+          brew install apache-arrow-glib
       - name: Install dependencies
         run: |
           bundle install
diff --git a/example/tlc-fhv-trip.rb b/example/tlc-fhv-trip.rb
new file mode 100644
index 0000000..a296a38
--- /dev/null
+++ b/example/tlc-fhv-trip.rb
@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+
+# Example: read the TLC for-hire vehicle (FHV) trip records of
+# January 2022, first as one table and then record by record.
+
+require "datasets-parquet"
+
+dataset = Datasets::TLC::FHVTrip.new(year: 2022, month: 1)
+
+# The whole month as a single Arrow table.
+p(dataset.to_arrow)
+#
+#     dispatching_base_num  pickup_datetime            dropOff_datetime           PUlocationID  DOlocationID  SR_Flag  Affiliated_base_number
+# 0   B00009                2022-01-01T09:31:00+09:00  2022-01-01T10:05:00+09:00  (null)        (null)        (null)   B00009
+# 1   B00009                2022-01-01T09:37:00+09:00  2022-01-01T10:05:00+09:00  (null)        (null)        (null)   B00009
+# ...
+
+# Each row as a Record struct, read through its accessors.
+dataset.each do |record|
+  row = [
+    record.dispatching_base_num,
+    record.pickup_datetime,
+    record.dropoff_datetime,
+    record.pu_location_id,
+    record.do_location_id,
+    record.sr_flag?,
+    record.affiliated_base_number
+  ]
+  p(row)
+end
+# ["B00009", 2022-01-01 09:31:00 +0900, 2022-01-01 10:05:00 +0900, nil, nil, false, "B00009"]
+# ["B00009", 2022-01-01 09:37:00 +0900, 2022-01-01 10:05:00 +0900, nil, nil, false, "B00009"]
+# ...
diff --git a/example/tlc-high-volume-fhv-trip.rb b/example/tlc-high-volume-fhv-trip.rb
new file mode 100644
index 0000000..c83cda4
--- /dev/null
+++ b/example/tlc-high-volume-fhv-trip.rb
@@ -0,0 +1,51 @@
+#!/usr/bin/env ruby
+
+# Example: read the TLC high volume for-hire vehicle trip records of
+# January 2022, first as one table and then record by record.
+
+require "datasets-parquet"
+
+dataset = Datasets::TLC::HighVolumeFHVTrip.new(year: 2022, month: 1)
+
+# The whole month as a single Arrow table.
+p(dataset.to_arrow)
+#
+# hvfhs_license_num dispatching_base_num originating_base_num request_datetime on_scene_datetime pickup_datetime dropoff_datetime PULocationID DOLocationID trip_miles trip_time base_passenger_fare tolls bcf sales_tax congestion_surcharge airport_fee tips driver_pay shared_request_flag shared_match_flag access_a_ride_flag wav_request_flag wav_match_flag
+# 0 HV0003 B03404 B03404 2022-01-01T09:05:31+09:00 2022-01-01T09:05:40+09:00 2022-01-01T09:07:24+09:00 2022-01-01T09:18:28+09:00 170 161 1.180000 664 24.900000 0.000000 0.750000 2.210000 2.750000 0.000000 0.000000 23.030000 N N N N
+# 1 HV0003 B03404 B03404 2022-01-01T09:19:27+09:00 2022-01-01T09:22:08+09:00 2022-01-01T09:22:32+09:00 2022-01-01T09:30:12+09:00 237 161 0.820000 460 11.970000 0.000000 0.360000 1.060000 2.750000 0.000000 0.000000 12.320000 N N N N
+# ...
+
+# Each row as a Record struct, read through its accessors.
+dataset.each do |record|
+  row = [
+    record.hvfhs_license_num,
+    record.dispatching_base_num,
+    record.originating_base_num,
+    record.request_datetime,
+    record.on_scene_datetime,
+    record.pickup_datetime,
+    record.dropoff_datetime,
+    record.pu_locationID,
+    record.do_locationID,
+    record.trip_miles,
+    record.trip_time,
+    record.base_passenger_fare,
+    record.tolls,
+    record.bcf,
+    record.sales_tax,
+    record.congestion_surcharge,
+    record.airport_fee,
+    record.tips,
+    record.driver_pay,
+    record.shared_request_flag?,
+    record.shared_match_flag?,
+    record.access_a_ride_flag?,
+    record.wav_request_flag?,
+    record.wav_match_flag?
+  ]
+  p(row)
+end
+# [:uber, "B03404", "B03404", 2022-01-01 09:05:31 +0900, 2022-01-01 09:05:40 +0900, 2022-01-01 09:07:24 +0900, 2022-01-01 09:18:28 +0900, 170, 161, 1.18, 664, 24.9, 0.0, 0.75, 2.21, 2.75, 0.0, 0.0, 23.03, false, false, false, false, false]
+# [:uber, "B03404", "B03404", 2022-01-01 09:19:27 +0900, 2022-01-01 09:22:08 +0900, 2022-01-01 09:22:32 +0900, 2022-01-01 09:30:12 +0900, 237, 161, 0.82, 460, 11.97, 0.0, 0.36, 1.06, 2.75, 0.0, 0.0, 12.32, false, false, false, false, false]
+# [:uber, "B03404", "B03404", 2022-01-01 09:43:53 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 09:57:37 +0900, 2022-01-01 10:07:32 +0900, 237, 161, 1.18, 595, 29.82, 0.0, 0.89, 2.65, 2.75, 0.0, 0.0, 23.3, false, false, false, false, false]
+# ...
diff --git a/lib/datasets-parquet.rb b/lib/datasets-parquet.rb
index 85761f2..a92e7a1 100644
--- a/lib/datasets-parquet.rb
+++ b/lib/datasets-parquet.rb
@@ -3,5 +3,7 @@
 
 require_relative "datasets-parquet/version"
 
+require_relative "datasets-parquet/tlc/fhv-trip"
 require_relative "datasets-parquet/tlc/green-taxi-trip"
+require_relative "datasets-parquet/tlc/high-volume-fhv-trip"
 require_relative "datasets-parquet/tlc/yellow-taxi-trip"
diff --git a/lib/datasets-parquet/tlc/fhv-trip.rb b/lib/datasets-parquet/tlc/fhv-trip.rb
new file mode 100644
index 0000000..5455ac1
--- /dev/null
+++ b/lib/datasets-parquet/tlc/fhv-trip.rb
@@ -0,0 +1,72 @@
+module Datasets
+  module TLC
+    # New York City Taxi and Limousine Commission for-hire vehicle (FHV)
+    # trip records for a single month, loaded from a Parquet file.
+    class FHVTrip < Dataset
+      # One trip. The members are listed in the same order as the columns
+      # of the Parquet file because #each fills a Record positionally.
+      class Record < Struct.new(:dispatching_base_num,
+                                :pickup_datetime,
+                                :dropoff_datetime,
+                                :pu_location_id,
+                                :do_location_id,
+                                :sr_flag,
+                                :affiliated_base_number)
+        alias_method :sr_flag?, :sr_flag
+
+        # Assigns every value through its setter rather than through
+        # Struct's own constructor, so that converting setters such as
+        # #sr_flag= are applied.
+        def initialize(*values)
+          super()
+          members.zip(values) do |member, value|
+            __send__("#{member}=", value)
+          end
+        end
+
+        # Stores the flag as true or false; a missing value (nil)
+        # becomes false.
+        def sr_flag=(sr_flag)
+          super(!!sr_flag)
+        end
+      end
+
+      # year and month select the monthly file to load. Both default to
+      # today's date.
+      def initialize(year: Date.today.year, month: Date.today.month)
+        super()
+        @metadata.id = "nyc-taxi-and-limousine-commission-for-hire-vehicle-trip"
+        @metadata.name = "New York city Taxi and Limousine Commission: for hire vehicle trip record dataset"
+        @metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"
+        @metadata.licenses = [
+          {
+            name: "NYC Open Data Terms of Use",
+            url: "https://opendata.cityofnewyork.us/overview/#termsofuse",
+          }
+        ]
+        @year = year
+        @month = month
+      end
+
+      # Passes the cache path and URL of the monthly Parquet file to
+      # #download, then loads that path as an Arrow::Table.
+      def to_arrow
+        base_name = "fhv_tripdata_%04d-%02d.parquet" % [@year, @month]
+        data_path = cache_dir_path + base_name
+        data_url = "https://d37ci6vzurychx.cloudfront.net/trip-data/#{base_name}"
+        download(data_path, data_url)
+        Arrow::Table.load(data_path)
+      end
+
+      # Yields one Record per row. Returns an Enumerator without a block.
+      def each
+        return to_enum(__method__) unless block_given?
+
+        to_arrow.raw_records.each do |raw_record|
+          record = Record.new(*raw_record)
+          yield(record)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/datasets-parquet/tlc/high-volume-fhv-trip.rb b/lib/datasets-parquet/tlc/high-volume-fhv-trip.rb
new file mode 100644
index 0000000..63514bc
--- /dev/null
+++ b/lib/datasets-parquet/tlc/high-volume-fhv-trip.rb
@@ -0,0 +1,125 @@
+module Datasets
+  module TLC
+    # New York City Taxi and Limousine Commission high volume for-hire
+    # vehicle trip records for a single month, loaded from a Parquet file.
+    class HighVolumeFHVTrip < Dataset
+      # One trip. The members are listed in the same order as the columns
+      # of the Parquet file because #each fills a Record positionally.
+      class Record < Struct.new(:hvfhs_license_num,
+                                :dispatching_base_num,
+                                :originating_base_num,
+                                :request_datetime,
+                                :on_scene_datetime,
+                                :pickup_datetime,
+                                :dropoff_datetime,
+                                :pu_locationID,
+                                :do_locationID,
+                                :trip_miles,
+                                :trip_time,
+                                :base_passenger_fare,
+                                :tolls,
+                                :bcf,
+                                :sales_tax,
+                                :congestion_surcharge,
+                                :airport_fee,
+                                :tips,
+                                :driver_pay,
+                                :shared_request_flag,
+                                :shared_match_flag,
+                                :access_a_ride_flag,
+                                :wav_request_flag,
+                                :wav_match_flag)
+        # License numbers this class recognizes and the company symbol
+        # each one is exposed as.
+        LICENSE_NAMES = {
+          "HV0002" => :juno,
+          "HV0003" => :uber,
+          "HV0004" => :via,
+          "HV0005" => :lyft,
+        }.freeze
+
+        alias_method :shared_request_flag?, :shared_request_flag
+        alias_method :shared_match_flag?, :shared_match_flag
+        alias_method :access_a_ride_flag?, :access_a_ride_flag
+        alias_method :wav_request_flag?, :wav_request_flag
+        alias_method :wav_match_flag?, :wav_match_flag
+
+        # Assigns every value through its setter rather than through
+        # Struct's own constructor, so that the converting setters
+        # below are applied.
+        def initialize(*values)
+          super()
+          members.zip(values) do |member, value|
+            __send__("#{member}=", value)
+          end
+        end
+
+        # Stores the company symbol for a recognized license number.
+        # Any other value is stored unchanged instead of being silently
+        # dropped, so an unrecognized license number stays visible.
+        def hvfhs_license_num=(hvfhs_license_num)
+          super(LICENSE_NAMES.fetch(hvfhs_license_num, hvfhs_license_num))
+        end
+
+        # Each flag setter stores true only for "Y"; any other value,
+        # such as "N", a blank or nil, is stored as false.
+        def shared_request_flag=(shared_request_flag)
+          super(shared_request_flag == "Y")
+        end
+
+        def shared_match_flag=(shared_match_flag)
+          super(shared_match_flag == "Y")
+        end
+
+        def access_a_ride_flag=(access_a_ride_flag)
+          super(access_a_ride_flag == "Y")
+        end
+
+        def wav_request_flag=(wav_request_flag)
+          super(wav_request_flag == "Y")
+        end
+
+        def wav_match_flag=(wav_match_flag)
+          super(wav_match_flag == "Y")
+        end
+      end
+
+      # year and month select the monthly file to load. Both default to
+      # today's date.
+      def initialize(year: Date.today.year, month: Date.today.month)
+        super()
+        @metadata.id = "nyc-taxi-and-limousine-commission-high-volume-for-hire-vehicle-trip"
+        @metadata.name = "New York city Taxi and Limousine Commission: high volume for hire vehicle trip record dataset"
+        @metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"
+        @metadata.licenses = [
+          {
+            name: "NYC Open Data Terms of Use",
+            url: "https://opendata.cityofnewyork.us/overview/#termsofuse",
+          }
+        ]
+        @year = year
+        @month = month
+      end
+
+      # Passes the cache path and URL of the monthly Parquet file to
+      # #download, then loads that path as an Arrow::Table.
+      def to_arrow
+        base_name = "fhvhv_tripdata_%04d-%02d.parquet" % [@year, @month]
+        data_path = cache_dir_path + base_name
+        data_url = "https://d37ci6vzurychx.cloudfront.net/trip-data/#{base_name}"
+        download(data_path, data_url)
+        Arrow::Table.load(data_path)
+      end
+
+      # Yields one Record per row. Returns an Enumerator without a block.
+      def each
+        return to_enum(__method__) unless block_given?
+
+        to_arrow.raw_records.each do |raw_record|
+          record = Record.new(*raw_record)
+          yield(record)
+        end
+      end
+    end
+  end
+end
diff --git a/test/test-tlc-fhv-trip.rb b/test/test-tlc-fhv-trip.rb
new file mode 100644
index 0000000..4b9c654
--- /dev/null
+++ b/test/test-tlc-fhv-trip.rb
@@ -0,0 +1,69 @@
+class TLCFHVTripTest < Test::Unit::TestCase
+  def setup
+    @default_timezone_env = ENV['TZ']
+    ENV['TZ'] = 'UTC'
+    @dataset = Datasets::TLC::FHVTrip.new(year: 2022, month: 1)
+  end
+
+  def teardown
+    ENV['TZ'] = @default_timezone_env
+  end
+
+  test("#to_arrow") do
+    assert_equal(<<~TABLE, @dataset.to_arrow.to_s)
+\tdispatching_base_num\t pickup_datetime\t dropOff_datetime\tPUlocationID\tDOlocationID\tSR_Flag\tAffiliated_base_number
+ 0\tB00009 \t2022-01-01T00:31:00+00:00\t2022-01-01T01:05:00+00:00\t (null)\t (null)\t (null)\tB00009
+ 1\tB00009 \t2022-01-01T00:37:00+00:00\t2022-01-01T01:05:00+00:00\t (null)\t (null)\t (null)\tB00009
+ 2\tB00037 \t2022-01-01T00:56:37+00:00\t2022-01-01T01:06:11+00:00\t (null)\t 85.000000\t (null)\tB00037
+ 3\tB00037 \t2022-01-01T00:19:54+00:00\t2022-01-01T00:30:47+00:00\t (null)\t 85.000000\t (null)\tB00037
+ 4\tB00037 \t2022-01-01T00:41:49+00:00\t2022-01-01T00:52:16+00:00\t (null)\t 188.000000\t (null)\tB00037
+ 5\tB00037 \t2022-01-01T00:21:32+00:00\t2022-01-01T00:35:06+00:00\t (null)\t 61.000000\t (null)\tB00037
+ 6\tB00037 \t2022-01-01T00:51:19+00:00\t2022-01-01T01:08:06+00:00\t (null)\t 76.000000\t (null)\tB00037
+ 7\tB00111 \t2022-01-01T00:30:00+00:00\t2022-01-01T01:41:00+00:00\t (null)\t (null)\t (null)\tB03406
+ 8\tB00112 \t2022-01-01T00:31:30+00:00\t2022-01-01T01:10:06+00:00\t (null)\t 67.000000\t (null)\tB00112
+ 9\tB00112 \t2022-01-01T00:12:26+00:00\t2022-01-01T00:37:22+00:00\t (null)\t 155.000000\t (null)\tB00112
+...
+1143681\tB03380 \t2022-01-31T23:39:32+00:00\t2022-01-31T23:47:43+00:00\t 246.000000\t 158.000000\t (null)\tB03380
+1143682\tB03380 \t2022-01-31T23:52:52+00:00\t2022-02-01T00:03:14+00:00\t 158.000000\t 107.000000\t (null)\tB03380
+1143683\tB03380 \t2022-01-31T23:24:44+00:00\t2022-01-31T23:35:46+00:00\t 231.000000\t 4.000000\t (null)\tB03380
+1143684\tB03380 \t2022-01-31T23:21:35+00:00\t2022-01-31T23:32:16+00:00\t 229.000000\t 48.000000\t (null)\tB03380
+1143685\tB03380 \t2022-01-31T23:02:50+00:00\t2022-01-31T23:20:07+00:00\t 142.000000\t 113.000000\t (null)\tB03380
+1143686\tB03380 \t2022-01-31T23:22:41+00:00\t2022-01-31T23:26:39+00:00\t 234.000000\t 107.000000\t (null)\tB03380
+1143687\tB03380 \t2022-01-31T23:42:42+00:00\t2022-01-31T23:52:58+00:00\t 114.000000\t 148.000000\t (null)\tB03380
+1143688\tB03380 \t2022-01-31T23:07:13+00:00\t2022-01-31T23:13:40+00:00\t 90.000000\t 113.000000\t (null)\tB03380
+1143689\tB03380 \t2022-01-31T23:16:14+00:00\t2022-01-31T23:31:03+00:00\t 113.000000\t 140.000000\t (null)\tB03380
+1143690\tB03381 \t2022-01-31T23:47:42+00:00\t2022-02-01T00:15:03+00:00\t (null)\t 122.000000\t (null)\tB03404
+    TABLE
+  end
+
+  test("#each") do
+    records = @dataset.each.to_a
+
+    assert_equal([
+                   1143691,
+                   {
+                     dispatching_base_num: 'B00009',
+                     pickup_datetime: Time.parse('2022-01-01 00:31:00 +0000'),
+                     dropoff_datetime: Time.parse('2022-01-01 01:05:00 +0000'),
+                     pu_location_id: nil,
+                     do_location_id: nil,
+                     sr_flag: false,
+                     affiliated_base_number: 'B00009'
+                   },
+                   {
+                     dispatching_base_num: 'B03381',
+                     pickup_datetime: Time.parse('2022-01-31 23:47:42 +0000'),
+                     dropoff_datetime: Time.parse('2022-02-01 00:15:03 +0000'),
+                     pu_location_id: nil,
+                     do_location_id: 122.0,
+                     sr_flag: false,
+                     affiliated_base_number: 'B03404'
+                   }
+                 ],
+                 [
+                   records.size,
+                   records.first.to_h,
+                   records.last.to_h,
+                 ])
+  end
+end
diff --git a/test/test-tlc-high-volume-fhv-trip.rb b/test/test-tlc-high-volume-fhv-trip.rb
new file mode 100644
index 0000000..95a5c49
--- /dev/null
+++ b/test/test-tlc-high-volume-fhv-trip.rb
@@ -0,0 +1,104 @@
+class TLCHighVolumeFHVTripTest < Test::Unit::TestCase
+  def setup
+    @default_timezone_env = ENV["TZ"]
+    ENV["TZ"] = "UTC"
+    @dataset = Datasets::TLC::HighVolumeFHVTrip.new(year: 2022, month: 1)
+  end
+
+  def teardown
+    ENV["TZ"] = @default_timezone_env
+  end
+
+  test("#to_arrow") do
+    assert_equal(<<~TABLE, @dataset.to_arrow.to_s)
+\thvfhs_license_num\tdispatching_base_num\toriginating_base_num\t request_datetime\t on_scene_datetime\t pickup_datetime\t dropoff_datetime\tPULocationID\tDOLocationID\ttrip_miles\ttrip_time\tbase_passenger_fare\t tolls\t bcf\t sales_tax\tcongestion_surcharge\tairport_fee\t tips\tdriver_pay\tshared_request_flag\tshared_match_flag\taccess_a_ride_flag\twav_request_flag\twav_match_flag
+ 0\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:05:31+00:00\t2022-01-01T00:05:40+00:00\t2022-01-01T00:07:24+00:00\t2022-01-01T00:18:28+00:00\t 170\t 161\t 1.180000\t 664\t 24.900000\t 0.000000\t 0.750000\t 2.210000\t 2.750000\t 0.000000\t 0.000000\t 23.030000\tN \tN \t \tN \tN
+ 1\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:19:27+00:00\t2022-01-01T00:22:08+00:00\t2022-01-01T00:22:32+00:00\t2022-01-01T00:30:12+00:00\t 237\t 161\t 0.820000\t 460\t 11.970000\t 0.000000\t 0.360000\t 1.060000\t 2.750000\t 0.000000\t 0.000000\t 12.320000\tN \tN \t \tN \tN
+ 2\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:43:53+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T00:57:37+00:00\t2022-01-01T01:07:32+00:00\t 237\t 161\t 1.180000\t 595\t 29.820000\t 0.000000\t 0.890000\t 2.650000\t 2.750000\t 0.000000\t 0.000000\t 23.300000\tN \tN \t \tN \tN
+ 3\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:15:36+00:00\t2022-01-01T00:17:08+00:00\t2022-01-01T00:18:02+00:00\t2022-01-01T00:23:05+00:00\t 262\t 229\t 1.650000\t 303\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 2.750000\t 0.000000\t 0.000000\t 6.300000\tN \tN \t \tN \tN
+ 4\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:25:45+00:00\t2022-01-01T00:26:01+00:00\t2022-01-01T00:28:01+00:00\t2022-01-01T00:35:42+00:00\t 229\t 141\t 1.650000\t 461\t 9.440000\t 0.000000\t 0.280000\t 0.840000\t 2.750000\t 0.000000\t 0.000000\t 7.440000\tN \tN \t \tN \tN
+ 5\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:34:44+00:00\t2022-01-01T00:36:52+00:00\t2022-01-01T00:38:50+00:00\t2022-01-01T00:51:32+00:00\t 263\t 79\t 4.510000\t 762\t 17.670000\t 0.000000\t 0.530000\t 1.570000\t 2.750000\t 0.000000\t 0.000000\t 12.250000\tN \tN \t \tN \tN
+ 6\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:47:51+00:00\t2022-01-01T00:52:00+00:00\t2022-01-01T00:53:25+00:00\t2022-01-01T01:08:56+00:00\t 113\t 140\t 3.680000\t 931\t 16.680000\t 0.000000\t 0.500000\t 1.480000\t 2.750000\t 0.000000\t 0.000000\t 12.750000\tN \tN \t \tN \tN
+ 7\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:06:21+00:00\t2022-01-01T00:06:58+00:00\t2022-01-01T00:08:58+00:00\t2022-01-01T00:23:01+00:00\t 151\t 75\t 2.770000\t 843\t 14.410000\t 0.000000\t 0.430000\t 1.280000\t 0.000000\t 0.000000\t 4.000000\t 11.470000\tN \tN \t \tN \tN
+ 8\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:27:54+00:00\t2022-01-01T00:30:26+00:00\t2022-01-01T00:32:25+00:00\t2022-01-01T00:44:15+00:00\t 263\t 229\t 2.040000\t 710\t 10.640000\t 0.000000\t 0.320000\t 0.940000\t 2.750000\t 0.000000\t 0.000000\t 9.550000\tN \tN \t \tN \tN
+ 9\tHV0003 \tB03404 \tB03404 \t2022-01-01T00:44:59+00:00\t2022-01-01T00:48:23+00:00\t2022-01-01T00:50:23+00:00\t2022-01-01T01:15:30+00:00\t 237\t 169\t 8.790000\t 1507\t 107.560000\t 0.000000\t 0.830000\t 2.450000\t 2.750000\t 0.000000\t 0.000000\t 23.670000\tN \tN \t \tN \tN
+...
+14751581\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:36+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:19:05+00:00\t2022-01-31T23:33:23+00:00\t 163\t 244\t 7.570000\t 858\t 18.460000\t 0.000000\t 0.550000\t 1.640000\t 2.750000\t 0.000000\t 0.000000\t 15.870000\tN \tN \t \tN \tN
+14751582\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:34+00:00\t2022-01-31T23:34:20+00:00\t2022-01-31T23:36:02+00:00\t2022-01-31T23:50:15+00:00\t 244\t 47\t 3.050000\t 853\t 16.230000\t 0.000000\t 0.490000\t 1.440000\t 0.000000\t 0.000000\t 0.000000\t 10.850000\tN \tN \t \tN \tN
+14751583\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:57:18+00:00\t2022-01-31T23:07:52+00:00\t2022-01-31T23:09:52+00:00\t2022-01-31T23:19:46+00:00\t 86\t 86\t 2.050000\t 594\t 9.630000\t 0.000000\t 0.290000\t 0.850000\t 0.000000\t 0.000000\t 0.000000\t 8.510000\tN \tN \t \tN \tN
+14751584\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:23:00+00:00\t2022-01-31T23:24:44+00:00\t2022-01-31T23:26:37+00:00\t2022-01-31T23:34:37+00:00\t 86\t 117\t 1.300000\t 480\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 6.730000\tN \tN \t \tN \tN
+14751585\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:19+00:00\t2022-01-31T23:40:56+00:00\t2022-01-31T23:41:58+00:00\t2022-01-31T23:47:44+00:00\t 86\t 86\t 1.530000\t 346\t 7.190000\t 0.000000\t 0.220000\t 0.640000\t 0.000000\t 0.000000\t 0.000000\t 6.680000\tN \tN \t \tN \tN
+14751586\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:22:16+00:00\t2022-01-31T23:26:04+00:00\t2022-01-31T23:27:20+00:00\t2022-01-31T23:40:46+00:00\t 77\t 71\t 2.590000\t 806\t 14.280000\t 0.000000\t 0.430000\t 1.270000\t 0.000000\t 0.000000\t 0.000000\t 9.900000\tN \tN \t \tN \tN
+14751587\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:42:30+00:00\t2022-01-31T23:45:08+00:00\t2022-01-31T23:45:46+00:00\t2022-01-31T23:59:44+00:00\t 72\t 72\t 1.560000\t 838\t 10.420000\t 0.000000\t 0.310000\t 0.920000\t 0.000000\t 0.000000\t 0.000000\t 9.030000\tN \tN \t \tN \tN
+14751588\tHV0003 \tB03404 \tB03404 \t2022-01-31T22:56:50+00:00\t2022-01-31T23:03:17+00:00\t2022-01-31T23:03:25+00:00\t2022-01-31T23:17:17+00:00\t 136\t 20\t 1.230000\t 832\t 7.910000\t 0.000000\t 0.240000\t 0.700000\t 0.000000\t 0.000000\t 0.000000\t 8.730000\tN \tN \t \tN \tN
+14751589\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:15:07+00:00\t2022-01-31T23:19:25+00:00\t2022-01-31T23:20:26+00:00\t2022-01-31T23:30:26+00:00\t 20\t 136\t 1.690000\t 600\t 9.320000\t 0.000000\t 0.280000\t 0.830000\t 0.000000\t 0.000000\t 0.000000\t 7.300000\tN \tN \t \tN \tN
+14751590\tHV0003 \tB03404 \tB03404 \t2022-01-31T23:33:24+00:00\t2022-01-31T23:36:13+00:00\t2022-01-31T23:38:13+00:00\t2022-02-01T00:07:24+00:00\t 136\t 82\t 14.700000\t 1751\t 27.340000\t 6.550000\t 1.020000\t 3.010000\t 0.000000\t 0.000000\t 0.000000\t 31.280000\tN \tN \t \tN \tN
+    TABLE
+  end
+
+  test("#each") do
+    omit("Skip test of HighVolumeFHVTrip#each because the size of data is too huge to execute.")
+    records = @dataset.each.to_a
+
+    assert_equal([
+                   14751591,
+                   {
+                     hvfhs_license_num: :uber,
+                     dispatching_base_num: "B03404",
+                     originating_base_num: "B03404",
+                     request_datetime: Time.parse("2022-01-01 00:05:31 +0000"),
+                     on_scene_datetime: Time.parse("2022-01-01 00:05:40 +0000"),
+                     pickup_datetime: Time.parse("2022-01-01 00:07:24 +0000"),
+                     dropoff_datetime: Time.parse("2022-01-01 00:18:28 +0000"),
+                     pu_locationID: 170,
+                     do_locationID: 161,
+                     trip_miles: 1.18,
+                     trip_time: 664,
+                     base_passenger_fare: 24.9,
+                     tolls: 0.0,
+                     bcf: 0.75,
+                     sales_tax: 2.21,
+                     congestion_surcharge: 2.75,
+                     airport_fee: 0.0,
+                     tips: 0.0,
+                     driver_pay: 23.03,
+                     shared_request_flag: false,
+                     shared_match_flag: false,
+                     access_a_ride_flag: false,
+                     wav_request_flag: false,
+                     wav_match_flag: false
+                   },
+                   {
+                     hvfhs_license_num: :uber,
+                     dispatching_base_num: "B03404",
+                     originating_base_num: "B03404",
+                     request_datetime: Time.parse("2022-01-31 23:33:24 +00:00"),
+                     on_scene_datetime: Time.parse("2022-01-31 23:36:13 +00:00"),
+                     pickup_datetime: Time.parse("2022-01-31 23:38:13 +00:00"),
+                     dropoff_datetime: Time.parse("2022-02-01 00:07:24 +00:00"),
+                     pu_locationID: 136,
+                     do_locationID: 82,
+                     trip_miles: 14.7,
+                     trip_time: 1751,
+                     base_passenger_fare: 27.34,
+                     tolls: 6.55,
+                     bcf: 1.02,
+                     sales_tax: 3.01,
+                     congestion_surcharge: 0.0,
+                     airport_fee: 0.0,
+                     tips: 0.0,
+                     driver_pay: 31.28,
+                     shared_request_flag: false,
+                     shared_match_flag: false,
+                     access_a_ride_flag: false,
+                     wav_request_flag: false,
+                     wav_match_flag: false
+                   }
+                 ],
+                 [
+                   records.size,
+                   records.first.to_h,
+                   records.last.to_h,
+                 ])
+  end
+end