-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for New York city's Taxi and Limousine Commission yellow …
…taxi trip records (#151) TODO: We should consider the red-parquet dependency before we release a new version. ## Related Issue - https://github.com/red-data-tools/red-datasets/issues/150 ## What I did - added red-parquet to runtime dependencies - added New York city's taxi and limousine commission trip yellow taxi support - added simple unit test ### Details - [Yellow Trips Data Dictionary](https://www1.nyc.gov/assets/tlc/downloads/pdf/data_dictionary_trip_records_yellow.pdf) - ref: https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page ## What I didn't do - add the following datasets related to the NYC Taxi and Limousine Commission - Green Trips Data - FHV Trips Data - High Volume FHV Trips Data ## What I checked - ran tlc-trip-yellow-taxi example ```console % ruby example/tlc-trip-yellow-taxi.rb [:creative_mobile_technologies, 2022-01-01 09:35:40 +0900, 2022-01-01 09:53:29 +0900, 2.0, 3.8, :standard_rate, false, 142, 236, :credit_card, 14.5, 3.0, 0.5, 3.65, 0.0, 0.3, 21.95, 2.5, 0.0] [:creative_mobile_technologies, 2022-01-01 09:33:43 +0900, 2022-01-01 09:42:07 +0900, 1.0, 2.1, :standard_rate, false, 236, 42, :credit_card, 8.0, 0.5, 0.5, 4.0, 0.0, 0.3, 13.3, 0.0, 0.0] ``` ## Notes for reviewers - Could you give some advice about the `Class Name`: `TLCTripYellowTaxi` or `TLCTrip::YellowTaxi`, or should I add `NYC` as a prefix? - If you don't mind, I would like to try adding the following other datasets too - Green Trips Data - FHV Trips Data - High Volume FHV Trips Data Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
- Loading branch information
Showing
6 changed files
with
210 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/usr/bin/env ruby | ||
|
||
require "datasets" | ||
|
||
# Print every decoded field of each yellow taxi trip recorded for January 2022.
# The fields are listed in the order they appear in each printed row.
fields = %i[
  vendor
  tpep_pickup_datetime
  tpep_dropoff_datetime
  passenger_count
  trip_distance
  rate_code
  store_and_fwd?
  pu_location_id
  do_location_id
  payment
  fare_amount
  extra
  mta_tax
  tip_amount
  tolls_amount
  improvement_surcharge
  total_amount
  congestion_surcharge
  airport_fee
]

dataset = Datasets::TLC::YellowTaxiTrip.new(year: 2022, month: 1)
dataset.each do |trip|
  p(fields.map { |field| trip.public_send(field) })
  # Sample output:
  # [:creative_mobile_technologies, 2022-01-01 09:35:40 +0900, 2022-01-01 09:53:29 +0900, 2.0, 3.8, :standard_rate, false, 142, 236, :credit_card, 14.5, 3.0, 0.5, 3.65, 0.0, 0.3, 21.95, 2.5, 0.0]
  # [:creative_mobile_technologies, 2022-01-01 09:33:43 +0900, 2022-01-01 09:42:07 +0900, 1.0, 2.1, :standard_rate, false, 236, 42, :credit_card, 8.0, 0.5, 0.5, 4.0, 0.0, 0.3, 13.3, 0.0, 0.0]
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
require "parquet" | ||
require_relative "../dataset" | ||
|
||
module Datasets | ||
module TLC | ||
class YellowTaxiTrip < Dataset | ||
# A single yellow taxi trip with the coded columns decoded.
#
# Values are accepted positionally, in member order. Every value is routed
# through the member's writer, so the raw codes given for +vendor+,
# +rate_code+, +store_and_fwd+ and +payment+ are translated on the way in;
# all other members are stored unchanged. Members without a value are +nil+.
class Record < Struct.new(:vendor,
                          :tpep_pickup_datetime,
                          :tpep_dropoff_datetime,
                          :passenger_count,
                          :trip_distance,
                          :rate_code,
                          :store_and_fwd,
                          :pu_location_id,
                          :do_location_id,
                          :payment,
                          :fare_amount,
                          :extra,
                          :mta_tax,
                          :tip_amount,
                          :tolls_amount,
                          :improvement_surcharge,
                          :total_amount,
                          :congestion_surcharge,
                          :airport_fee)
  # Predicate-style reader for the decoded +store_and_fwd+ flag.
  alias_method :store_and_fwd?, :store_and_fwd

  # @param values [Array] raw column values in member order.
  def initialize(*values)
    # Start with every member nil, then assign through the writers so the
    # decoding overrides below also apply to constructor arguments.
    super()
    members.zip(values) do |member, value|
      __send__("#{member}=", value)
    end
  end

  # Decodes the raw vendor code into a symbol.
  #
  # Only code 2 maps to +:veri_fone_inc+ (per the TLC data dictionary);
  # any other unrecognised code, including +nil+, is stored as +nil+
  # instead of being silently reported as VeriFone.
  def vendor=(vendor)
    decoded =
      case vendor
      when 1 then :creative_mobile_technologies
      when 2 then :veri_fone_inc
      end
    super(decoded)
  end

  # Decodes the raw rate code into a symbol; unknown codes become +nil+.
  #
  # The writer always stores a value, so assigning an unknown code clears
  # a previously decoded one rather than leaving it in place.
  def rate_code=(rate_code)
    decoded =
      case rate_code
      when 1.0 then :standard_rate
      when 2.0 then :jfk
      when 3.0 then :newark
      when 4.0 then :nassau_or_westchester
      when 5.0 then :negotiated_fare
      when 6.0 then :group_ride
      end
    super(decoded)
  end

  # Stores +true+ only when the raw flag is exactly "Y"; anything else,
  # including +nil+, is stored as +false+.
  def store_and_fwd=(store_and_fwd)
    super(store_and_fwd == 'Y')
  end

  # Decodes the raw payment type into a symbol; unknown codes become +nil+.
  #
  # As with +rate_code=+, an unknown code overwrites any earlier value.
  def payment=(payment)
    decoded =
      case payment
      when 1 then :credit_card
      when 2 then :cash
      when 3 then :no_charge
      when 4 then :dispute
      when 5 then :unknown
      when 6 then :voided_trip
      end
    super(decoded)
  end
end
|
||
# Sets up the yellow taxi trip dataset for one monthly data file.
#
# @param year [Integer] year of the monthly file; defaults to the current year.
# @param month [Integer] month number of the monthly file; defaults to the
#   current month.
#
# NOTE(review): the defaults point at the current month, whose file may
# not be published yet — callers presumably want to pass both explicitly.
def initialize(year: Date.today.year, month: Date.today.month)
  super()
  # The identifier previously misspelled "yellow" as "yello".
  @metadata.id = "nyc-taxi-and-limousine-commission-yellow-taxi-trip"
  @metadata.name = "New York city Taxi and Limousine Commission: yellow taxi trip record dataset"
  @metadata.url = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"
  @metadata.licenses = [
    {
      name: "NYC Open Data Terms of Use",
      url: "https://opendata.cityofnewyork.us/overview/#termsofuse",
    }
  ]
  # Remembered so the matching monthly file can be located later.
  @year = year
  @month = month
end
|
||
# Yields every trip of the selected month as a Record.
#
# Returns an Enumerator over the same records when called without a block.
def each
  return to_enum(__method__) unless block_given?

  # Each raw record is an array of column values, splatted into Record.
  open_data.raw_records.each { |values| yield(Record.new(*values)) }
end
|
||
private | ||
# Downloads the Parquet file for the configured year and month into the
# cache directory and loads it as an Arrow table.
def open_data
  file_name = format("yellow_tripdata_%04d-%02d.parquet", @year, @month)
  local_path = cache_dir_path + file_name
  download(local_path,
           "https://d37ci6vzurychx.cloudfront.net/trip-data/#{file_name}")
  Arrow::Table.load(local_path)
end
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Checks the record count and the first and last decoded records of the
# January 2022 yellow taxi trip data.
class TLCYellowTaxiTripTest < Test::Unit::TestCase
  # Builds an expected Record from raw column values, so the expected side
  # goes through the same decoding as the dataset's records.
  def record(*args)
    Datasets::TLC::YellowTaxiTrip::Record.new(*args)
  end

  test("each") do
    records = Datasets::TLC::YellowTaxiTrip.new(year: 2022, month: 1).each.to_a

    first_trip = record(1,
                        Time.parse('2022-01-01 09:35:40 +0900'),
                        Time.parse('2022-01-01 09:53:29 +0900'),
                        2.0,
                        3.8,
                        1.0,
                        'N',
                        142,
                        236,
                        1,
                        14.5,
                        3.0,
                        0.5,
                        3.65,
                        0.0,
                        0.3,
                        21.95,
                        2.5,
                        0.0)
    last_trip = record(2,
                       Time.parse('2022-02-01 08:46:00 +0900'),
                       Time.parse('2022-02-01 09:13:00 +0900'),
                       nil,
                       8.94,
                       nil,
                       nil,
                       186,
                       181,
                       nil,
                       25.48,
                       0.0,
                       0.5,
                       6.28,
                       0.0,
                       0.3,
                       35.06,
                       nil,
                       nil)

    assert_equal([2_463_931, first_trip, last_trip],
                 [records.size, records.first, records.last])
  end
end