Skip to content

Commit

Permalink
Parse Time32/Time64 from formatted string
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Nov 13, 2022
1 parent c7210ce commit 3ca41f5
Show file tree
Hide file tree
Showing 2 changed files with 340 additions and 4 deletions.
309 changes: 305 additions & 4 deletions arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,123 @@ impl Parser for TimestampSecondType {
}
}

// NOTE(review): the diff header for this file reports 4 deletions, and
// hand-written `impl Parser` blocks for these same four types follow directly
// below, so these macro invocations look like the removed side of the diff
// rather than code that coexists with those impls — confirm.
parser_primitive!(Time64NanosecondType);
parser_primitive!(Time64MicrosecondType);
parser_primitive!(Time32MillisecondType);
parser_primitive!(Time32SecondType);
impl Parser for Time64NanosecondType {
fn parse(string: &str) -> Option<Self::Native> {
[
"%I:%M:%S%.9f %P",
"%I:%M:%S%.9f %p",
"%l:%M:%S%.9f %P",
"%l:%M:%S%.9f %p",
"%H:%M:%S%.9f",
"%k:%M:%S%.9f",
"%I:%M:%S %P",
"%I:%M:%S %p",
"%l:%M:%S %P",
"%l:%M:%S %p",
"%H:%M:%S",
"%k:%M:%S",
"%I:%M %P",
"%I:%M %p",
"%l:%M %P",
"%l:%M %p",
"%H:%M",
"%k:%M",
]
.iter()
.find_map(|f| NaiveTime::parse_from_str(string, f).ok())
.map(|nt| {
nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64
})
.or_else(|| string.parse::<Self::Native>().ok())
}
}

impl Parser for Time64MicrosecondType {
fn parse(string: &str) -> Option<Self::Native> {
[
"%I:%M:%S%.6f %P",
"%I:%M:%S%.6f %p",
"%l:%M:%S%.6f %P",
"%l:%M:%S%.6f %p",
"%H:%M:%S%.6f",
"%k:%M:%S%.6f",
"%I:%M:%S %P",
"%I:%M:%S %p",
"%l:%M:%S %P",
"%l:%M:%S %p",
"%H:%M:%S",
"%k:%M:%S",
"%I:%M %P",
"%I:%M %p",
"%l:%M %P",
"%l:%M %p",
"%H:%M",
"%k:%M",
]
.iter()
.find_map(|f| NaiveTime::parse_from_str(string, f).ok())
.map(|nt| {
nt.num_seconds_from_midnight() as i64 * 1_000_000
+ (nt.nanosecond() as i64) / 1_000
})
.or_else(|| string.parse::<Self::Native>().ok())
}
}

impl Parser for Time32MillisecondType {
fn parse(string: &str) -> Option<Self::Native> {
[
"%I:%M:%S%.3f %P",
"%I:%M:%S%.3f %p",
"%l:%M:%S%.3f %P",
"%l:%M:%S%.3f %p",
"%H:%M:%S%.3f",
"%k:%M:%S%.3f",
"%I:%M:%S %P",
"%I:%M:%S %p",
"%l:%M:%S %P",
"%l:%M:%S %p",
"%H:%M:%S",
"%k:%M:%S",
"%I:%M %P",
"%I:%M %p",
"%l:%M %P",
"%l:%M %p",
"%H:%M",
"%k:%M",
]
.iter()
.find_map(|f| NaiveTime::parse_from_str(string, f).ok())
.map(|nt| {
nt.num_seconds_from_midnight() as i32 * 1_000
+ nt.nanosecond() as i32 / 1_000_000
})
.or_else(|| string.parse::<Self::Native>().ok())
}
}

impl Parser for Time32SecondType {
fn parse(string: &str) -> Option<Self::Native> {
[
"%I:%M:%S %P",
"%I:%M:%S %p",
"%l:%M:%S %P",
"%l:%M:%S %p",
"%H:%M:%S",
"%k:%M:%S",
"%I:%M %P",
"%I:%M %p",
"%l:%M %P",
"%l:%M %p",
"%H:%M",
"%k:%M",
]
.iter()
.find_map(|f| NaiveTime::parse_from_str(string, f).ok())
.map(|nt| nt.num_seconds_from_midnight() as i32)
.or_else(|| string.parse::<Self::Native>().ok())
}
}

/// Number of days between 0001-01-01 and 1970-01-01
///
/// NOTE(review): 719_163 is the ordinal of 1970-01-01 when 0001-01-01 is
/// counted as day 1; the elapsed-day difference between the two dates is
/// 719_162. Presumably the value follows chrono's `num_days_from_ce`
/// convention — confirm against the call sites.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;
Expand Down Expand Up @@ -411,4 +524,192 @@ mod tests {
parse_timestamp("2020-09-08 13:42:29").unwrap()
);
}

#[test]
fn parse_time64_nanos() {
    // (input, expected nanoseconds since midnight). The last row exercises the
    // plain-integer fallback.
    let cases: &[(&str, i64)] = &[
        // 12-hour and 24-hour clocks with fractional seconds
        ("12:10:01.123456789 AM", 601_123_456_789),
        ("12:10:01.123456789 am", 601_123_456_789),
        ("2:10:01.12345678 PM", 51_001_123_456_780),
        ("2:10:01.12345678 pm", 51_001_123_456_780),
        ("02:10:01.1234567", 7_801_123_456_700),
        ("2:10:01.1234567", 7_801_123_456_700),
        // whole seconds
        ("12:10:01 AM", 601_000_000_000),
        ("12:10:01 am", 601_000_000_000),
        ("2:10:01 PM", 51_001_000_000_000),
        ("2:10:01 pm", 51_001_000_000_000),
        ("02:10:01", 7_801_000_000_000),
        ("2:10:01", 7_801_000_000_000),
        // hours and minutes only
        ("12:10 AM", 600_000_000_000),
        ("12:10 am", 600_000_000_000),
        ("2:10 PM", 51_000_000_000_000),
        ("2:10 pm", 51_000_000_000_000),
        ("02:10", 7_800_000_000_000),
        ("2:10", 7_800_000_000_000),
        // bare integer
        ("1", 1),
    ];

    for &(input, expected) in cases {
        assert_eq!(
            Time64NanosecondType::parse(input),
            Some(expected),
            "input: {:?}",
            input
        );
    }
}

#[test]
fn parse_time64_micros() {
    // (input, expected microseconds since midnight). The last row exercises
    // the plain-integer fallback.
    let cases: &[(&str, i64)] = &[
        // 12-hour and 24-hour clocks with fractional seconds
        ("12:10:01.123456 AM", 601_123_456),
        ("12:10:01.123456 am", 601_123_456),
        ("2:10:01.12345 PM", 51_001_123_450),
        ("2:10:01.12345 pm", 51_001_123_450),
        ("02:10:01.1234", 7_801_123_400),
        ("2:10:01.1234", 7_801_123_400),
        // whole seconds
        ("12:10:01 AM", 601_000_000),
        ("12:10:01 am", 601_000_000),
        ("2:10:01 PM", 51_001_000_000),
        ("2:10:01 pm", 51_001_000_000),
        ("02:10:01", 7_801_000_000),
        ("2:10:01", 7_801_000_000),
        // hours and minutes only
        ("12:10 AM", 600_000_000),
        ("12:10 am", 600_000_000),
        ("2:10 PM", 51_000_000_000),
        ("2:10 pm", 51_000_000_000),
        ("02:10", 7_800_000_000),
        ("2:10", 7_800_000_000),
        // bare integer
        ("1", 1),
    ];

    for &(input, expected) in cases {
        assert_eq!(
            Time64MicrosecondType::parse(input),
            Some(expected),
            "input: {:?}",
            input
        );
    }
}

#[test]
fn parse_time32_millis() {
    // (input, expected milliseconds since midnight). The last row exercises
    // the plain-integer fallback.
    let cases: &[(&str, i32)] = &[
        // 12-hour and 24-hour clocks with fractional seconds
        ("12:10:01.123 AM", 601_123),
        ("12:10:01.123 am", 601_123),
        ("2:10:01.12 PM", 51_001_120),
        ("2:10:01.12 pm", 51_001_120),
        ("02:10:01.1", 7_801_100),
        ("2:10:01.1", 7_801_100),
        // whole seconds
        ("12:10:01 AM", 601_000),
        ("12:10:01 am", 601_000),
        ("2:10:01 PM", 51_001_000),
        ("2:10:01 pm", 51_001_000),
        ("02:10:01", 7_801_000),
        ("2:10:01", 7_801_000),
        // hours and minutes only
        ("12:10 AM", 600_000),
        ("12:10 am", 600_000),
        ("2:10 PM", 51_000_000),
        ("2:10 pm", 51_000_000),
        ("02:10", 7_800_000),
        ("2:10", 7_800_000),
        // bare integer
        ("1", 1),
    ];

    for &(input, expected) in cases {
        assert_eq!(
            Time32MillisecondType::parse(input),
            Some(expected),
            "input: {:?}",
            input
        );
    }
}

#[test]
fn parse_time32_secs() {
    // (input, expected seconds since midnight). The last row exercises the
    // plain-integer fallback.
    let cases: &[(&str, i32)] = &[
        // with a seconds field
        ("12:10:01 AM", 601),
        ("12:10:01 am", 601),
        ("2:10:01 PM", 51_001),
        ("2:10:01 pm", 51_001),
        ("02:10:01", 7_801),
        ("2:10:01", 7_801),
        // hours and minutes only
        ("12:10 AM", 600),
        ("12:10 am", 600),
        ("2:10 PM", 51_000),
        ("2:10 pm", 51_000),
        ("02:10", 7_800),
        ("2:10", 7_800),
        // bare integer
        ("1", 1),
    ];

    for &(input, expected) in cases {
        assert_eq!(
            Time32SecondType::parse(input),
            Some(expected),
            "input: {:?}",
            input
        );
    }
}
}
35 changes: 35 additions & 0 deletions arrow-csv/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,24 @@ fn parse(
i,
datetime_format,
),
DataType::Time32(TimeUnit::Second) => {
build_primitive_array::<Time32SecondType>(line_number, rows, i, None)
}
DataType::Time32(TimeUnit::Millisecond) => build_primitive_array::<
Time32MillisecondType,
>(
line_number, rows, i, None
),
DataType::Time64(TimeUnit::Microsecond) => build_primitive_array::<
Time64MicrosecondType,
>(
line_number, rows, i, None
),
DataType::Time64(TimeUnit::Nanosecond) => build_primitive_array::<
Time64NanosecondType,
>(
line_number, rows, i, None
),
DataType::Timestamp(TimeUnit::Microsecond, _) => {
build_primitive_array::<TimestampMicrosecondType>(
line_number,
Expand Down Expand Up @@ -1593,6 +1611,23 @@ mod tests {
assert_eq!(parse_item::<Date32Type>("1945-05-08").unwrap(), -9004);
}

#[test]
fn parse_time() {
    // One sample per time resolution, from finest to coarsest.
    let nanos = parse_item::<Time64NanosecondType>("12:10:01.123456789 AM");
    assert_eq!(nanos, Some(601_123_456_789));

    let micros = parse_item::<Time64MicrosecondType>("12:10:01.123456 am");
    assert_eq!(micros, Some(601_123_456));

    let millis = parse_item::<Time32MillisecondType>("2:10:01.12 PM");
    assert_eq!(millis, Some(51_001_120));

    let secs = parse_item::<Time32SecondType>("2:10:01 pm");
    assert_eq!(secs, Some(51_001));
}

#[test]
fn parse_date64() {
assert_eq!(parse_item::<Date64Type>("1970-01-01T00:00:00").unwrap(), 0);
Expand Down

0 comments on commit 3ca41f5

Please sign in to comment.