Skip to content

Commit c6cb05e

Browse files
Copilot and Byron committed
Add support for Git's flexible date formats (ISO8601 dots, compact ISO8601, Z suffix, flexible offsets)
Co-authored-by: Byron <63622+Byron@users.noreply.github.com>
1 parent 3880df5 commit c6cb05e

File tree

4 files changed

+344
-15
lines changed

4 files changed

+344
-15
lines changed

gix-date/src/parse.rs

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ pub(crate) mod function {
156156
Time::new(val.timestamp().as_second(), val.offset().seconds())
157157
} else if let Ok(val) = strptime_relaxed(ISO8601_STRICT.0, input) {
158158
Time::new(val.timestamp().as_second(), val.offset().seconds())
159+
} else if let Some(val) = parse_git_date_format(input) {
160+
// Git-style flexible date parsing (ISO8601 with dots, compact formats, Z suffix, etc.)
161+
val
159162
} else if let Ok(val) = strptime_relaxed(GITOXIDE.0, input) {
160163
Time::new(val.timestamp().as_second(), val.offset().seconds())
161164
} else if let Ok(val) = strptime_relaxed(DEFAULT.0, input) {
@@ -282,6 +285,311 @@ pub(crate) mod function {
282285
Time { seconds, offset }.into()
283286
}
284287

288+
/// Parse Git-style flexible date formats that aren't covered by standard strptime:
289+
/// - ISO8601 with dots: `2008.02.14 20:30:45 -0500`
290+
/// - Compact ISO8601: `20080214T203045`, `20080214T20:30:45`, `20080214T2030`, `20080214T20`
291+
/// - Z suffix for UTC: `1970-01-01 00:00:00 Z`
292+
/// - 2-digit hour offset: `2008-02-14 20:30:45 -05`
293+
/// - Colon-separated offset: `2008-02-14 20:30:45 -05:00`
294+
/// - Subsecond precision (ignored): `20080214T203045.019-04:00`
295+
fn parse_git_date_format(input: &str) -> Option<Time> {
296+
// Try ISO8601 with dots: YYYY.MM.DD HH:MM:SS offset
297+
if let Some(time) = parse_iso8601_dots(input) {
298+
return Some(time);
299+
}
300+
// Try compact ISO8601: YYYYMMDDTHHMMSS or YYYYMMDDT...
301+
if let Some(time) = parse_compact_iso8601(input) {
302+
return Some(time);
303+
}
304+
// Try ISO8601 with Z suffix or flexible timezone
305+
if let Some(time) = parse_flexible_iso8601(input) {
306+
return Some(time);
307+
}
308+
None
309+
}
310+
311+
/// Parse ISO8601 with dots: `2008.02.14 20:30:45 -0500`
312+
fn parse_iso8601_dots(input: &str) -> Option<Time> {
313+
// Format: YYYY.MM.DD HH:MM:SS offset
314+
let input = input.trim();
315+
if !input.contains('.') || !input[..10].contains('.') {
316+
return None;
317+
}
318+
319+
// Replace dots with dashes for date part only
320+
let mut parts = input.splitn(2, ' ');
321+
let date_part = parts.next()?;
322+
let rest = parts.next()?;
323+
324+
// Validate date part has dot separators
325+
if date_part.len() != 10 || date_part.chars().nth(4)? != '.' || date_part.chars().nth(7)? != '.' {
326+
return None;
327+
}
328+
329+
// Convert to standard ISO8601 format
330+
let normalized = format!("{} {}", date_part.replace('.', "-"), rest);
331+
parse_flexible_iso8601(&normalized)
332+
}
333+
334+
/// Parse compact ISO8601 formats:
335+
/// - `20080214T203045` (compact time)
336+
/// - `20080214T20:30:45` (normal time)
337+
/// - `20080214T2030` (hours and minutes only)
338+
/// - `20080214T20` (hours only)
339+
/// - With optional subsecond precision (ignored)
340+
/// - With optional timezone
341+
fn parse_compact_iso8601(input: &str) -> Option<Time> {
342+
let input = input.trim();
343+
344+
// Must have T separator and start with 8 digits for YYYYMMDD
345+
let t_pos = input.find('T')?;
346+
if t_pos != 8 {
347+
return None;
348+
}
349+
350+
let date_part = &input[..8];
351+
let rest = &input[9..]; // after T
352+
353+
// Parse YYYYMMDD
354+
let year: i32 = date_part[0..4].parse().ok()?;
355+
let month: i32 = date_part[4..6].parse().ok()?;
356+
let day: i32 = date_part[6..8].parse().ok()?;
357+
358+
if month < 1 || month > 12 || day < 1 || day > 31 {
359+
return None;
360+
}
361+
362+
// Parse time part - may have colons or not, may have subseconds, may have timezone
363+
let (time_str, offset_str) = split_time_and_offset(rest);
364+
365+
// Strip subseconds (anything after a dot in the time part, before offset)
366+
let time_str = if let Some(dot_pos) = time_str.find('.') {
367+
&time_str[..dot_pos]
368+
} else {
369+
time_str
370+
};
371+
372+
// Parse time - could be HH:MM:SS, HHMMSS, HH:MM, HHMM, or HH
373+
let (hour, minute, second) = parse_time_component(time_str)?;
374+
375+
// Parse offset
376+
let offset = parse_flexible_offset(offset_str)?;
377+
378+
// Construct the datetime
379+
let date = jiff::civil::Date::new(year as i16, month as i8, day as i8).ok()?;
380+
let time = jiff::civil::Time::new(hour as i8, minute as i8, second as i8, 0).ok()?;
381+
let datetime = date.at(time.hour(), time.minute(), time.second(), 0);
382+
let tz_offset = jiff::tz::Offset::from_seconds(offset).ok()?;
383+
let zoned = datetime.to_zoned(tz_offset.to_time_zone()).ok()?;
384+
385+
Some(Time::new(zoned.timestamp().as_second(), offset))
386+
}
387+
388+
/// Parse ISO8601 with flexible timezone (Z suffix, 2-digit offset, colon-separated offset)
389+
/// and optional subsecond precision
390+
fn parse_flexible_iso8601(input: &str) -> Option<Time> {
391+
let input = input.trim();
392+
393+
// Check if this looks like ISO8601 (YYYY-MM-DD format)
394+
if input.len() < 10 {
395+
return None;
396+
}
397+
let date_part = &input[..10];
398+
if date_part.chars().nth(4)? != '-' || date_part.chars().nth(7)? != '-' {
399+
return None;
400+
}
401+
402+
// Parse the date
403+
let year: i32 = date_part[0..4].parse().ok()?;
404+
let month: i32 = date_part[5..7].parse().ok()?;
405+
let day: i32 = date_part[8..10].parse().ok()?;
406+
407+
if month < 1 || month > 12 || day < 1 || day > 31 {
408+
return None;
409+
}
410+
411+
// Rest after date
412+
let rest = &input[10..];
413+
if rest.is_empty() {
414+
return None;
415+
}
416+
417+
// Skip T or space separator
418+
let rest = if rest.starts_with('T') || rest.starts_with(' ') {
419+
&rest[1..]
420+
} else {
421+
return None;
422+
};
423+
424+
// Split into time and offset
425+
let (time_str, offset_str) = split_time_and_offset(rest);
426+
427+
// Strip subseconds
428+
let time_str = if let Some(dot_pos) = time_str.find('.') {
429+
&time_str[..dot_pos]
430+
} else {
431+
time_str
432+
};
433+
434+
// Parse time HH:MM:SS
435+
let (hour, minute, second) = parse_time_component(time_str)?;
436+
437+
// Parse offset
438+
let offset = parse_flexible_offset(offset_str)?;
439+
440+
// Construct the datetime
441+
let date = jiff::civil::Date::new(year as i16, month as i8, day as i8).ok()?;
442+
let time = jiff::civil::Time::new(hour as i8, minute as i8, second as i8, 0).ok()?;
443+
let datetime = date.at(time.hour(), time.minute(), time.second(), 0);
444+
let tz_offset = jiff::tz::Offset::from_seconds(offset).ok()?;
445+
let zoned = datetime.to_zoned(tz_offset.to_time_zone()).ok()?;
446+
447+
Some(Time::new(zoned.timestamp().as_second(), offset))
448+
}
449+
450+
/// Split the text following the date into its time component and its offset component.
///
/// The returned tuple is `(time, offset)`:
/// - a trailing `Z` yields the offset `"Z"`,
/// - otherwise the offset starts at the first `+` or `-` that is directly followed by an
///   ASCII digit and runs to the end of the input,
/// - if neither is present the offset is the empty string.
///
/// The time component is returned without trailing whitespace, so both `20:30:45-0500`
/// and `20:30:45 -0500` produce `("20:30:45", "-0500")`. A time (digits, colons and an
/// optional fractional part) never contains a sign, so no minimum position is required
/// and offsets after short compact times such as `20-0500` or `2030 -0400` are found too.
fn split_time_and_offset(input: &str) -> (&str, &str) {
    let input = input.trim();

    if let Some(time) = input.strip_suffix('Z') {
        return (time.trim_end(), "Z");
    }

    // The slice after the candidate is only taken once the candidate is known to be an
    // ASCII sign, which guarantees `idx + 1` is a character boundary.
    let sign_pos = input
        .char_indices()
        .find(|&(idx, ch)| {
            matches!(ch, '+' | '-') && input[idx + 1..].starts_with(|next: char| next.is_ascii_digit())
        })
        .map(|(idx, _)| idx);

    match sign_pos {
        Some(pos) => (input[..pos].trim_end(), &input[pos..]),
        None => (input, ""),
    }
}
496+
497+
/// Parse a time component written as `HH:MM:SS`, `HH:MM`, `HHMMSS`, `HHMM` or `HH`.
///
/// Returns `(hour, minute, second)`, with missing fields set to zero. Surrounding
/// whitespace is ignored.
///
/// Returns `None` if:
/// - any field is empty or contains anything other than ASCII digits (so signs and
///   non-ASCII text are rejected rather than parsed or sliced),
/// - a colon-separated time has more than three fields,
/// - a compact time is not exactly 2, 4 or 6 characters long,
/// - the hour exceeds 23, or the minute or second exceeds 59.
fn parse_time_component(time: &str) -> Option<(u32, u32, u32)> {
    /// Parse one non-empty, digits-only field; `str::parse` alone would accept `+5`.
    fn field(text: &str) -> Option<u32> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse().ok()
    }

    let time = time.trim();

    let (hour, minute, second) = if time.contains(':') {
        let mut parts = time.split(':');
        let hour = field(parts.next()?)?;
        let minute = parts.next().map_or(Some(0), field)?;
        let second = parts.next().map_or(Some(0), field)?;
        // Anything after the seconds field is trailing garbage, not a time.
        if parts.next().is_some() {
            return None;
        }
        (hour, minute, second)
    } else {
        // Fixed-width slicing is only safe on ASCII: byte offsets are then char offsets.
        if !time.is_ascii() {
            return None;
        }
        match time.len() {
            2 => (field(time)?, 0, 0),
            4 => (field(&time[..2])?, field(&time[2..])?, 0),
            6 => (field(&time[..2])?, field(&time[2..4])?, field(&time[4..])?),
            _ => return None,
        }
    };

    if hour > 23 || minute > 59 || second > 59 {
        return None;
    }
    Some((hour, minute, second))
}
542+
543+
/// Parse a flexible timezone offset into seconds east of UTC:
/// - empty or missing: `0`
/// - `Z`: `0`
/// - `+/-HH`: whole hours
/// - `+/-HHMM`: hours and minutes
/// - `+/-HH:MM`: hours and minutes
///
/// Surrounding whitespace is ignored.
///
/// Returns `None` if the sign is missing, if anything other than ASCII digits follows
/// the sign (after removing colons), if the digit count is not 2 or 4, or if the hours
/// exceed 14 or the minutes exceed 59.
fn parse_flexible_offset(offset: &str) -> Option<i32> {
    let offset = offset.trim();

    if offset.is_empty() || offset == "Z" {
        return Some(0);
    }

    let (sign, rest) = if let Some(rest) = offset.strip_prefix('+') {
        (1i32, rest)
    } else if let Some(rest) = offset.strip_prefix('-') {
        (-1i32, rest)
    } else {
        return None;
    };

    // Remove colon if present so `HH:MM` and `HHMM` share one code path.
    let digits = rest.replace(':', "");

    // Digits only: this rejects a second sign (`+-5`), which `parse` would accept, and
    // guarantees the byte slices below fall on character boundaries.
    if !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    let (hours, minutes): (i32, i32) = match digits.len() {
        2 => (digits.parse().ok()?, 0),
        4 => (digits[..2].parse().ok()?, digits[2..].parse().ok()?),
        _ => return None,
    };

    if hours > 14 || minutes > 59 {
        return None;
    }

    Some(sign * (hours * 3600 + minutes * 60))
}
592+
285593
/// This is just like `Zoned::strptime`, but it allows parsing datetimes
286594
/// whose weekdays are inconsistent with the date. While the day-of-week
287595
/// still must be parsed, it is otherwise ignored. This seems to be

gix-date/tests/fixtures/generate_git_date_baseline.sh

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ function baseline_relative() {
4848
# FIXED DATE FORMATS
4949
# ============================================================================
5050
# Tests from https://github.com/git/git/blob/master/t/t0006-date.sh
51-
# Only including formats that gix-date currently supports.
5251

5352
# Note: SHORT format (YYYY-MM-DD) is NOT included in baseline tests because
5453
# Git fills in current time-of-day, making it non-reproducible for baseline comparison.
@@ -73,16 +72,40 @@ baseline '2008-02-14 20:30:45 +0000' '' # from git t0006
7372
baseline '2008-02-14 20:30:45 -0500' '' # from git t0006
7473
baseline '2016-06-15 16:13:20 +0200' 'ISO8601' # from git t0006
7574

76-
# Note: ISO8601 with dots (2008.02.14 20:30:45 -0500) is supported by Git
77-
# but not yet supported by gix-date.
75+
# ISO8601 with dots: "YYYY.MM.DD HH:MM:SS +/-ZZZZ" from git t0006
76+
baseline '2008.02.14 20:30:45 -0500' ''
7877

7978
# ISO8601_STRICT format: "YYYY-MM-DDTHH:MM:SS+ZZ:ZZ"
8079
baseline '2022-08-17T21:43:13+08:00' 'ISO8601_STRICT'
8180
baseline '2000-01-01T00:00:00+00:00' 'ISO8601_STRICT'
8281
baseline '2009-02-13T23:31:30+00:00' 'ISO8601_STRICT' # Unix timestamp 1234567890
8382
baseline '2016-06-15T16:13:20+02:00' 'ISO8601_STRICT' # from git t0006
8483

85-
# Timezone edge cases from git t0006 (that gix-date supports)
84+
# Z suffix for UTC timezone from git t0006
85+
baseline '1970-01-01 00:00:00 Z' ''
86+
87+
# Compact ISO8601 formats from git t0006 (YYYYMMDDTHHMMSS)
88+
baseline '20080214T20:30:45' ''
89+
baseline '20080214T20:30' ''
90+
baseline '20080214T20' ''
91+
baseline '20080214T203045' ''
92+
baseline '20080214T2030' ''
93+
baseline '20080214T203045-04:00' ''
94+
baseline '20080214T203045 -04:00' ''
95+
96+
# Subsecond precision (Git ignores the subseconds)
97+
baseline '20080214T000000.20' ''
98+
baseline '20080214T00:00:00.20' ''
99+
baseline '20080214T203045.019-04:00' ''
100+
baseline '2008-02-14 20:30:45.019-04:00' ''
101+
102+
# Various timezone formats from git t0006
103+
baseline '2008-02-14 20:30:45 -0015' '' # 15-minute offset
104+
baseline '2008-02-14 20:30:45 -05' '' # 2-digit hour offset
105+
baseline '2008-02-14 20:30:45 -05:00' '' # colon-separated offset
106+
baseline '2008-02-14 20:30:45 +00' '' # 2-digit +00
107+
108+
# Timezone edge cases from git t0006
86109
baseline '1970-01-01 00:00:00 +0000' ''
87110
baseline '1970-01-01 01:00:00 +0100' ''
88111
baseline '1970-01-02 00:00:00 +1100' ''
Binary file not shown.

0 commit comments

Comments
 (0)