forked from jdereg/java-util
-
Notifications
You must be signed in to change notification settings - Fork 0
/
DateUtilities.java
370 lines (342 loc) · 18.1 KB
/
DateUtilities.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
package com.cedarsoftware.util;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Date;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Utility for parsing String dates with optional times, especially when the input String formats
* may be inconsistent. This will parse the following formats:<br/>
* <pre>
* 12-31-2023, 12/31/2023, 12.31.2023 mm is 1-12 or 01-12, dd is 1-31 or 01-31, and yyyy can be 0000 to 9999.
*
* 2023-12-31, 2023/12/31, 2023.12.31 mm is 1-12 or 01-12, dd is 1-31 or 01-31, and yyyy can be 0000 to 9999.
*
* January 6th, 2024 Month (3-4 letter abbreviation or full English name), white space and optional comma,
* day of month (1-31) with optional suffixes 1st, 3rd, 22nd, white space and
* optional comma, and yyyy (0000-9999)
*
* 17th January 2024 day of month (1-31) with optional suffixes (e.g. 1st, 3rd, 22nd),
* Month (3-4 letter abbreviation or full English name), white space and optional comma,
* and yyyy (0000-9999)
*
* 2024 January 31st 4 digit year, white space and optional comma, Month (3-4 letter abbreviation or full
* English name), white space and optional comma, and day of month with optional
* suffixes (1st, 3rd, 22nd)
*
* Sat Jan 6 11:06:10 EST 2024 Unix/Linux style. Day of week (3-letter or full name), Month (3-4 letter abbreviation or full
* English name), time hh:mm:ss, TimeZone (Java supported Timezone names), Year
* </pre>
* All dates can be followed by a Time, or the time can precede the Date. Whitespace or a single letter T must separate the
* date and the time for the non-Unix time formats. The Time formats supported:<br/>
* <pre>
* hh:mm hours (00-23), minutes (00-59). 24 hour format.
*
* hh:mm:ss hours (00-23), minutes (00-59), seconds (00-59). 24 hour format.
*
* hh:mm:ss.sssss hh:mm:ss and fractional seconds. Variable fractional seconds supported.
*
* hh:mm:offset -or- offset can be specified as +HH:mm, +HHmm, +HH, -HH:mm, -HHmm, -HH, or Z (GMT)
* hh:mm:ss.sss:offset which will match: "12:34", "12:34:56", "12:34.789", "12:34:56.789", "12:34+01:00",
* "12:34:56+1:00", "12:34-01", "12:34:56-1", "12:34Z", "12:34:56Z"
*
* hh:mm:zone -or- Zone can be specified as Z (Zulu = UTC), older short forms: GMT, EST, CST, MST,
* hh:mm:ss.sss:zone PST, IST, JST, BST etc. as well as the long forms: "America/New_York", "Asia/Saigon",
* etc. See ZoneId.getAvailableZoneIds().
* </pre>
* DateUtilities will also parse an Epoch-based integer value. It is interpreted as the number of milliseconds since January 1st, 1970 GMT.
* <pre>
* "0" to A string of numeric digits will be parsed and returned as the number of milliseconds since
* "999999999999999999" the Unix Epoch, January 1st, 1970 00:00:00 UTC.
* </pre>
* On all patterns above (excluding the numeric epoch millis), if a day-of-week (e.g. Thu, Sunday, etc.) is included
* (front, back, or between date and time), it will be ignored, allowing for even more formats than listed here.
* The day-of-week is not used to influence the Date calculation.
*
* @author John DeRegnaucourt (jdereg@gmail.com)
* <br>
* Copyright (c) Cedar Software LLC
* <br><br>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <br><br>
* <a href="http://www.apache.org/licenses/LICENSE-2.0">License</a>
* <br><br>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public final class DateUtilities {
    // Matches a String made up solely of decimal digits (treated as epoch milliseconds).
    private static final Pattern allDigits = Pattern.compile("^\\d+$");

    // ---- Named regex fragments; the full patterns below are assembled from these. ----
    private static final String days = "monday|mon|tuesday|tues|tue|wednesday|wed|thursday|thur|thu|friday|fri|saturday|sat|sunday|sun"; // longer before shorter matters
    private static final String mos = "January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sept|Sep|October|Oct|November|Nov|December|Dec";
    private static final String yr = "[+-]?\\d{4,5}\\b";
    private static final String d1or2 = "\\d{1,2}";
    private static final String d2 = "\\d{2}";
    private static final String ord = "st|nd|rd|th";
    private static final String sep = "[./-]";
    private static final String ws = "\\s+";
    private static final String wsOp = "\\s*";
    private static final String wsOrComma = "[ ,]+";
    private static final String tzUnix = "[A-Z]{1,3}";
    private static final String tz_Hh_MM = "[+-]\\d{1,2}:\\d{2}";
    private static final String tz_Hh_MM_SS = "[+-]\\d{1,2}:\\d{2}:\\d{2}";
    private static final String tz_HHMM = "[+-]\\d{4}";
    private static final String tz_Hh = "[+-]\\d{1,2}";
    private static final String tzNamed = wsOp + "\\[?[A-Za-z][A-Za-z0-9~\\/._+-]+]?";
    private static final String nano = "\\.\\d+";

    // Number of decimal digits in a nanosecond-of-second value.
    private static final int NANO_DIGITS = 9;

    // Patterns defined in BNF influenced style using above named elements
    private static final Pattern isoDatePattern = Pattern.compile(    // Regex's using | (OR)
            "(" + yr + ")(" + sep + ")(" + d1or2 + ")" + "\\2" + "(" + d1or2 + ")|" +        // 2024/01/21 (yyyy/mm/dd -or- yyyy-mm-dd -or- yyyy.mm.dd); \2 references the 1st separator (ensures both are the same)
            "(" + d1or2 + ")(" + sep + ")(" + d1or2 + ")" + "\\6(" + yr + ")");              // 01/21/2024 (mm/dd/yyyy -or- mm-dd-yyyy -or- mm.dd.yyyy); \6 references this alternative's 1st separator (ensures both are the same)

    private static final Pattern alphaMonthPattern = Pattern.compile(
            "\\b(" + mos + ")\\b" + wsOrComma + "(" + d1or2 + ")(" + ord + ")?" + wsOrComma + "(" + yr + ")|" +   // Jan 21st, 2024  (comma optional between all, ordinal text optional [st, nd, rd, th])
            "(" + d1or2 + ")(" + ord + ")?" + wsOrComma + "\\b(" + mos + ")\\b" + wsOrComma + "(" + yr + ")|" +   // 21st Jan, 2024  (ditto)
            "(" + yr + ")" + wsOrComma + "\\b(" + mos + ")\\b" + wsOrComma + "(" + d1or2 + ")(" + ord + ")?",     // 2024 Jan 21st   (ditto)
            Pattern.CASE_INSENSITIVE);

    private static final Pattern unixDateTimePattern = Pattern.compile(
            "\\b(" + days + ")\\b" + ws + "\\b(" + mos + ")\\b" + ws + "(" + d1or2 + ")" + ws + "(" + d2 + ":" + d2 + ":" + d2 + ")" + wsOp + "(" + tzUnix + ")?" + wsOp + "(" + yr + ")",
            Pattern.CASE_INSENSITIVE);

    private static final Pattern timePattern = Pattern.compile(
            "(" + d2 + "):(" + d2 + ")(?::(" + d2 + ")(" + nano + ")?)?(" + tz_Hh_MM_SS + "|" + tz_Hh_MM + "|" + tz_HHMM + "|" + tz_Hh + "|Z)?(" + tzNamed + ")?",
            Pattern.CASE_INSENSITIVE);

    private static final Pattern dayPattern = Pattern.compile("\\b(" + days + ")\\b", Pattern.CASE_INSENSITIVE);

    // Lower-case month name (abbreviated or full) to month number (1-12).
    private static final Map<String, Integer> months = new ConcurrentHashMap<>();

    static {
        // Month name to number map
        months.put("jan", 1);
        months.put("january", 1);
        months.put("feb", 2);
        months.put("february", 2);
        months.put("mar", 3);
        months.put("march", 3);
        months.put("apr", 4);
        months.put("april", 4);
        months.put("may", 5);
        months.put("jun", 6);
        months.put("june", 6);
        months.put("jul", 7);
        months.put("july", 7);
        months.put("aug", 8);
        months.put("august", 8);
        months.put("sep", 9);
        months.put("sept", 9);
        months.put("september", 9);
        months.put("oct", 10);
        months.put("october", 10);
        months.put("nov", 11);
        months.put("november", 11);
        months.put("dec", 12);
        months.put("december", 12);
    }

    private DateUtilities() {
    }

    /**
     * Original API. If the date-time given does not include a timezone offset or name, then ZoneId.systemDefault()
     * will be used. We recommend using parseDate(String, ZoneId, boolean) version, so you can control the default
     * timezone used when one is not specified.
     *
     * @param dateStr String containing a date. If there is excess content, it will throw an IllegalArgumentException.
     * @return Date instance that represents the passed in date. This API is intended to be super flexible in terms
     * of what it can parse. If a null or empty String is passed in, null will be returned.
     */
    public static Date parseDate(String dateStr) {
        if (StringUtilities.isEmpty(dateStr)) {
            return null;
        }
        ZonedDateTime dateTime = parseDate(dateStr, ZoneId.systemDefault(), true);
        // The main API returns null when nothing is left after trimming; pass that through instead of dereferencing it.
        return dateTime == null ? null : Date.from(dateTime.toInstant());
    }

    /**
     * Main API. Retrieve date-time from passed in String. The boolean ensureDateTimeAlone, if set true, ensures that
     * no other non-date content existed in the String.
     *
     * @param dateStr String containing a date. A String consisting only of digits is treated as epoch milliseconds.
     * @param defaultZoneId ZoneId to use if no timezone offset or name is given. Cannot be null.
     * @param ensureDateTimeAlone If true, if there is excess non-Date content, it will throw an IllegalArgument exception.
     * @return ZonedDateTime instance converted from the passed in date String, or null when the input is null or
     * nothing remains of it after trimming.
     * @throws IllegalArgumentException if no supported date format is found, if a date/time field is out of range,
     * or (when ensureDateTimeAlone is true) if unexpected characters remain.
     */
    public static ZonedDateTime parseDate(String dateStr, ZoneId defaultZoneId, boolean ensureDateTimeAlone) {
        dateStr = StringUtilities.trimToNull(dateStr);
        if (dateStr == null) {
            return null;
        }
        Convention.throwIfNull(defaultZoneId, "ZoneId cannot be null. Use ZoneId.of(\"America/New_York\"), ZoneId.systemDefault(), etc.");

        if (allDigits.matcher(dateStr).matches()) {
            return Instant.ofEpochMilli(Long.parseLong(dateStr)).atZone(defaultZoneId);
        }

        String year;
        String day;
        String remains;
        String tz = null;
        int month;

        // Determine which date pattern to use: numeric first, then alphabetic month, then Unix style.
        Matcher matcher = isoDatePattern.matcher(dateStr);
        if (matcher.find()) {
            if (matcher.group(1) != null) {     // yyyy-mm-dd
                year = matcher.group(1);
                month = Integer.parseInt(matcher.group(3));
                day = matcher.group(4);
            } else {                            // mm-dd-yyyy
                year = matcher.group(8);
                month = Integer.parseInt(matcher.group(5));
                day = matcher.group(7);
            }
            remains = removeMatch(dateStr, matcher);
        } else {
            matcher = alphaMonthPattern.matcher(dateStr);
            if (matcher.find()) {
                String mon;
                if (matcher.group(1) != null) {         // Jan 21st, 2024
                    mon = matcher.group(1);
                    day = matcher.group(2);
                    year = matcher.group(4);
                } else if (matcher.group(7) != null) {  // 21st Jan, 2024
                    mon = matcher.group(7);
                    day = matcher.group(5);
                    year = matcher.group(8);
                } else {                                // 2024 Jan 21st
                    year = matcher.group(9);
                    mon = matcher.group(10);
                    day = matcher.group(11);
                }
                month = monthNumber(mon);
                remains = removeMatch(dateStr, matcher);
            } else {
                matcher = unixDateTimePattern.matcher(dateStr);
                if (!matcher.find()) {
                    throw new IllegalArgumentException("Unable to parse: " + dateStr + " as a date-time");
                }
                year = matcher.group(6);
                month = monthNumber(matcher.group(2));
                day = matcher.group(3);
                tz = matcher.group(5);
                // NOTE(review): only the hh:mm:ss capture is carried forward, so text outside the Unix-style
                // match is never inspected by the ensureDateTimeAlone check below.
                remains = matcher.group(4);     // leave optional time portion remaining
            }
        }

        // For the remaining String, match the time portion (which could have appeared ahead of the date portion)
        String hour = null;
        String min = null;
        String sec = "00";
        String fracSec = "0";
        remains = remains.trim();
        String remnant = remains;
        matcher = timePattern.matcher(remains);
        if (matcher.find()) {
            hour = matcher.group(1);
            min = matcher.group(2);
            if (matcher.group(3) != null) {
                sec = matcher.group(3);
            }
            if (matcher.group(4) != null) {
                fracSec = "0" + matcher.group(4);
            }
            if (matcher.group(5) != null) {
                tz = matcher.group(5).trim();
            }
            if (matcher.group(6) != null) {
                // A named zone, when present, takes precedence over a numeric offset so that the
                // resulting ZonedDateTime carries the zone that was originally written.
                tz = stripBrackets(matcher.group(6).trim());
            }
            remnant = removeMatch(remains, matcher);
        }

        if (ensureDateTimeAlone) {
            verifyNoGarbageLeft(remnant);
        }

        ZoneId zoneId = StringUtilities.isEmpty(tz) ? defaultZoneId : getTimeZone(tz);
        return getDate(dateStr, zoneId, year, month, day, hour, min, sec, fracSec);
    }

    /**
     * Return {@code text} with the span most recently matched by {@code matcher} cut out.
     */
    private static String removeMatch(String text, Matcher matcher) {
        return text.substring(0, matcher.start()) + text.substring(matcher.end());
    }

    /**
     * Translate a month name matched by the (ASCII, case-insensitive) month regex into 1-12.
     * Locale.ROOT keeps the lower-casing independent of the JVM default locale (e.g. Turkish dotless i).
     */
    private static int monthNumber(String mon) {
        return months.get(mon.trim().toLowerCase(Locale.ROOT));
    }

    /**
     * Validate the individual fields and assemble the ZonedDateTime.
     *
     * @param dateStr original input, used only for error messages
     * @param hour    null when no time portion was found, in which case midnight is used
     */
    private static ZonedDateTime getDate(String dateStr,
                                         ZoneId zoneId,
                                         String year,
                                         int month,
                                         String day,
                                         String hour,
                                         String min,
                                         String sec,
                                         String fracSec) {
        int y = Integer.parseInt(year);
        int d = Integer.parseInt(day);

        if (month < 1 || month > 12) {
            throw new IllegalArgumentException("Month must be between 1 and 12 inclusive, date: " + dateStr);
        }
        if (d < 1 || d > 31) {
            throw new IllegalArgumentException("Day must be between 1 and 31 inclusive, date: " + dateStr);
        }

        if (hour == null) {   // no [valid] time portion
            return ZonedDateTime.of(y, month, d, 0, 0, 0, 0, zoneId);
        }

        // Regex prevents these from ever failing to parse.
        int h = Integer.parseInt(hour);
        int mn = Integer.parseInt(min);
        int s = Integer.parseInt(sec);
        int nanoOfSec = convertFractionToNanos(fracSec);

        if (h > 23) {
            throw new IllegalArgumentException("Hour must be between 0 and 23 inclusive, time: " + dateStr);
        }
        if (mn > 59) {
            throw new IllegalArgumentException("Minute must be between 0 and 59 inclusive, time: " + dateStr);
        }
        if (s > 59) {
            throw new IllegalArgumentException("Second must be between 0 and 59 inclusive, time: " + dateStr);
        }
        return ZonedDateTime.of(y, month, d, h, mn, s, nanoOfSec, zoneId);
    }

    /**
     * Convert a fractional-second String ("0" or "0.ddd...") to nanoseconds using exact integer
     * arithmetic. Digits beyond the ninth are truncated; shorter fractions are right-padded with zeros.
     * Avoiding double keeps values such as long runs of 9s from rounding up to a full second.
     */
    private static int convertFractionToNanos(String fracSec) {
        int dot = fracSec.indexOf('.');
        if (dot < 0) {
            return 0;
        }
        int nanos = 0;
        int digits = 0;
        for (int i = dot + 1; i < fracSec.length() && digits < NANO_DIGITS; i++, digits++) {
            nanos = nanos * 10 + (fracSec.charAt(i) - '0');
        }
        for (; digits < NANO_DIGITS; digits++) {
            nanos *= 10;
        }
        return nanos;
    }

    /**
     * Resolve a timezone token (numeric offset or zone name) to a ZoneId.
     * Names that ZoneId.of rejects are retried through java.util.TimeZone; because TimeZone answers
     * unknown IDs with GMT, a zero raw offset on that path is treated as "not found" and the
     * original exception is rethrown.
     */
    private static ZoneId getTimeZone(String tz) {
        if (tz == null) {
            return ZoneId.systemDefault();
        }
        if (tz.startsWith("-") || tz.startsWith("+")) {
            ZoneOffset offset = ZoneOffset.of(padOffsetHour(tz));
            return ZoneId.ofOffset("GMT", offset);
        }
        try {
            return ZoneId.of(tz);
        } catch (Exception e) {
            TimeZone timeZone = TimeZone.getTimeZone(tz);
            if (timeZone.getRawOffset() == 0) {
                throw e;
            }
            return timeZone.toZoneId();
        }
    }

    /**
     * ZoneOffset.of accepts a single-digit hour only in the bare "+h" form; "+h:mm" and "+h:mm:ss"
     * (which the time regex allows) must be zero-padded to "+hh:mm" / "+hh:mm:ss" first.
     */
    private static String padOffsetHour(String offset) {
        if (offset.length() > 2 && offset.charAt(2) == ':') {
            return offset.charAt(0) + "0" + offset.substring(1);
        }
        return offset;
    }

    /**
     * Throw if anything other than a day-of-week name, "T" separators, commas, or white space is left over.
     */
    private static void verifyNoGarbageLeft(String remnant) {
        // Clear out day of week (mon, tue, wed, ...)
        if (StringUtilities.length(remnant) > 0) {
            Matcher dayMatcher = dayPattern.matcher(remnant);
            remnant = dayMatcher.replaceFirst("").trim();
        }

        // Verify that nothing, "T" or "," is all that remains
        if (StringUtilities.length(remnant) > 0) {
            remnant = remnant.replaceAll("T|,", "").trim();
            if (!remnant.isEmpty()) {
                throw new IllegalArgumentException("Issue parsing date-time, other characters present: " + remnant);
            }
        }
    }

    /**
     * Remove one leading '[' and one trailing ']' if present; null and empty input are returned as-is.
     */
    private static String stripBrackets(String input) {
        if (input == null || input.isEmpty()) {
            return input;
        }
        return input.replaceAll("^\\[|\\]$", "");
    }
}