diff --git a/expression/builtin_time_test.go b/expression/builtin_time_test.go index 575b354faef9c..b723b689a0bf0 100644 --- a/expression/builtin_time_test.go +++ b/expression/builtin_time_test.go @@ -47,9 +47,60 @@ func (s *testEvaluatorSuite) TestDate(c *C) { Input interface{} Expect interface{} }{ - {"2011-11-11", "2011-11-11"}, {nil, nil}, - {"2011-11-11 10:10:10", "2011-11-11"}, + // standard format + {"2011-12-13", "2011-12-13"}, + {"2011-12-13 10:10:10", "2011-12-13"}, + // alternative delimiters, any ASCII punctuation character is a valid delimiter, + // punctuation character is defined by C++ std::ispunct: any graphical character + // that is not alphanumeric. + {"2011\"12\"13", "2011-12-13"}, + {"2011#12#13", "2011-12-13"}, + {"2011$12$13", "2011-12-13"}, + {"2011%12%13", "2011-12-13"}, + {"2011&12&13", "2011-12-13"}, + {"2011'12'13", "2011-12-13"}, + {"2011(12(13", "2011-12-13"}, + {"2011)12)13", "2011-12-13"}, + {"2011*12*13", "2011-12-13"}, + {"2011+12+13", "2011-12-13"}, + {"2011,12,13", "2011-12-13"}, + {"2011.12.13", "2011-12-13"}, + {"2011/12/13", "2011-12-13"}, + {"2011:12:13", "2011-12-13"}, + {"2011;12;13", "2011-12-13"}, + {"2011<12<13", "2011-12-13"}, + {"2011=12=13", "2011-12-13"}, + {"2011>12>13", "2011-12-13"}, + {"2011?12?13", "2011-12-13"}, + {"2011@12@13", "2011-12-13"}, + {"2011[12[13", "2011-12-13"}, + {"2011\\12\\13", "2011-12-13"}, + {"2011]12]13", "2011-12-13"}, + {"2011^12^13", "2011-12-13"}, + {"2011_12_13", "2011-12-13"}, + {"2011`12`13", "2011-12-13"}, + {"2011{12{13", "2011-12-13"}, + {"2011|12|13", "2011-12-13"}, + {"2011}12}13", "2011-12-13"}, + {"2011~12~13", "2011-12-13"}, + // internal format (YYYYMMDD, YYYYMMDDHHMMSS) + {"20111213", "2011-12-13"}, + {"111213", "2011-12-13"}, + // leading and trailing space + {" 2011-12-13", "2011-12-13"}, + {"2011-12-13 ", "2011-12-13"}, + {" 2011-12-13 ", "2011-12-13"}, + // extra dashes + {"2011-12--13", "2011-12-13"}, + {"2011--12-13", "2011-12-13"}, + {"2011----12----13", 
"2011-12-13"}, + // combinations + {" 2011----12----13 ", "2011-12-13"}, + // errors + {"2011 12 13", nil}, + {"2011A12A13", nil}, + {"2011T12T13", nil}, } dtblDate := tblToDtbl(tblDate) for _, t := range dtblDate { diff --git a/expression/integration_test.go b/expression/integration_test.go index 4c8f355b4c3e7..7b11b5b0635ae 100755 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -1442,7 +1442,7 @@ func (s *testIntegrationSuite2) TestTimeBuiltin(c *C) { result.Check(testkit.Rows("00:00:01")) tk.MustExec("drop table if exists t") tk.MustExec("create table t(a datetime, b timestamp, c time)") - tk.MustExec(`insert into t values("2017 01-01 12:30:31", "2017 01-01 12:30:31", "01:01:01")`) + tk.MustExec(`insert into t values("2017-01-01 12:30:31", "2017-01-01 12:30:31", "01:01:01")`) result = tk.MustQuery("select addtime(a, b), addtime(cast(a as date), b), addtime(b,a), addtime(a,c), addtime(b," + "c), addtime(c,a), addtime(c,b)" + " from t;") diff --git a/types/helper.go b/types/helper.go index 917afe756e492..2c8d4b3c8f9f8 100644 --- a/types/helper.go +++ b/types/helper.go @@ -105,6 +105,11 @@ func isDigit(c byte) bool { return c >= '0' && c <= '9' } +// Returns true if the given byte is an ASCII punctuation character (printable and non-alphanumeric). +func isPunctuation(c byte) bool { + return (c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7E) +} + func myMax(a, b int) int { if a > b { return a diff --git a/types/time.go b/types/time.go index 8674ab4c505cb..f6c43d9b309b2 100644 --- a/types/time.go +++ b/types/time.go @@ -735,38 +735,60 @@ func TimestampDiff(unit string, t1 Time, t2 Time) int64 { func ParseDateFormat(format string) []string { format = strings.TrimSpace(format) + if len(format) == 0 { + return nil + } + + // Date format must start and end with number. 
+ if !isDigit(format[0]) || !isDigit(format[len(format)-1]) { + return nil + } + start := 0 // Initialize `seps` with capacity of 6. The input `format` is typically // a date time of the form "2006-01-02 15:04:05", which has 6 numeric parts // (the fractional second part is usually removed by `splitDateTime`). // Setting `seps`'s capacity to 6 avoids reallocation in this common case. seps := make([]string, 0, 6) - for i := 0; i < len(format); i++ { - // Date format must start and end with number. - if i == 0 || i == len(format)-1 { - if !unicode.IsNumber(rune(format[i])) { - return nil - } - continue - } + for i := 1; i < len(format)-1; i++ { + if isValidSeparator(format[i], len(seps)) { + prevParts := len(seps) + seps = append(seps, format[start:i]) + start = i + 1 - // Separator is a single none-number char. - if !unicode.IsNumber(rune(format[i])) { - if !unicode.IsNumber(rune(format[i-1])) { - return nil + // Skip any further consecutive separators so a run of them counts as one delimiter. + for j := i + 1; j < len(format); j++ { + if !isValidSeparator(format[j], prevParts) { + break + } + + start++ + i++ } - seps = append(seps, format[start:i]) - start = i + 1 + continue } + if !isDigit(format[i]) { + return nil + } } seps = append(seps, format[start:]) return seps } +// isValidSeparator reports whether c may separate date parts: punctuation is valid anywhere, +// while space and 'T' are valid only when prevParts == 2, i.e. between the date and time. +func isValidSeparator(c byte, prevParts int) bool { + if isPunctuation(c) { + return true + } + + return prevParts == 2 && (c == ' ' || c == 'T') +} + // See https://dev.mysql.com/doc/refman/5.7/en/date-and-time-literals.html. // The only delimiter recognized between a date and time part and a fractional seconds part is the decimal point. func splitDateTime(format string) (seps []string, fracStr string) { @@ -782,9 +804,6 @@ func splitDateTime(format string) (seps []string, fracStr string) { // See https://dev.mysql.com/doc/refman/5.7/en/date-and-time-literals.html. 
func parseDatetime(sc *stmtctx.StatementContext, str string, fsp int8, isFloat bool) (Time, error) { - // Try to split str with delimiter. - // TODO: only punctuation can be the delimiter for date parts or time parts. - // But only space and T can be the delimiter between the date and time part. var ( year, month, day, hour, minute, second int fracStr string diff --git a/types/time_test.go b/types/time_test.go index cb42918fc64cd..063a7fdf05b18 100644 --- a/types/time_test.go +++ b/types/time_test.go @@ -191,15 +191,65 @@ func (s *testTimeSuite) TestDate(c *C) { Input string Expect string }{ - {"2012-12-31", "2012-12-31"}, - {"00-12-31", "2000-12-31"}, - {"20121231", "2012-12-31"}, - {"121231", "2012-12-31"}, + // standard format + {"0001-12-13", "0001-12-13"}, + {"2011-12-13", "2011-12-13"}, + {"2011-12-13 10:10:10", "2011-12-13"}, {"2015-06-01 12:12:12", "2015-06-01"}, {"0001-01-01 00:00:00", "0001-01-01"}, - {"0001-01-01", "0001-01-01"}, - {"2019.01.01", "2019-01-01"}, - {"2019/01/01", "2019-01-01"}, + // 2-digit year + {"00-12-31", "2000-12-31"}, + // alternative delimiters, any ASCII punctuation character is a valid delimiter, + // punctuation character is defined by C++ std::ispunct: any graphical character + // that is not alphanumeric. 
+ {"2011\"12\"13", "2011-12-13"}, + {"2011#12#13", "2011-12-13"}, + {"2011$12$13", "2011-12-13"}, + {"2011%12%13", "2011-12-13"}, + {"2011&12&13", "2011-12-13"}, + {"2011'12'13", "2011-12-13"}, + {"2011(12(13", "2011-12-13"}, + {"2011)12)13", "2011-12-13"}, + {"2011*12*13", "2011-12-13"}, + {"2011+12+13", "2011-12-13"}, + {"2011,12,13", "2011-12-13"}, + {"2011.12.13", "2011-12-13"}, + {"2011/12/13", "2011-12-13"}, + {"2011:12:13", "2011-12-13"}, + {"2011;12;13", "2011-12-13"}, + {"2011<12<13", "2011-12-13"}, + {"2011=12=13", "2011-12-13"}, + {"2011>12>13", "2011-12-13"}, + {"2011?12?13", "2011-12-13"}, + {"2011@12@13", "2011-12-13"}, + {"2011[12[13", "2011-12-13"}, + {"2011\\12\\13", "2011-12-13"}, + {"2011]12]13", "2011-12-13"}, + {"2011^12^13", "2011-12-13"}, + {"2011_12_13", "2011-12-13"}, + {"2011`12`13", "2011-12-13"}, + {"2011{12{13", "2011-12-13"}, + {"2011|12|13", "2011-12-13"}, + {"2011}12}13", "2011-12-13"}, + {"2011~12~13", "2011-12-13"}, + // alternative separators with time + {"2011~12~13 12~12~12", "2011-12-13"}, + {"2011~12~13T12~12~12", "2011-12-13"}, + {"2011~12~13~12~12~12", "2011-12-13"}, + // internal format (YYYYMMDD, YYYYMMDDHHMMSS) + {"20111213", "2011-12-13"}, + {"111213", "2011-12-13"}, + // leading and trailing space + {" 2011-12-13", "2011-12-13"}, + {"2011-12-13 ", "2011-12-13"}, + {" 2011-12-13 ", "2011-12-13"}, + // extra separators + {"2011-12--13", "2011-12-13"}, + {"2011--12-13", "2011-12-13"}, + {"2011----12----13", "2011-12-13"}, + {"2011~/.12)_#13T T.12~)12[~12", "2011-12-13"}, + // combinations + {" 2011----12----13 ", "2011-12-13"}, } for _, test := range table { @@ -213,6 +263,13 @@ func (s *testTimeSuite) TestDate(c *C) { "1201012736.0000", "1201012736", "2019.01", + // invalid separators + "2019 01 02", + "2019A01A02", + "2019-01T02", + "2011-12-13 10:10T10", + "2019–01–02", // en dash + "2019—01—02", // em dash } for _, test := range errTable { @@ -957,10 +1014,10 @@ func (s *testTimeSuite) TestParseDateFormat(c *C) { 
{"2011-11-11 10", []string{"2011", "11", "11", "10"}}, {"2011-11-11T10:10:10.123456", []string{"2011", "11", "11", "10", "10", "10", "123456"}}, {"2011:11:11T10:10:10.123456", []string{"2011", "11", "11", "10", "10", "10", "123456"}}, + {"2011-11-11 10:10:10", []string{"2011", "11", "11", "10", "10", "10"}}, {"xx2011-11-11 10:10:10", nil}, {"T10:10:10", nil}, {"2011-11-11x", nil}, - {"2011-11-11 10:10:10", nil}, {"xxx 10:10:10", nil}, } @@ -1821,6 +1878,23 @@ func BenchmarkTimeCompare(b *testing.B) { } } +func benchmarkDateFormat(b *testing.B, name, str string) { + b.Run(name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + types.ParseDateFormat(str) + } + }) +} + +func BenchmarkParseDateFormat(b *testing.B) { + benchmarkDateFormat(b, "date basic", "2011-12-13") + benchmarkDateFormat(b, "date internal", "20111213") + benchmarkDateFormat(b, "datetime basic", "2011-12-13 14:15:16") + benchmarkDateFormat(b, "datetime internal", "20111213141516") + benchmarkDateFormat(b, "datetime basic frac", "2011-12-13 14:15:16.123456") + benchmarkDateFormat(b, "datetime repeated delimiters", "2011---12---13 14::15::16..123456") +} + func BenchmarkParseDatetime(b *testing.B) { sc := &stmtctx.StatementContext{TimeZone: time.UTC} str := "2011-10-10 11:11:11.123456"