Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expression, executor: add iso 8601 and timezone support for temporal string literal (#20534) #20670

Merged
merged 1 commit into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions executor/insert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,93 @@ func (s *testSuite3) TestInsertDateTimeWithTimeZone(c *C) {
tk.MustQuery(`select * from t;`).Check(testkit.Rows(
`1 1970-01-01 09:20:34`,
))

// test for ambiguous cases
cases := []struct {
lit string
expect string
}{
{"2020-10-22", "2020-10-22 00:00:00"},
{"2020-10-22-16", "2020-10-22 16:00:00"},
{"2020-10-22 16-31", "2020-10-22 16:31:00"},
{"2020-10-22 16:31-15", "2020-10-22 16:31:15"},
{"2020-10-22T16:31:15-10", "2020-10-23 10:31:15"},

{"2020.10-22", "2020-10-22 00:00:00"},
{"2020-10.22-16", "2020-10-22 16:00:00"},
{"2020-10-22.16-31", "2020-10-22 16:31:00"},
{"2020-10-22 16.31-15", "2020-10-22 16:31:15"},
{"2020-10-22T16.31.15+14", "2020-10-22 10:31:15"},

{"2020-10:22", "2020-10-22 00:00:00"},
{"2020-10-22:16", "2020-10-22 16:00:00"},
{"2020-10-22-16:31", "2020-10-22 16:31:00"},
{"2020-10-22 16-31:15", "2020-10-22 16:31:15"},
{"2020-10-22T16.31.15+09:30", "2020-10-22 15:01:15"},

{"2020.10-22:16", "2020-10-22 16:00:00"},
{"2020-10.22-16:31", "2020-10-22 16:31:00"},
{"2020-10-22.16-31:15", "2020-10-22 16:31:15"},
{"2020-10-22T16:31.15+09:30", "2020-10-22 15:01:15"},
}
tk.MustExec(`drop table if exists t`)
tk.MustExec(`create table t (dt datetime)`)
tk.MustExec(`set @@time_zone='+08:00'`)
for _, ca := range cases {
tk.MustExec(`delete from t`)
tk.MustExec(fmt.Sprintf("insert into t values ('%s')", ca.lit))
tk.MustQuery(`select * from t`).Check(testkit.Rows(ca.expect))
}

// test for time zone change
tzcCases := []struct {
tz1 string
lit string
tz2 string
exp1 string
exp2 string
}{
{"+08:00", "2020-10-22T16:53:40Z", "+00:00", "2020-10-23 00:53:40", "2020-10-22 16:53:40"},
{"-08:00", "2020-10-22T16:53:40Z", "+08:00", "2020-10-22 08:53:40", "2020-10-23 00:53:40"},
{"-03:00", "2020-10-22T16:53:40+03:00", "+08:00", "2020-10-22 10:53:40", "2020-10-22 21:53:40"},
{"+08:00", "2020-10-22T16:53:40+08:00", "+08:00", "2020-10-22 16:53:40", "2020-10-22 16:53:40"},
}
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (dt datetime, ts timestamp)")
for _, ca := range tzcCases {
tk.MustExec("delete from t")
tk.MustExec(fmt.Sprintf("set @@time_zone='%s'", ca.tz1))
tk.MustExec(fmt.Sprintf("insert into t values ('%s', '%s')", ca.lit, ca.lit))
tk.MustExec(fmt.Sprintf("set @@time_zone='%s'", ca.tz2))
tk.MustQuery("select * from t").Check(testkit.Rows(ca.exp1 + " " + ca.exp2))
}

// test for datetime in compare
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (ts timestamp)")
tk.MustExec("insert into t values ('2020-10-22T12:00:00Z'), ('2020-10-22T13:00:00Z'), ('2020-10-22T14:00:00Z')")
tk.MustQuery(fmt.Sprintf("select count(*) from t where ts > '2020-10-22T12:00:00Z'")).Check(testkit.Rows("2"))

// test for datetime with fsp
fspCases := []struct {
fsp uint
lit string
exp1 string
exp2 string
}{
{2, "2020-10-27T14:39:10.10+00:00", "2020-10-27 22:39:10.10", "2020-10-27 22:39:10.10"},
{1, "2020-10-27T14:39:10.3+0200", "2020-10-27 20:39:10.3", "2020-10-27 20:39:10.3"},
{6, "2020-10-27T14:39:10.3-02", "2020-10-28 00:39:10.300000", "2020-10-28 00:39:10.300000"},
{2, "2020-10-27T14:39:10.10Z", "2020-10-27 22:39:10.10", "2020-10-27 22:39:10.10"},
}

tk.MustExec("set @@time_zone='+08:00'")
for _, ca := range fspCases {
tk.MustExec("drop table if exists t")
tk.MustExec(fmt.Sprintf("create table t (dt datetime(%d), ts timestamp(%d))", ca.fsp, ca.fsp))
tk.MustExec(fmt.Sprintf("insert into t values ('%s', '%s')", ca.lit, ca.lit))
tk.MustQuery("select * from t").Check(testkit.Rows(ca.exp1 + " " + ca.exp2))
}
}

func (s *testSuite3) TestInsertZeroYear(c *C) {
Expand Down
2 changes: 1 addition & 1 deletion expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1740,7 +1740,7 @@ func (s *testIntegrationSuite2) TestTimeBuiltin(c *C) {
result.Check(testkit.Rows("<nil> <nil> 0"))
tk.MustQuery("show warnings").Check(testutil.RowsWithSep("|",
"Warning|1292|Incorrect time value: '0'",
"Warning|1292|Incorrect time value: '0.0'"))
"Warning|1292|Incorrect datetime value: '0.0'"))
result = tk.MustQuery(`SELECT DATE_FORMAT(0, '%W %M %e %Y %r %y'), DATE_FORMAT(0.0, '%W %M %e %Y %r %y');`)
result.Check(testkit.Rows("<nil> <nil>"))
tk.MustQuery("show warnings").Check(testkit.Rows())
Expand Down
247 changes: 225 additions & 22 deletions types/time.go
Original file line number Diff line number Diff line change
Expand Up @@ -789,30 +789,211 @@ func isValidSeparator(c byte, prevParts int) bool {
return prevParts == 2 && (c == ' ' || c == 'T')
}

// See https://dev.mysql.com/doc/refman/5.7/en/date-and-time-literals.html.
// The only delimiter recognized between a date and time part and a fractional seconds part is the decimal point.
func splitDateTime(format string) (seps []string, fracStr string) {
index := GetFracIndex(format)
if index > 0 {
fracStr = format[index+1:]
format = format[:index]
// validIdxCombinations maps the position fingerprint computed by GetTimezone to
// the distances, counted from the end of the literal, at which the two-digit
// hour (h) and minute (m) fields start. A distance of 0 means the field is
// absent.
//
// The fingerprint is the sum of:
//   - 100 if the literal ends with 'Z',
//   - 10*d if the trailing sign sits d bytes from the end (d is 3, 5 or 6),
//   - 3 if the trailing ':' sits 3 bytes from the end.
var validIdxCombinations = map[int]struct {
	h int
	m int
}{
	100: {0, 0}, // 23:59:59Z
	30:  {2, 0}, // 23:59:59+08
	50:  {4, 2}, // 23:59:59+0800
	63:  {5, 2}, // 23:59:59+08:00
	// postgres supports the following additional syntax that deviates from ISO8601, although we won't support it
	// currently, it will be fairly easy to add in the current parsing framework
	// 23:59:59Z+08
	// 23:59:59Z+08:00
}

// GetTimezone parses the trailing timezone information of a given time string
// literal.
//
// It returns idx = -1 and empty strings when no timezone information is found.
// Otherwise idx is the index at which the timezone information starts, and the
// sign, hour, separator and minute parts are returned as they appear in the
// literal (parts that are not present are returned as empty strings; a bare
// trailing 'Z' yields only idx).
//
// Supported syntax (always anchored at the end of the literal):
//
//	MySQL compatible: [+-]HH:MM, see
//	  https://dev.mysql.com/doc/refman/8.0/en/time-zone-support.html and
//	  https://dev.mysql.com/doc/refman/8.0/en/datetime.html
//	  The first link specifies that timezone information should be in
//	  "[H]H:MM, prefixed with a + or -", while the second link specifies that
//	  for string literals, "for hour values less than 10, a leading zero is
//	  required".
//	ISO-8601: Z, [+-]HH, [+-]HHMM or [+-]HH:MM, see
//	  https://www.cl.cam.ac.uk/~mgk25/iso-time.html
//
// Only an upper-case 'Z' is recognized.
func GetTimezone(lit string) (idx int, tzSign, tzHour, tzSep, tzMinute string) {
	// zidx is the position of the 'Z' symbol, sidx of the sign and spidx of the
	// ':' separator; -1 means "not found".
	zidx, sidx, spidx := -1, -1, -1
	l := len(lit)
	// Scan backwards for the last sign and the last separator, stopping at the
	// first 'Z' encountered.
	for i := l - 1; i >= 0; i-- {
		if lit[i] == 'Z' {
			zidx = i
			break
		}
		if sidx == -1 && (lit[i] == '-' || lit[i] == '+') {
			sidx = i
		}
		if spidx == -1 && lit[i] == ':' {
			spidx = i
		}
	}

	// Build the fingerprint looked up in validIdxCombinations.
	//   zidx can be -1 (23:59:59+08:00) or l-1 (23:59:59Z)
	//   sidx can be -1, l-3, l-5, l-6
	//   spidx can be -1, l-3
	// Only indexes that were actually found may contribute: for a missing index
	// l-(-1) == l+1, which would otherwise alias a valid distance on short
	// literals (e.g. "1234" looking like "+HHMM" without a sign).
	k := 0
	if zidx != -1 && l-zidx == 1 {
		k += 100
	}
	if sidx != -1 {
		if t := l - sidx; t == 3 || t == 5 || t == 6 {
			k += t * 10
		}
	}
	hasSep := spidx != -1 && l-spidx == 3
	if hasSep {
		k += 3
	}

	v, ok := validIdxCombinations[k]
	if !ok {
		return -1, "", "", "", ""
	}

	// twoDigits reports whether s (always 2 bytes long here) is made of ASCII digits.
	twoDigits := func(s string) bool {
		return '0' <= s[0] && s[0] <= '9' && '0' <= s[1] && s[1] <= '9'
	}
	if v.h != 0 {
		hidx := l - v.h
		tzHour = lit[hidx : hidx+2]
		if !twoDigits(tzHour) {
			return -1, "", "", "", ""
		}
	}
	if v.m != 0 {
		midx := l - v.m
		tzMinute = lit[midx : midx+2]
		if !twoDigits(tzMinute) {
			return -1, "", "", "", ""
		}
	}

	// Every key in validIdxCombinations requires either a trailing 'Z' or a
	// sign, so idx is always set to a real position below.
	idx = -1
	if sidx != -1 {
		tzSign = lit[sidx : sidx+1]
		idx = sidx
	}
	if zidx != -1 {
		// A 'Z' found during the scan takes precedence as the start position.
		idx = zidx
	}
	if hasSep {
		tzSep = lit[spidx : spidx+1]
	}
	return idx, tzSign, tzHour, tzSep, tzMinute
}

// splitDateTime breaks a temporal string literal into its date & time parts,
// its fractional-seconds (FSP) string and its trailing time zone.
// See https://dev.mysql.com/doc/refman/5.7/en/date-and-time-literals.html.
//
// The time zone is located first by GetTimezone, which inspects the end of the
// literal for a sign-prefixed offset (MySQL 8.0, see
// https://dev.mysql.com/doc/refman/8.0/en/datetime.html) or a trailing 'Z'
// (ISO 8601, see https://www.cl.cam.ac.uk/~mgk25/iso-time.html). It is only
// treated as present (hasTZ) when it starts after the first byte.
//
// The FSP is then located by GetFracIndex on what remains; the only delimiter
// recognized between the date & time part and the FSP is the decimal point.
//
// In both cases any run of punctuation immediately preceding the cut point is
// dropped as well, and the remainder is handed to ParseDateFormat to produce
// seps.
func splitDateTime(format string) (seps []string, fracStr string, hasTZ bool, tzSign, tzHour, tzSep, tzMinute string) {
	var tzStart int
	tzStart, tzSign, tzHour, tzSep, tzMinute = GetTimezone(format)
	if hasTZ = tzStart > 0; hasTZ {
		cut := tzStart
		// swallow repeated separators before the time zone, e.g. 2020-10--10
		for cut > 0 && isPunctuation(format[cut-1]) {
			cut--
		}
		format = format[:cut]
	}

	if dot := GetFracIndex(format); dot > 0 {
		fracStr = format[dot+1:]
		cut := dot
		// swallow repeated separators before the decimal point, e.g. 2020-10..10
		for cut > 0 && isPunctuation(format[cut-1]) {
			cut--
		}
		format = format[:cut]
	}

	seps = ParseDateFormat(format)
	return seps, fracStr, hasTZ, tzSign, tzHour, tzSep, tzMinute
}

// See https://dev.mysql.com/doc/refman/5.7/en/date-and-time-literals.html.
func parseDatetime(sc *stmtctx.StatementContext, str string, fsp int8, isFloat bool) (Time, error) {
var (
year, month, day, hour, minute, second int
fracStr string
hhmmss bool
err error
year, month, day, hour, minute, second, deltaHour, deltaMinute int
fracStr string
tzSign, tzHour, tzSep, tzMinute string
hasTZ, hhmmss bool
err error
)

seps, fracStr := splitDateTime(str)
seps, fracStr, hasTZ, tzSign, tzHour, tzSep, tzMinute := splitDateTime(str)

var truncatedOrIncorrect bool
/*
If a timezone was parsed, the following cases have to be considered. In some of them the trailing part was wrongly taken for a timezone (it is really a date/time field), and we should consider absorbing it back into seps.

1. Z, then it must be time zone information, and we should not tamper with it
2. -HH, it might be from
1. no fracStr
1. YYYY-MM-DD
2. YYYY-MM-DD-HH
3. YYYY-MM-DD HH-MM
4. YYYY-MM-DD HH:MM-SS
5. YYYY-MM-DD HH:MM:SS-HH (correct, no need absorb)
2. with fracStr
1. YYYY.MM-DD
2. YYYY-MM.DD-HH
3. YYYY-MM-DD.HH-MM
4. YYYY-MM-DD HH.MM-SS
5. YYYY-MM-DD HH:MM.SS-HH (correct, no need absorb)
3. -HH:MM, similarly it might be from
1. no fracStr
1. YYYY-MM:DD
2. YYYY-MM-DD:HH
3. YYYY-MM-DD-HH:MM
4. YYYY-MM-DD HH-MM:SS
5. YYYY-MM-DD HH:MM-SS:HH (invalid)
6. YYYY-MM-DD HH:MM:SS-HH:MM (correct, no need absorb)
2. with fracStr
1. YYYY.MM-DD:HH
2. YYYY-MM.DD-HH:MM
3. YYYY-MM-DD.HH-MM:SS
4. YYYY-MM-DD HH.MM-SS:HH (invalid)
5. YYYY-MM-DD HH:MM.SS-HH:MM (correct, no need absorb)
4. -HHMM, there should only be one case, that is both the date and time part have existed, only then could we have fracStr or time zone
1. YYYY-MM-DD HH:MM:SS.FSP-HHMM (correct, no need absorb)

to summarize, FSP and timezone is only valid if we have date and time presented, otherwise we should consider absorbing
FSP or timezone into seps. additionally, if we want to absorb timezone, we either absorb them all, or not, meaning
we won't only absorb tzHour but not tzMinute.

additional case to consider is that when the time literal is presented in float string (e.g. `YYYYMMDD.HHMMSS`), in
this case, FSP should not be absorbed and only `+HH:MM` would be allowed (i.e. Z, +HHMM, +HH that comes from ISO8601
should be banned), because it only conforms to MySQL's timezone parsing logic, but it is not valid in ISO8601.
However, I think it is generally acceptable to allow a wider spectrum of timezone format in string literal.
*/

// noAbsorb reports whether the trailing FSP or TZ part must NOT be absorbed into seps
noAbsorb := func(seps []string) bool {
// if we have more than 5 parts (i.e. 6), the tailing part can't be absorbed
// or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can
// not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because
// if date part has no separators, we can't use -/+ as separators between date & time.
return len(seps) > 5 || (len(seps) == 1 && len(seps[0]) > 4)
}
if len(fracStr) != 0 && !isFloat {
if !noAbsorb(seps) {
seps = append(seps, fracStr)
fracStr = ""
}
}
if hasTZ && tzSign != "" {
// if tzSign is empty, we can be sure that the string literal contains timezone (such as 2010-10-10T10:10:10Z),
// therefore we could safely skip this branch.
if !noAbsorb(seps) && !(tzMinute != "" && tzSep == "") {
// we can't absorb timezone if there is no separator between tzHour and tzMinute
if len(tzHour) != 0 {
seps = append(seps, tzHour)
}
if len(tzMinute) != 0 {
seps = append(seps, tzMinute)
}
hasTZ = false
}
}
switch len(seps) {
case 1:
l := len(seps[0])
Expand Down Expand Up @@ -880,15 +1061,6 @@ func parseDatetime(sc *stmtctx.StatementContext, str string, fsp int8, isFloat b
sc.AppendWarning(ErrTruncatedWrongVal.GenWithStackByArgs("datetime", str))
err = nil
}
case 2:
// YYYY-MM is not valid
if len(fracStr) == 0 {
return ZeroDatetime, errors.Trace(ErrWrongValue.GenWithStackByArgs(DateTimeStr, str))
}

// YYYY-MM.DD, DD is treat as fracStr
err = scanTimeArgs(append(seps, fracStr), &year, &month, &day)
fracStr = ""
case 3:
// YYYY-MM-DD
err = scanTimeArgs(seps, &year, &month, &day)
Expand Down Expand Up @@ -924,7 +1096,8 @@ func parseDatetime(sc *stmtctx.StatementContext, str string, fsp int8, isFloat b
var microsecond int
var overflow bool
if hhmmss {
// If input string is "20170118.999", without hhmmss, fsp is meanless.
// If input string is "20170118.999", without hhmmss, fsp is meaningless.
// TODO: this case is not only meaningless, but erroneous, please confirm.
microsecond, overflow, err = ParseFrac(fracStr, fsp)
if err != nil {
return ZeroDatetime, errors.Trace(err)
Expand All @@ -943,6 +1116,36 @@ func parseDatetime(sc *stmtctx.StatementContext, str string, fsp int8, isFloat b
}
tmp = FromGoTime(t1.Add(gotime.Second))
}
if hasTZ {
// without hhmmss, timezone is also meaningless
if !hhmmss {
return ZeroDatetime, errors.Trace(ErrWrongValue.GenWithStack(DateTimeStr, str))
}
if len(tzHour) != 0 {
deltaHour = int((tzHour[0]-'0')*10 + (tzHour[1] - '0'))
}
if len(tzMinute) != 0 {
deltaMinute = int((tzMinute[0]-'0')*10 + (tzMinute[1] - '0'))
}
// allowed delta range is [-14:00, 14:00], and we will intentionally reject -00:00
if deltaHour > 14 || deltaMinute > 59 || (deltaHour == 14 && deltaMinute != 0) || (tzSign == "-" && deltaHour == 0 && deltaMinute == 0) {
return ZeroDatetime, errors.Trace(ErrWrongValue.GenWithStackByArgs(DateTimeStr, str))
}
// by default, if the temporal string literal does not contain timezone information, it will be in the timezone
// specified by the time_zone system variable. However, if the timezone is specified in the string literal, we
// will use the specified timezone to interpret the string literal and convert it into the system timezone.
offset := deltaHour*60*60 + deltaMinute*60
if tzSign == "-" {
offset = -offset
}
loc := gotime.FixedZone(fmt.Sprintf("UTC%s%s:%s", tzSign, tzHour, tzMinute), offset)
t1, err := tmp.GoTime(loc)
if err != nil {
return ZeroDatetime, errors.Trace(err)
}
t1 = t1.In(sc.TimeZone)
tmp = FromGoTime(t1)
}

nt := NewTime(tmp, mysql.TypeDatetime, fsp)

Expand Down
Loading