From dc886dd620cd2c61965f68442330b82a83f386ce Mon Sep 17 00:00:00 2001 From: Chudaykin Alex Date: Tue, 22 Apr 2025 13:38:34 +0300 Subject: [PATCH 1/4] Adding SQL-compliant names to existing MAXVALUE/MINVALUE functions --- .../README.builtin_functions.txt | 35 ++++++++++++++++++- doc/sql.extensions/README.keywords | 2 ++ src/common/ParserTokens.h | 2 ++ src/dsql/parse.y | 4 +++ src/jrd/SysFunction.cpp | 2 ++ 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/doc/sql.extensions/README.builtin_functions.txt b/doc/sql.extensions/README.builtin_functions.txt index de368961eca..2ca9cfa277f 100644 --- a/doc/sql.extensions/README.builtin_functions.txt +++ b/doc/sql.extensions/README.builtin_functions.txt @@ -13,6 +13,7 @@ Authors: Alexey Karyakin Claudio Valderrama C. Alexander Peshkov + Alexey Chudaykin --- @@ -568,6 +569,22 @@ Example: See also: CHAR_TO_UUID and UUID_TO_CHAR +-------- +GREATEST +-------- + +Function: + Returns the maximum value of a list of values. + +Format: + GREATEST( <value> [, <value> ...] ) + +Example: + select greatest(v1, v2, 10) from x; + +See also: MAXVALUE + + ---- HASH ---- @@ -630,6 +647,22 @@ Example: select last_day(of week from date '2017-11-01') from rdb$database; +----- +LEAST +----- + +Function: + Returns the minimum value of a list of values. + +Format: + LEAST( <value> [, <value> ...] ) + +Example: + select least(v1, v2, 10) from x; + +See also: MINVALUE + + ---- LEFT ---- @@ -797,7 +830,7 @@ MINVALUE -------- Function: - Returns the minimun value of a list of values. + Returns the minimum value of a list of values. Format: MINVALUE( <value> [, <value> ...] 
) diff --git a/doc/sql.extensions/README.keywords b/doc/sql.extensions/README.keywords index 5f0f040935b..a5dadc166b7 100644 --- a/doc/sql.extensions/README.keywords +++ b/doc/sql.extensions/README.keywords @@ -396,6 +396,8 @@ Firebird 6.0 Added as reserved words: CALL + GREATEST + LEAST Added as non-reserved words: diff --git a/src/common/ParserTokens.h b/src/common/ParserTokens.h index f5477a9cfdd..611b87f3fc7 100644 --- a/src/common/ParserTokens.h +++ b/src/common/ParserTokens.h @@ -237,6 +237,7 @@ PARSER_TOKEN(TOK_GEN_UUID, "GEN_UUID", true) PARSER_TOKEN(TOK_GLOBAL, "GLOBAL", false) PARSER_TOKEN(TOK_GRANT, "GRANT", false) PARSER_TOKEN(TOK_GRANTED, "GRANTED", true) +PARSER_TOKEN(TOK_GREATEST, "GREATEST", false) PARSER_TOKEN(TOK_GROUP, "GROUP", false) PARSER_TOKEN(TOK_HASH, "HASH", true) PARSER_TOKEN(TOK_HAVING, "HAVING", false) @@ -275,6 +276,7 @@ PARSER_TOKEN(TOK_LAST_VALUE, "LAST_VALUE", true) PARSER_TOKEN(TOK_LASTNAME, "LASTNAME", true) PARSER_TOKEN(TOK_LEAD, "LEAD", true) PARSER_TOKEN(TOK_LEADING, "LEADING", false) +PARSER_TOKEN(TOK_LEAST, "LEAST", false) PARSER_TOKEN(TOK_LEAVE, "LEAVE", true) PARSER_TOKEN(TOK_LEFT, "LEFT", false) PARSER_TOKEN(TOK_LEGACY, "LEGACY", true) diff --git a/src/dsql/parse.y b/src/dsql/parse.y index f871cf052c0..636633191cb 100644 --- a/src/dsql/parse.y +++ b/src/dsql/parse.y @@ -710,6 +710,8 @@ using namespace Firebird; %token NAMED_ARG_ASSIGN %token RTRIM %token UNLIST +%token GREATEST +%token LEAST // precedence declarations for expression evaluation @@ -8659,8 +8661,10 @@ system_function_std_syntax | EXP | FLOOR | GEN_UUID + | GREATEST | HEX_DECODE | HEX_ENCODE + | LEAST | LEFT | LN | LOG diff --git a/src/jrd/SysFunction.cpp b/src/jrd/SysFunction.cpp index 5f65a8c6371..cb2d2bdf5b2 100644 --- a/src/jrd/SysFunction.cpp +++ b/src/jrd/SysFunction.cpp @@ -6950,10 +6950,12 @@ const SysFunction SysFunction::functions[] = {"FIRST_DAY", 2, 2, true, setParamsFirstLastDay, makeFirstLastDayResult, evlFirstLastDay, (void*) funFirstDay}, 
{"FLOOR", 1, 1, true, setParamsDblDec, makeCeilFloor, evlFloor, NULL}, {"GEN_UUID", 0, 1, false, NULL, makeUuid, evlGenUuid, NULL}, + {"GREATEST", 1, -1, true, setParamsFromList, makeFromListResult, evlMaxMinValue, (void*) funMaxValue}, {"HASH", 1, 2, true, setParamsHash, makeHash, evlHash, NULL}, {"HEX_DECODE", 1, 1, true, NULL, makeDecodeHex, evlDecodeHex, NULL}, {"HEX_ENCODE", 1, 1, true, NULL, makeEncodeHex, evlEncodeHex, NULL}, {"LAST_DAY", 2, 2, true, setParamsFirstLastDay, makeFirstLastDayResult, evlFirstLastDay, (void*) funLastDay}, + {"LEAST", 1, -1, true, setParamsFromList, makeFromListResult, evlMaxMinValue, (void*) funMinValue}, {"LEFT", 2, 2, true, setParamsSecondInteger, makeLeftRight, evlLeft, NULL}, {"LN", 1, 1, true, setParamsDblDec, makeDblDecResult, evlLnLog10, (void*) funLnat}, {"LOG", 2, 2, true, setParamsDblDec, makeDblDecResult, evlLog, NULL}, From f3b6d9cb56e7f680638eab6c5f1a1c17655c53b5 Mon Sep 17 00:00:00 2001 From: Chudaykin Alex Date: Thu, 24 Apr 2025 09:22:44 +0300 Subject: [PATCH 2/4] Correcting the description. 
Adding words to keyword_or_column --- doc/sql.extensions/README.builtin_functions.txt | 5 +++++ src/dsql/parse.y | 2 ++ 2 files changed, 7 insertions(+) diff --git a/doc/sql.extensions/README.builtin_functions.txt b/doc/sql.extensions/README.builtin_functions.txt index 2ca9cfa277f..5eec645462b 100644 --- a/doc/sql.extensions/README.builtin_functions.txt +++ b/doc/sql.extensions/README.builtin_functions.txt @@ -811,6 +811,7 @@ Examples: where rdb$db_key >= make_dbkey('SOMETABLE', 0, 0, 5) and rdb$db_key < make_dbkey('SOMETABLE', 0, 1, 5) + -------- MAXVALUE -------- @@ -824,6 +825,8 @@ Format: Example: select maxvalue(v1, v2, 10) from x; +See also: GREATEST + -------- MINVALUE @@ -838,6 +841,8 @@ Format: Example: select minvalue(v1, v2, 10) from x; +See also: LEAST + --- MOD diff --git a/src/dsql/parse.y b/src/dsql/parse.y index 636633191cb..52eccd1c7f6 100644 --- a/src/dsql/parse.y +++ b/src/dsql/parse.y @@ -4597,6 +4597,8 @@ keyword_or_column | CALL | LTRIM | RTRIM + | GREATEST + | LEAST ; col_opt From 61c4314f12e52f5738163f0f4f2d629835698e98 Mon Sep 17 00:00:00 2001 From: Chudaykin Alex Date: Tue, 29 Apr 2025 14:16:18 +0300 Subject: [PATCH 3/4] Improved description --- doc/sql.extensions/README.builtin_functions.txt | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/sql.extensions/README.builtin_functions.txt b/doc/sql.extensions/README.builtin_functions.txt index 5eec645462b..150913ea735 100644 --- a/doc/sql.extensions/README.builtin_functions.txt +++ b/doc/sql.extensions/README.builtin_functions.txt @@ -582,7 +582,8 @@ Format: Example: select greatest(v1, v2, 10) from x; -See also: MAXVALUE +Notes: + This function is a SQL-compliant alias to the MAXVALUE function. They work identically. ---- @@ -660,7 +661,8 @@ Format: Example: select least(v1, v2, 10) from x; -See also: MINVALUE +Notes: + This function is a SQL-compliant alias to the MINVALUE function. They work identically. 
---- @@ -825,7 +827,8 @@ Format: Example: select maxvalue(v1, v2, 10) from x; -See also: GREATEST +Notes: + This function is a legacy name for the SQL-compliant GREATEST function. They work identically. -------- @@ -841,7 +844,8 @@ Format: Example: select minvalue(v1, v2, 10) from x; -See also: LEAST +Notes: + This function is a legacy name for the SQL-compliant LEAST function. They work identically. --- From 63b5dbb003741a663553a1227b70f206dd3b8dbc Mon Sep 17 00:00:00 2001 From: Chudaykin Alex Date: Mon, 12 May 2025 14:36:47 +0300 Subject: [PATCH 4/4] Add support non-decimal integer literals and underscores in numeric literals --- ...EADME.decimal_and_non_decimal_literals.txt | 131 ++++ doc/sql.extensions/README.hex_literals.txt | 6 + src/dsql/Parser.cpp | 634 ++++++++---------- src/dsql/Parser.h | 8 + src/dsql/chars.h | 44 +- 5 files changed, 466 insertions(+), 357 deletions(-) create mode 100644 doc/sql.extensions/README.decimal_and_non_decimal_literals.txt diff --git a/doc/sql.extensions/README.decimal_and_non_decimal_literals.txt b/doc/sql.extensions/README.decimal_and_non_decimal_literals.txt new file mode 100644 index 00000000000..ed384475858 --- /dev/null +++ b/doc/sql.extensions/README.decimal_and_non_decimal_literals.txt @@ -0,0 +1,131 @@ +============================================= +Decimal integer literals +Non-decimal integer literals (SQL:2023 T661) +Underscores in numeric literals (SQL:2023 T662) +============================================== + +Supports unsigned hexadecimal integers, unsigned octal integers, and unsigned binary integers. +Also supports underscores in decimal and non-decimal numeric literals. + +Authors: + Alexey Chudaykin + +Syntax rules: + +<plus sign> ::= + + !! U+002B + +<minus sign> ::= + - !! U+002D + +<period> ::= + . !! U+002E + +<underscore> ::= + _ !! 
U+005F + +<hexit> ::= + 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F | a | b | c | d | e | f + +<octal digit> ::= + 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 + +<binary digit> ::= + 0 | 1 + +<signed numeric literal> ::= + [ <sign> ] <unsigned numeric literal> + +<unsigned numeric literal> ::= + <exact numeric literal> | <approximate numeric literal> + +<exact numeric literal> ::= + <unsigned integer> | <unsigned decimal integer> <period> [ <unsigned decimal integer> ] | + <period> <unsigned decimal integer> + +<sign> ::= + <plus sign> | <minus sign> + +<approximate numeric literal> ::= + <mantissa> E <exponent> + +<mantissa> ::= + <exact numeric literal> + +<exponent> ::= + <signed decimal integer> + +<signed integer> ::= + [ <sign> ] <unsigned integer> + +<signed decimal integer> ::= + [ <sign> ] <unsigned decimal integer> + +<unsigned integer> ::= + <unsigned decimal integer> | <unsigned hexadecimal integer> | <unsigned octal integer> | + <unsigned binary integer> + +<unsigned decimal integer> ::= + <digit> [ { [ <underscore> ] <digit> }... ] + +<unsigned hexadecimal integer> ::= + 0X { [ <underscore> ] <hexit> }... + +<unsigned octal integer> ::= + 0O { [ <underscore> ] <octal digit> }... + +<unsigned binary integer> ::= + 0B { [ <underscore> ] <binary digit> }... + +Notes (non-decimal integer literals): + 1. The standard allows a non-decimal literal as the mantissa of an exponential (approximate) literal, + for example 0xAAAe10, but this is ambiguous because E is also a hexadecimal digit (e.g. 0xEEE10). Therefore + non-decimal literals are not allowed in exponential notation. + 2. The value is a numeric value defined by applying the usual + mathematical interpretation of positional hexadecimal notation to a string that is an + unsigned hexadecimal integer. Similarly for an unsigned octal integer and an + unsigned binary integer. + 3. To represent negative values, place a minus sign in front of an unsigned hexadecimal literal. + Similarly for an unsigned octal integer and an + unsigned binary integer. + 4. Numbers with type SMALLINT cannot be written in hexadecimal, strictly speaking, + since even 0x1 evaluates to INTEGER. However, if you write a positive integer within + the 16-bit range 0x0000 (decimal zero) to 0x7FFF (decimal 32767), it will be converted to + SMALLINT transparently. Similarly for an unsigned octal integer and an + unsigned binary integer. + +Notes (underscores in numeric literals): + 1. Limitations for non-decimal integer literals: + 1.1. Two or more consecutive underscores are not allowed; + 1.2. Underscores are not allowed after the last character. + 2. Limitations for decimal integer literals: + 2.1. Underscores before the first character and after the last character are not allowed; + 2.2. 
Underscores are not permitted before or after the <period> symbol; + 2.3. Underscores are not allowed before or after the E (exponent) character; + 2.4. Underscores are not allowed after the last character. + +Examples (non-decimal integer literals): + 1. Unsigned binary integer: + 1.1. select 0b11010100, 0B11010100 from rdb$database; --> 212; + 1.2. select 0b0000000 from rdb$database; --> 0; + 1.3. select -0b11010100, -0B11010100 from rdb$database; --> -212. + 2. Unsigned octal integer: + 2.1. select 0o12345670, 0O12345670 from rdb$database; --> 2739128; + 2.2. select 0o00000000 from rdb$database; --> 0; + 2.3. select -0o12345670, -0O12345670 from rdb$database; --> -2739128. + 3. Unsigned hexadecimal integer: + 3.1. select 0xABC123, 0XABC123 from rdb$database; --> 11256099; + 3.2. select 0x00000000 from rdb$database; --> 0; + 3.3. select -0xABC123, -0XABC123 from rdb$database; --> -11256099. + 3.4. select 0x7FFFFFFF from rdb$database; --> 2147483647 type INTEGER + 3.5. select 0x80000000 from rdb$database; --> 2147483648 type BIGINT + 3.6. select 0x7FFFFFFFFFFFFFFF from rdb$database; --> 9223372036854775807 type BIGINT + 3.7. select 0x8000000000000000 from rdb$database; --> 9223372036854775808 type INT128 + +Examples (underscores in numeric literals): + 1. For non-decimal integer literals: + 1.1 Permitted: select 0x_FFFF, 0xFF_FF, 0x_FF_FF, 0x_FF_FF from rdb$database; + 1.2 Forbidden: select 0x_FF__FF, 0xFFFFFF_, 0x_FF_FF_FF_ from rdb$database; + 2. For decimal integer literals: + 2.1 Permitted: select 10_10, 10_10.10_10, 10.10E-10_0 from rdb$database; + 2.2 Forbidden: select _1010, 100_, 1010._1010, 1010_.1, 10.10E_-100_ from rdb$database; diff --git a/doc/sql.extensions/README.hex_literals.txt b/doc/sql.extensions/README.hex_literals.txt index add4a1b3acd..67093ae60b2 100644 --- a/doc/sql.extensions/README.hex_literals.txt +++ b/doc/sql.extensions/README.hex_literals.txt @@ -7,6 +7,7 @@ Support for hexadecimal numeric and binary string literals. 
Authors: Bill Oliver Adriano dos Santos Fernandes + Alexey Chudaykin Syntax: @@ -35,3 +36,8 @@ Notes (binary string literal): Example: select 0x10, cast('0x0F0000000' as bigint) from rdb$database; select x'deadbeef' from rdb$database; + +Notes by Alexey Chudaykin: + ISO/IEC 9075-2:2023(E) introduces rules for hexadecimal numbers, so this document should be + considered obsolete in the part. Refer to the new document + README.decimal_and_non_decimal_literals. diff --git a/src/dsql/Parser.cpp b/src/dsql/Parser.cpp index c9d267c55c3..523691b2cbe 100644 --- a/src/dsql/Parser.cpp +++ b/src/dsql/Parser.cpp @@ -292,7 +292,7 @@ int Parser::yylex() bool Parser::yylexSkipSpaces() { - UCHAR tok_class; + USHORT tok_class; SSHORT c; // Find end of white space and skip comments @@ -399,7 +399,7 @@ int Parser::yylexAux() MemoryPool& pool = *tdbb->getDefaultPool(); SSHORT c = lex.ptr[-1]; - UCHAR tok_class = classes(c); + USHORT tok_class = classes(c); char string[MAX_TOKEN_LEN]; // Depending on tok_class of token, parse token @@ -408,6 +408,10 @@ int Parser::yylexAux() if (tok_class & CHR_INTRODUCER) { + // restriction for underscores before numeric literals + if ((classes(*lex.ptr) & CHR_DIGIT) || *lex.ptr == '.') + exceptionNumericLiterals(Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); + // The Introducer (_) is skipped, all other idents are copied // to become the name of the character set. char* p = string; @@ -799,417 +803,373 @@ int Parser::yylexAux() lex.ptr = lex.last_token + 1; } - // Hexadecimal numeric constants - 0xBBBBBB - // - // where the '0' and the 'X' (or 'x') are literal, followed - // by a set of nibbles, using 0-9, a-f, or A-F. Odd numbers - // of nibbles assume a leading '0'. The result is converted - // to an integer, and the result returned to the caller. The - // token is identified as a NUMBER32BIT if it's a 32-bit or less - // value, or a NUMBER64INT if it requires a 64-bit number. 
- if (c == '0' && lex.ptr + 1 < lex.end && (*lex.ptr == 'x' || *lex.ptr == 'X') && - (classes(lex.ptr[1]) & CHR_HEX)) - { - bool hexerror = false; - - // Remember where we start from, to rescan later. - // Also we'll need to know the length of the buffer. - - ++lex.ptr; // Skip the 'X' and point to the first digit - const char* hexstring = lex.ptr; - int charlen = 0; + // Non-decimal integer literals (SQL:2023 T661) + // Underscores in numeric literal support (SQ:2023 T662) + // See README.decimal_and_non_decimal_literals - // Time to scan the string. Make sure the characters are legal, - // and find out how long the hex digit string is. + if (c == '0' && lex.ptr + 1 < lex.end) + { + auto base = 0; + SSHORT currExpcChar; - while (lex.ptr < lex.end) + if (*lex.ptr == 'x' || *lex.ptr == 'X') { - c = *lex.ptr; - - if (!(classes(c) & CHR_HEX)) // End of digit string - break; - - ++charlen; // Okay, just count 'em - ++lex.ptr; // and advance... - - if (charlen > 32) // Too many digits... - { - hexerror = true; - break; - } + base = 4; // 2^4 0b1111 + currExpcChar = CHR_HEX; } - - // we have a valid hex token. Now give it back, either as - // an NUMBER32BIT or NUMBER64BIT. - if (!hexerror) + else if (*lex.ptr == 'o' || *lex.ptr == 'O') { - if (charlen > 16) - { - // we deal with int128 - fb_assert(charlen <= 32); // charlen is always <= 32, see 10-15 lines upper - - Firebird::string sbuff(hexstring, charlen); - sbuff.insert(0, "0X"); + base = 3; // 2^3 0b111 + currExpcChar = CHR_OCT; + } + else if (*lex.ptr == 'b' || *lex.ptr == 'B') + { + base = 1; // 2^1 0b1 + currExpcChar = CHR_BIN; + } - yylval.lim64ptr = newLim64String(sbuff, 0); + if (base) + { + const auto decimalConversion = 10; + auto isLastIntroducer = false; + Int128 value128; + value128.set(0.0); - return TOK_NUM128; - } + const CInt128 MAX_VALUE(MAX_Int128 >> base); - // if charlen > 8 (something like FFFF FFFF 0, w/o the spaces) - // then we have to return a NUMBER64BIT. 
We'll make a string - // node here, and let make.cpp worry about converting the - // string to a number and building the node later. - else if (charlen > 8) + // Skip the 'X' or 'O' or 'B' and point to the first digit + for (++lex.ptr; lex.ptr < lex.end; lex.ptr++) { - char cbuff[32]; - fb_assert(charlen <= 16); // charlen is always <= 16, see 10-15 lines upper - cbuff[0] = 'X'; - fb_utils::copy_terminate(&cbuff[1], hexstring, charlen + 1); - - char* p = &cbuff[1]; - UCHAR byte = 0; - bool nibble = strlen(p) & 1; - - yylval.scaledNumber.number = 0; - yylval.scaledNumber.scale = 0; - yylval.scaledNumber.hex = true; + c = *lex.ptr; - while (*p) + if ((classes(c) & CHR_INTRODUCER)) { - if ((*p >= 'a') && (*p <= 'f')) - *p = UPPER(*p); - - // Now convert the character to a nibble - SSHORT c; - - if (*p >= 'A') - c = (*p - 'A') + 10; - else - c = (*p - '0'); - - if (nibble) + if (isLastIntroducer) { - byte = (byte << 4) + (UCHAR) c; - nibble = false; - yylval.scaledNumber.number = (yylval.scaledNumber.number << 8) + byte; - } - else - { - byte = c; - nibble = true; + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); } - ++p; + isLastIntroducer = true; } + else if ((classes(c) & currExpcChar)) + { + // check overflow + if (value128 > MAX_VALUE) + exceptionNumericLiterals(Firebird::string("Overflow of the number")); - // The return value can be a negative number. - return TOK_NUMBER64BIT; - } - else - { - // we have an integer value. we'll return NUMBER32BIT. - // but we have to make a number value to be compatible - // with existing code. - - // See if the string length is odd. If so, - // we'll assume a leading zero. Then figure out the length - // of the actual resulting hex string. Allocate a second - // temporary buffer for it. - - bool nibble = (charlen & 1); // IS_ODD(temp.length) - - // Re-scan over the hex string we got earlier, converting - // adjacent bytes into nibble values. 
Every other nibble, - // write the saved byte to the temp space. At the end of - // this, the temp.space area will contain the binary - // representation of the hex constant. + auto ch = UPPER(c); + if (ch >= 'A') + ch = (ch - 'A') + decimalConversion; + else + ch = (ch - '0'); - UCHAR byte = 0; - SINT64 value = 0; + value128 *= 1 << base; + value128 += ch; - for (int i = 0; i < charlen; i++) + isLastIntroducer = false; + } + else if ((classes(c) & CHR_IDENT) && !(classes(c) & CHR_BRACE)) { - c = UPPER(hexstring[i]); + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); + } + else // We have reached the separator + break; + } - // Now convert the character to a nibble + // Error 0x + const auto minimalLength = 3U; + if ((lex.ptr - lex.last_token) < minimalLength) + { + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token)); + } - if (c >= 'A') - c = (c - 'A') + 10; - else - c = (c - '0'); + // Error of having '_' at the end + if (isLastIntroducer) + { + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token)); + } - if (nibble) - { - byte = (byte << 4) + (UCHAR) c; - nibble = false; - value = (value << 8) + byte; - } - else - { - byte = c; - nibble = true; - } - } + Int128 tmp; + tmp.set(MAX_SINT64, 0); + if (value128 > tmp) + { + Firebird::string strValue; + value128.toString(0, strValue); + yylval.lim64ptr = newLim64String(strValue, 0); + return TOK_NUM128; + } - yylval.int32Val = (SLONG) value; + tmp.set(MAX_SLONG); + if (value128 > tmp) + { + yylval.scaledNumber.number = value128.toInt64(0); + yylval.scaledNumber.scale = 0; + yylval.scaledNumber.hex = false; + return TOK_NUMBER64BIT; + } + else + { + yylval.int32Val = (SLONG)value128.toInteger(0); return TOK_NUMBER32BIT; - } // integer value - } // if (!hexerror)... - - // If we got here, there was a parsing error. Set the - // position back to where it was before we messed with - // it. 
Then fall through to the next thing we might parse. - - c = *lex.last_token; - lex.ptr = lex.last_token + 1; - } // headecimal numeric constants + } + } + } if ((tok_class & CHR_DIGIT) || ((c == '.') && (lex.ptr < lex.end) && (classes(*lex.ptr) & CHR_DIGIT))) { - // The following variables are used to recognize kinds of numbers. - - bool have_error = false; // syntax error or value too large - bool have_digit = false; // we've seen a digit - bool have_decimal = false; // we've seen a '.' - bool have_exp = false; // digit ... [eE] - bool have_exp_sign = false; // digit ... [eE] {+-] - bool have_exp_digit = false; // digit ... [eE] ... digit - bool have_overflow = false; // value of digits > MAX_SINT64 - bool positive_overflow = false; // number is exactly (MAX_SINT64 + 1) - bool have_128_over = false; // value of digits > MAX_INT128 - FB_UINT64 number = 0; - Int128 num128; - int expVal = 0; - FB_UINT64 limit_by_10 = MAX_SINT64 / 10; - int scale = 0; - int expSign = 1; + Firebird::string pureString; + auto isLastIntroducer = false; + SCHAR scale = 0; + auto exponentValue = 0; + auto isOverExponent64b = false; + auto isOverMantisa64b = false; + auto isOverMantisa128b = false; + auto signExponent = 0; + + Int128 mantisaValue; + mantisaValue.set(0.0); + + const auto decimalConversion = 10; + const CInt128 MAX_MANTISA_128(MAX_Int128 / decimalConversion); + const CInt128 MAX_MANTISA_64(MAX_SINT64 / decimalConversion); + + enum + { + state_mantisa = 0, + state_precision, + state_exponent, + } state = state_mantisa; for (--lex.ptr; lex.ptr < lex.end; lex.ptr++) { c = *lex.ptr; - if (have_exp_digit && (! (classes(c) & CHR_DIGIT))) - // First non-digit after exponent and digit terminates the token. - break; - if (have_exp_sign && (! 
(classes(c) & CHR_DIGIT))) + if (classes(c) & CHR_INTRODUCER) { - // only digits can be accepted after "1E-" - have_error = true; - break; - } + if (isLastIntroducer) + { + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); + } - if (have_exp) - { - // We've seen e or E, but nothing beyond that. - if ( ('-' == c) || ('+' == c) ) + const char lastSymbol = *(lex.ptr - 1); + if ((lastSymbol == '.') || (UPPER(lastSymbol) == 'E') || (lastSymbol == '-') || + (lastSymbol == '+')) { - have_exp_sign = true; - if ('-' == c) - expSign = -1; + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); } - else if ( classes(c) & CHR_DIGIT ) + + isLastIntroducer = true; + continue; + } + if (classes(c) & CHR_DIGIT) + { + pureString += static_cast(c); + auto ch = (c - '0'); + + if (state == state_exponent) { - // We have a digit: we haven't seen a sign yet, but it's too late now. - have_exp_digit = have_exp_sign = true; - if (!have_overflow) + if (signExponent == 0) + signExponent = 1; + + exponentValue *= decimalConversion; + + if (signExponent == 1) + exponentValue += ch; + else + exponentValue -= ch; + + if (!isOverExponent64b) { - expVal = expVal * 10 + (c - '0'); - if (expVal > DBL_MAX_10_EXP) - have_overflow = true; + if (exponentValue > DBL_MAX_10_EXP || exponentValue < DBL_MIN_10_EXP) + isOverExponent64b = true; } + else if (exponentValue > DECQUAD_Emax || exponentValue < DECQUAD_Emin) + exceptionNumericLiterals(Firebird::string("Overflow of the exponent")); } else { - // end of the token - have_error = true; - break; - } - } - else if ('.' == c) - { - if (!have_decimal) - have_decimal = true; - else - { - have_error = true; - break; - } - } - else if (classes(c) & CHR_DIGIT) - { - // Before computing the next value, make sure there will be no overflow. 
+ if (!isOverMantisa64b) + { + if (mantisaValue >= MAX_MANTISA_64 && + ((mantisaValue > MAX_MANTISA_64) || (c >= '8'))) + isOverMantisa64b = true; + } + else if (!isOverMantisa128b) + { + if ((mantisaValue >= MAX_MANTISA_128) && + ((mantisaValue > MAX_MANTISA_128) || (c >= '8'))) + { + isOverMantisa128b = true; + isOverExponent64b = true; + } + } - if (!have_overflow) - { - have_digit = true; + if (!isOverMantisa64b || !isOverMantisa128b) + { + mantisaValue *= decimalConversion; + mantisaValue += ch; + } - if (number >= limit_by_10) + if (state == state_precision) { - // possibility of an overflow - if ((number > limit_by_10) || (c >= '8')) + --scale; + // protection against too low precision over 15 characters + // next, we assume that the number is "Decimal 128-bit" + if (-scale > DBL_DIG) { - have_overflow = true; - fb_assert(number <= MAX_SINT64); - num128.set((SINT64)number, 0); - if ((number == limit_by_10) && (c == '8')) - positive_overflow = true; + isOverMantisa128b = true; + isOverExponent64b = true; } } } - else + } + else if (c == '.') + { + if (isLastIntroducer) { - positive_overflow = false; - if (!have_128_over) - { - static const CInt128 MAX_BY10(MAX_Int128 / 10); - if ((num128 >= MAX_BY10) && ((num128 > MAX_BY10) || (c >= '8'))) - have_128_over = true; - } + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); } - if (!have_overflow) - number = number * 10 + (c - '0'); - else if (!have_128_over) + pureString += static_cast(c); + + if (state == state_mantisa) + state = state_precision; + else { - num128 *= 10; - num128 += (c - '0'); + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); } - - if (have_decimal) - --scale; } - else if ( (('E' == c) || ('e' == c)) && have_digit ) - have_exp = true; - else - // Unexpected character: this is the end of the number. 
- break; - } - - // We're done scanning the characters: now return the right kind - // of number token, if any fits the bill. - - if (!have_error) - { - fb_assert(have_digit); - - if (positive_overflow) - have_overflow = false; - - if (scale < MIN_SCHAR || scale > MAX_SCHAR) + else if (UPPER(c) == 'E') { - have_overflow = true; - positive_overflow = false; - have_128_over = true; - } + if (isLastIntroducer) + { + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); + } - // check for a more complex overflow case - if ((!have_overflow) && (expSign > 0) && (expVal > -scale)) - { - expVal += scale; - double maxNum = DBL_MAX / pow(10.0, expVal); - if (double(number) > maxNum) + pureString += static_cast(c); + + if (state != state_exponent) + state = state_exponent; + else { - have_overflow = true; - positive_overflow = false; - have_128_over = true; + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); } } - - // Special case - on the boarder of positive number - if (positive_overflow) + else if ((classes(c) & CHR_IDENT) && (c != '{') && (c != '}')) { - yylval.lim64ptr = newLim64String( - Firebird::string(lex.last_token, lex.ptr - lex.last_token), scale); - lex.last_token_bk = lex.last_token; - lex.line_start_bk = lex.line_start; - lex.lines_bk = lex.lines; - - return scale ? TOK_LIMIT64_NUMBER : TOK_LIMIT64_INT; + exceptionNumericLiterals( + Firebird::string(lex.last_token, lex.ptr - lex.last_token + 1)); } - - // Should we use floating point type? - if (have_exp_digit || have_128_over) + else // We have reached the separator { - yylval.stringPtr = newString( - Firebird::string(lex.last_token, lex.ptr - lex.last_token)); - lex.last_token_bk = lex.last_token; - lex.line_start_bk = lex.line_start; - lex.lines_bk = lex.lines; + if ((c == '-') || (c == '+')) + { + if (state == state_exponent && signExponent == 0) + { + pureString += static_cast(c); - return have_overflow ? 
TOK_DECIMAL_NUMBER : TOK_FLOAT_NUMBER; + if (c == '-') + signExponent = -1; + else + signExponent = 1; + continue; + } + } + break; } - // May be 128-bit integer? - if (have_overflow) - { - yylval.lim64ptr = newLim64String( - Firebird::string(lex.last_token, lex.ptr - lex.last_token), scale); - lex.last_token_bk = lex.last_token; - lex.line_start_bk = lex.line_start; - lex.lines_bk = lex.lines; + isLastIntroducer = false; + } + // We have reached the separator or the end of the line - return TOK_NUM128; - } + if (isLastIntroducer) + exceptionNumericLiterals(Firebird::string(lex.last_token, lex.ptr - lex.last_token)); - if (!have_exp) - { - // We should return some kind (scaled-) integer type - // except perhaps in dialect 1. + if (state == state_exponent && signExponent == 0) + exceptionNumericLiterals(Firebird::string(lex.last_token, lex.ptr - lex.last_token)); - if (!have_decimal && (number <= MAX_SLONG)) - { - yylval.int32Val = (SLONG) number; - //printf ("parse.y %p %d\n", yylval.legacyStr, number); - return TOK_NUMBER32BIT; - } - else - { - /* We have either a decimal point with no exponent - or a string of digits whose value exceeds MAX_SLONG: - the returned type depends on the client dialect, - so warn of the difference if the client dialect is - SQL_DIALECT_V6_TRANSITION. - */ - - if (SQL_DIALECT_V6_TRANSITION == client_dialect) - { - /* Issue a warning about the ambiguity of the numeric - * numeric literal. There are multiple calls because - * the message text exceeds the 119-character limit - * of our message database. 
- */ - ERRD_post_warning(Arg::Warning(isc_dsql_warning_number_ambiguous) << - Arg::Str(Firebird::string(lex.last_token, lex.ptr - lex.last_token))); - ERRD_post_warning(Arg::Warning(isc_dsql_warning_number_ambiguous1)); - } + if (state == state_precision && scale == 0) + exceptionNumericLiterals(Firebird::string(lex.last_token, lex.ptr - lex.last_token)); - lex.last_token_bk = lex.last_token; - lex.line_start_bk = lex.line_start; - lex.lines_bk = lex.lines; + lex.last_token_bk = lex.last_token; + lex.line_start_bk = lex.line_start; + lex.lines_bk = lex.lines; - if (client_dialect < SQL_DIALECT_V6_TRANSITION) - { - yylval.stringPtr = newString( - Firebird::string(lex.last_token, lex.ptr - lex.last_token)); - return TOK_FLOAT_NUMBER; - } + // Any with an exponent "E" or a very big number + if (state == state_exponent || isOverMantisa128b) + { + // Check for a more complex overflow case + if ((!isOverExponent64b) && (signExponent == 1) && (exponentValue > (-scale))) + { + const auto degreeBase = 10.0; + exponentValue += scale; + double check_num = DBL_MAX / pow(degreeBase, exponentValue); + if (mantisaValue.toDouble() > check_num) + isOverExponent64b = true; + } - yylval.scaledNumber.number = number; - yylval.scaledNumber.scale = scale; - yylval.scaledNumber.hex = false; + yylval.stringPtr = newString(pureString); + // Long double or double + return isOverExponent64b ? 
TOK_DECIMAL_NUMBER : TOK_FLOAT_NUMBER; + } + + // 128-bit + if (isOverMantisa64b) + { + yylval.lim64ptr = newLim64String(pureString, scale); + return TOK_NUM128; + } - if (have_decimal) - return TOK_SCALEDINT; + Int128 tmp; + if (state != state_precision) + { + tmp.set(MAX_SLONG, 0); + if (tmp >= mantisaValue) + { + // A natural 32 bit number + yylval.int32Val = (SLONG)mantisaValue.toInteger(0); + return TOK_NUMBER32BIT; + } + } + /* We have either a decimal point with no exponent + or a string of digits whose value exceeds MAX_SLONG: + the returned type depends on the client dialect, + so warn of the difference if the client dialect is + SQL_DIALECT_V6_TRANSITION. + */ + if (SQL_DIALECT_V6_TRANSITION == client_dialect) + { + /* Issue a warning about the ambiguity of the numeric + * numeric literal. There are multiple calls because + * the message text exceeds the 119-character limit + * of our message database. + */ + ERRD_post_warning(Arg::Warning(isc_dsql_warning_number_ambiguous) << Arg::Str( + Firebird::string(lex.last_token, lex.ptr - lex.last_token))); + ERRD_post_warning(Arg::Warning(isc_dsql_warning_number_ambiguous1)); + } - return TOK_NUMBER64BIT; - } - } // else if (!have_exp) - } // if (!have_error) + if (client_dialect < SQL_DIALECT_V6_TRANSITION) + { + yylval.stringPtr = newString(pureString); + return TOK_FLOAT_NUMBER; + } - // we got some kind of error or overflow, so don't recognize this - // as a number: just pass it through to the next part of the lexer. + yylval.scaledNumber.number = mantisaValue.toInt64(0); + yylval.scaledNumber.scale = scale; + yylval.scaledNumber.hex = false; + return state == state_precision ? 
TOK_SCALEDINT : TOK_NUMBER64BIT; } // Restore the status quo ante, before we started our unsuccessful diff --git a/src/dsql/Parser.h b/src/dsql/Parser.h index 51cb5195623..ac743adae8c 100644 --- a/src/dsql/Parser.h +++ b/src/dsql/Parser.h @@ -347,6 +347,14 @@ class Parser : public Firebird::PermanentStorage } void checkTimeDialect(); + void exceptionNumericLiterals(const Firebird::string& text) + { + using namespace Firebird; + ERRD_post(Arg::Gds(isc_sqlerr) << Arg::Num(-104) << Arg::Gds(isc_dsql_token_unk_err) + << Arg::Num(yyposn.firstLine) << Arg::Num(yyposn.firstColumn) + << Arg::Gds(isc_random) << Arg::Str(text)); + } + // start - defined in btyacc_fb.ske private: static void yySCopy(YYSTYPE* to, YYSTYPE* from, int size); diff --git a/src/dsql/chars.h b/src/dsql/chars.h index 47a768bf49c..23ff633ebc2 100644 --- a/src/dsql/chars.h +++ b/src/dsql/chars.h @@ -21,17 +21,21 @@ * Contributor(s): ______________________________________. */ -const SCHAR CHR_LETTER = 1; -const SCHAR CHR_DIGIT = 2; -const SCHAR CHR_IDENT = 4; -const SCHAR CHR_QUOTE = 8; -const SCHAR CHR_WHITE = 16; -const SCHAR CHR_HEX = 32; -const SCHAR CHR_INTRODUCER = 64; +const SSHORT CHR_LETTER = (1 << 0); +const SSHORT CHR_DIGIT = (1 << 1); +const SSHORT CHR_IDENT = (1 << 2); +const SSHORT CHR_QUOTE = (1 << 3); +const SSHORT CHR_WHITE = (1 << 4); +const SSHORT CHR_HEX = (1 << 5); +const SSHORT CHR_INTRODUCER = (1 << 6); +const SSHORT CHR_BIN = (1 << 7); +const SSHORT CHR_OCT = (1 << 8); +const SSHORT CHR_BRACE = (1 << 9); + // Use the functions at the end of this file; do not reference the array directly. -static const SCHAR classes_array[] = { +static const SSHORT classes_array[] = { /* 000 */ 0, /* 001 */ 0, /* 002 */ 0, @@ -80,14 +84,14 @@ static const SCHAR classes_array[] = { /* 045 - */ 0, /* 046 . 
*/ 0, /* 047 / */ 0, -/* 048 0 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, -/* 049 1 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, -/* 050 2 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, -/* 051 3 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, -/* 052 4 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, -/* 053 5 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, -/* 054 6 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, -/* 055 7 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, +/* 048 0 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT | CHR_BIN, +/* 049 1 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT | CHR_BIN, +/* 050 2 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT, +/* 051 3 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT, +/* 052 4 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT, +/* 053 5 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT, +/* 054 6 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT, +/* 055 7 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX | CHR_OCT, /* 056 8 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, /* 057 9 */ 0 | CHR_DIGIT | CHR_IDENT | CHR_HEX, /* 058 : */ 0, @@ -155,9 +159,9 @@ static const SCHAR classes_array[] = { /* 120 x */ 0 | CHR_LETTER | CHR_IDENT, /* 121 y */ 0 | CHR_LETTER | CHR_IDENT, /* 122 z */ 0 | CHR_LETTER | CHR_IDENT, -/* 123 { */ 0 | CHR_LETTER | CHR_IDENT, +/* 123 { */ 0 | CHR_LETTER | CHR_IDENT | CHR_BRACE, /* 124 | */ 0, -/* 125 } */ 0 | CHR_LETTER | CHR_IDENT, +/* 125 } */ 0 | CHR_LETTER | CHR_IDENT | CHR_BRACE, /* 126 ~ */ 0, /* 127 */ 0, /* 128 */ 0, @@ -290,12 +294,12 @@ static const SCHAR classes_array[] = { /* 255 */ 0 }; -inline SCHAR classes(int idx) +inline SSHORT classes(int idx) { return classes_array[(UCHAR) idx]; } -inline SCHAR classes(UCHAR idx) +inline SSHORT classes(UCHAR idx) { return classes_array[idx]; }