Skip to content

Commit cacdf33

Browse files
authored
Add support for char length units on small character string types. (#663)
This results in complete support for ANSI CHARACTER, CHAR, CHARACTER VARYING, CHAR VARYING, and VARCHAR.
1 parent 7776726 commit cacdf33

File tree

6 files changed

+244
-34
lines changed

6 files changed

+244
-34
lines changed

src/ast/data_type.rs

Lines changed: 70 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -26,15 +26,15 @@ use super::value::escape_single_quote_string;
2626
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
2727
pub enum DataType {
2828
/// Fixed-length character type e.g. CHARACTER(10)
29-
Character(Option<u64>),
29+
Character(Option<CharacterLength>),
3030
/// Fixed-length char type e.g. CHAR(10)
31-
Char(Option<u64>),
31+
Char(Option<CharacterLength>),
3232
/// Character varying type e.g. CHARACTER VARYING(10)
33-
CharacterVarying(Option<u64>),
33+
CharacterVarying(Option<CharacterLength>),
3434
/// Char varying type e.g. CHAR VARYING(10)
35-
CharVarying(Option<u64>),
35+
CharVarying(Option<CharacterLength>),
3636
/// Variable-length character type e.g. VARCHAR(10)
37-
Varchar(Option<u64>),
37+
Varchar(Option<CharacterLength>),
3838
/// Variable-length character type e.g. NVARCHAR(10)
3939
Nvarchar(Option<u64>),
4040
/// Uuid type
@@ -133,17 +133,14 @@ pub enum DataType {
133133
impl fmt::Display for DataType {
134134
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
135135
match self {
136-
DataType::Character(size) => {
137-
format_type_with_optional_length(f, "CHARACTER", size, false)
138-
}
139-
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
136+
DataType::Character(size) => format_character_string_type(f, "CHARACTER", size),
137+
DataType::Char(size) => format_character_string_type(f, "CHAR", size),
140138
DataType::CharacterVarying(size) => {
141-
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
142-
}
143-
DataType::CharVarying(size) => {
144-
format_type_with_optional_length(f, "CHAR VARYING", size, false)
139+
format_character_string_type(f, "CHARACTER VARYING", size)
145140
}
146-
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),
141+
142+
DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size),
143+
DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size),
147144
DataType::Nvarchar(size) => {
148145
format_type_with_optional_length(f, "NVARCHAR", size, false)
149146
}
@@ -247,6 +244,18 @@ fn format_type_with_optional_length(
247244
Ok(())
248245
}
249246

247+
fn format_character_string_type(
248+
f: &mut fmt::Formatter,
249+
sql_type: &str,
250+
size: &Option<CharacterLength>,
251+
) -> fmt::Result {
252+
write!(f, "{}", sql_type)?;
253+
if let Some(size) = size {
254+
write!(f, "({})", size)?;
255+
}
256+
Ok(())
257+
}
258+
250259
/// Timezone formatting information for the Timestamp and Time data types.
251260
///
252261
/// This is related more to display information than to real differences between each variant. To
@@ -324,3 +333,50 @@ impl fmt::Display for ExactNumberInfo {
324333
}
325334
}
326335
}
336+
337+
/// Information about [character length][1], including length and possibly unit.
338+
///
339+
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
340+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
341+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
342+
pub struct CharacterLength {
343+
/// Fixed length (if not VARYING) or maximum length (if VARYING)
344+
pub length: u64,
345+
/// Optional unit. If not specified, ANSI treats it as CHARACTERS implicitly
346+
pub unit: Option<CharLengthUnits>,
347+
}
348+
349+
impl fmt::Display for CharacterLength {
350+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
351+
write!(f, "{}", self.length)?;
352+
if let Some(unit) = &self.unit {
353+
write!(f, " {}", unit)?;
354+
}
355+
Ok(())
356+
}
357+
}
358+
359+
/// Possible units for character lengths, initially based on the 2016 ANSI [standard][1].
360+
///
361+
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
362+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
363+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
364+
pub enum CharLengthUnits {
365+
/// CHARACTERS unit
366+
Characters,
367+
/// OCTETS unit
368+
Octets,
369+
}
370+
371+
impl fmt::Display for CharLengthUnits {
372+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
373+
match self {
374+
Self::Characters => {
375+
write!(f, "CHARACTERS")
376+
}
377+
Self::Octets => {
378+
write!(f, "OCTETS")
379+
}
380+
}
381+
}
382+
}

src/ast/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ use core::fmt;
2222
#[cfg(feature = "serde")]
2323
use serde::{Deserialize, Serialize};
2424

25-
pub use self::data_type::DataType;
26-
pub use self::data_type::ExactNumberInfo;
27-
pub use self::data_type::TimezoneInfo;
25+
pub use self::data_type::{
26+
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
27+
};
2828
pub use self::ddl::{
2929
AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef,
3030
ReferentialAction, TableConstraint,

src/keywords.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@ define_keywords!(
123123
CHANGE,
124124
CHAR,
125125
CHARACTER,
126+
CHARACTERS,
126127
CHARACTER_LENGTH,
127128
CHARSET,
128129
CHAR_LENGTH,
@@ -372,6 +373,7 @@ define_keywords!(
372373
NVARCHAR,
373374
OBJECT,
374375
OCCURRENCES_REGEX,
376+
OCTETS,
375377
OCTET_LENGTH,
376378
OF,
377379
OFFSET,

src/parser.rs

Lines changed: 145 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3426,20 +3426,24 @@ impl<'a> Parser<'a> {
34263426
Ok(DataType::BigInt(optional_precision?))
34273427
}
34283428
}
3429-
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
3429+
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
34303430
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
34313431
Keyword::CHARACTER => {
34323432
if self.parse_keyword(Keyword::VARYING) {
3433-
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
3433+
Ok(DataType::CharacterVarying(
3434+
self.parse_optional_character_length()?,
3435+
))
34343436
} else {
3435-
Ok(DataType::Character(self.parse_optional_precision()?))
3437+
Ok(DataType::Character(self.parse_optional_character_length()?))
34363438
}
34373439
}
34383440
Keyword::CHAR => {
34393441
if self.parse_keyword(Keyword::VARYING) {
3440-
Ok(DataType::CharVarying(self.parse_optional_precision()?))
3442+
Ok(DataType::CharVarying(
3443+
self.parse_optional_character_length()?,
3444+
))
34413445
} else {
3442-
Ok(DataType::Char(self.parse_optional_precision()?))
3446+
Ok(DataType::Char(self.parse_optional_character_length()?))
34433447
}
34443448
}
34453449
Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
@@ -3680,6 +3684,31 @@ impl<'a> Parser<'a> {
36803684
}
36813685
}
36823686

3687+
pub fn parse_optional_character_length(
3688+
&mut self,
3689+
) -> Result<Option<CharacterLength>, ParserError> {
3690+
if self.consume_token(&Token::LParen) {
3691+
let character_length = self.parse_character_length()?;
3692+
self.expect_token(&Token::RParen)?;
3693+
Ok(Some(character_length))
3694+
} else {
3695+
Ok(None)
3696+
}
3697+
}
3698+
3699+
pub fn parse_character_length(&mut self) -> Result<CharacterLength, ParserError> {
3700+
let length = self.parse_literal_uint()?;
3701+
let unit = if self.parse_keyword(Keyword::CHARACTERS) {
3702+
Some(CharLengthUnits::Characters)
3703+
} else if self.parse_keyword(Keyword::OCTETS) {
3704+
Some(CharLengthUnits::Octets)
3705+
} else {
3706+
None
3707+
};
3708+
3709+
Ok(CharacterLength { length, unit })
3710+
}
3711+
36833712
pub fn parse_optional_precision_scale(
36843713
&mut self,
36853714
) -> Result<(Option<u64>, Option<u64>), ParserError> {
@@ -5337,7 +5366,9 @@ mod tests {
53375366

53385367
#[cfg(test)]
53395368
mod test_parse_data_type {
5340-
use crate::ast::{DataType, ExactNumberInfo, TimezoneInfo};
5369+
use crate::ast::{
5370+
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
5371+
};
53415372
use crate::dialect::{AnsiDialect, GenericDialect};
53425373
use crate::test_utils::TestedDialects;
53435374

@@ -5360,21 +5391,124 @@ mod tests {
53605391

53615392
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));
53625393

5363-
test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));
5394+
test_parse_data_type!(
5395+
dialect,
5396+
"CHARACTER(20)",
5397+
DataType::Character(Some(CharacterLength {
5398+
length: 20,
5399+
unit: None
5400+
}))
5401+
);
5402+
5403+
test_parse_data_type!(
5404+
dialect,
5405+
"CHARACTER(20 CHARACTERS)",
5406+
DataType::Character(Some(CharacterLength {
5407+
length: 20,
5408+
unit: Some(CharLengthUnits::Characters)
5409+
}))
5410+
);
5411+
5412+
test_parse_data_type!(
5413+
dialect,
5414+
"CHARACTER(20 OCTETS)",
5415+
DataType::Character(Some(CharacterLength {
5416+
length: 20,
5417+
unit: Some(CharLengthUnits::Octets)
5418+
}))
5419+
);
53645420

53655421
test_parse_data_type!(dialect, "CHAR", DataType::Char(None));
53665422

5367-
test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));
5423+
test_parse_data_type!(
5424+
dialect,
5425+
"CHAR(20)",
5426+
DataType::Char(Some(CharacterLength {
5427+
length: 20,
5428+
unit: None
5429+
}))
5430+
);
5431+
5432+
test_parse_data_type!(
5433+
dialect,
5434+
"CHAR(20 CHARACTERS)",
5435+
DataType::Char(Some(CharacterLength {
5436+
length: 20,
5437+
unit: Some(CharLengthUnits::Characters)
5438+
}))
5439+
);
5440+
5441+
test_parse_data_type!(
5442+
dialect,
5443+
"CHAR(20 OCTETS)",
5444+
DataType::Char(Some(CharacterLength {
5445+
length: 20,
5446+
unit: Some(CharLengthUnits::Octets)
5447+
}))
5448+
);
53685449

53695450
test_parse_data_type!(
53705451
dialect,
53715452
"CHARACTER VARYING(20)",
5372-
DataType::CharacterVarying(Some(20))
5453+
DataType::CharacterVarying(Some(CharacterLength {
5454+
length: 20,
5455+
unit: None
5456+
}))
5457+
);
5458+
5459+
test_parse_data_type!(
5460+
dialect,
5461+
"CHARACTER VARYING(20 CHARACTERS)",
5462+
DataType::CharacterVarying(Some(CharacterLength {
5463+
length: 20,
5464+
unit: Some(CharLengthUnits::Characters)
5465+
}))
5466+
);
5467+
5468+
test_parse_data_type!(
5469+
dialect,
5470+
"CHARACTER VARYING(20 OCTETS)",
5471+
DataType::CharacterVarying(Some(CharacterLength {
5472+
length: 20,
5473+
unit: Some(CharLengthUnits::Octets)
5474+
}))
53735475
);
53745476

5375-
test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));
5477+
test_parse_data_type!(
5478+
dialect,
5479+
"CHAR VARYING(20)",
5480+
DataType::CharVarying(Some(CharacterLength {
5481+
length: 20,
5482+
unit: None
5483+
}))
5484+
);
5485+
5486+
test_parse_data_type!(
5487+
dialect,
5488+
"CHAR VARYING(20 CHARACTERS)",
5489+
DataType::CharVarying(Some(CharacterLength {
5490+
length: 20,
5491+
unit: Some(CharLengthUnits::Characters)
5492+
}))
5493+
);
53765494

5377-
test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
5495+
test_parse_data_type!(
5496+
dialect,
5497+
"CHAR VARYING(20 OCTETS)",
5498+
DataType::CharVarying(Some(CharacterLength {
5499+
length: 20,
5500+
unit: Some(CharLengthUnits::Octets)
5501+
}))
5502+
);
5503+
5504+
test_parse_data_type!(
5505+
dialect,
5506+
"VARCHAR(20)",
5507+
DataType::Varchar(Some(CharacterLength {
5508+
length: 20,
5509+
unit: None
5510+
}))
5511+
);
53785512
}
53795513

53805514
#[test]

tests/sqlparser_common.rs

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1945,7 +1945,10 @@ fn parse_create_table() {
19451945
vec![
19461946
ColumnDef {
19471947
name: "name".into(),
1948-
data_type: DataType::Varchar(Some(100)),
1948+
data_type: DataType::Varchar(Some(CharacterLength {
1949+
length: 100,
1950+
unit: None
1951+
})),
19491952
collation: None,
19501953
options: vec![ColumnOptionDef {
19511954
name: None,
@@ -2401,7 +2404,10 @@ fn parse_create_external_table() {
24012404
vec![
24022405
ColumnDef {
24032406
name: "name".into(),
2404-
data_type: DataType::Varchar(Some(100)),
2407+
data_type: DataType::Varchar(Some(CharacterLength {
2408+
length: 100,
2409+
unit: None
2410+
})),
24052411
collation: None,
24062412
options: vec![ColumnOptionDef {
24072413
name: None,
@@ -2469,7 +2475,10 @@ fn parse_create_or_replace_external_table() {
24692475
columns,
24702476
vec![ColumnDef {
24712477
name: "name".into(),
2472-
data_type: DataType::Varchar(Some(100)),
2478+
data_type: DataType::Varchar(Some(CharacterLength {
2479+
length: 100,
2480+
unit: None
2481+
})),
24732482
collation: None,
24742483
options: vec![ColumnOptionDef {
24752484
name: None,

0 commit comments

Comments
 (0)