From e5bd9c3756ee3bbc03e24e78b37e5f7e90c89d18 Mon Sep 17 00:00:00 2001
From: Vasily Kulikov
Date: Fri, 8 Jan 2021 20:06:15 +0300
Subject: [PATCH 1/9] w

---
 src/tokenizer.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index bbad1a4c4..f1ba71252 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -253,6 +253,20 @@ impl fmt::Display for Whitespace {
     }
 }
 
+/// Location in input string
+#[derive(Debug, PartialEq)]
+pub struct Location {
+    pub line: usize,
+    pub position: usize,
+}
+
+/// A [Token] with [Location] attached to it
+#[derive(Debug, PartialEq)]
+pub struct TokenWithLocation {
+    pub token: Token,
+    pub location: Location,
+}
+
 /// Tokenizer error
 #[derive(Debug, PartialEq)]
 pub struct TokenizerError {

From e55116f79587a212e811aab319875b50123fc55a Mon Sep 17 00:00:00 2001
From: Vasily Kulikov
Date: Fri, 8 Jan 2021 20:41:58 +0300
Subject: [PATCH 2/9] add test

---
 src/tokenizer.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 4 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index f1ba71252..017f243c9 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -256,8 +256,8 @@ impl fmt::Display for Whitespace {
 /// Location in input string
 #[derive(Debug, PartialEq)]
 pub struct Location {
-    pub line: usize,
-    pub position: usize,
+    pub line: u64,
+    pub column: u64,
 }
 
 /// A [Token] with [Location] attached to it
@@ -267,6 +267,15 @@ pub struct TokenWithLocation {
     pub token: Token,
     pub location: Location,
 }
 
+impl TokenWithLocation {
+    pub fn new(token: Token, line: u64, column: u64) -> TokenWithLocation {
+        TokenWithLocation {
+            token,
+            location: Location { line, column },
+        }
+    }
+}
+
 /// Tokenizer error
 #[derive(Debug, PartialEq)]
 pub struct TokenizerError {
@@ -296,11 +305,27 @@ impl<'a> Tokenizer<'a> {
     /// Tokenize the statement and produce a vector of tokens
     pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
+        let twl = self.tokenize_with_location()?;
+        let mut tokens: Vec<Token> = vec![];
+
+        tokens.reserve(twl.len());
+        for token_with_location in twl {
+            tokens.push(token_with_location.token);
+        }
+        Ok(tokens)
+    }
+
+    /// Tokenize the statement and produce a vector of tokens
+    pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithLocation>, TokenizerError> {
         let mut peekable = self.query.chars().peekable();
-        let mut tokens: Vec<Token> = vec![];
+        let mut tokens: Vec<TokenWithLocation> = vec![];
 
         while let Some(token) = self.next_token(&mut peekable)? {
+            let location = Location {
+                line: self.line,
+                column: self.col,
+            };
             match &token {
                 Token::Whitespace(Whitespace::Newline) => {
                     self.line += 1;
@@ -315,7 +340,10 @@ impl<'a> Tokenizer<'a> {
                 _ => self.col += 1,
             }
 
-            tokens.push(token);
+            tokens.push(TokenWithLocation {
+                token: token,
+                location: location,
+            });
         }
         Ok(tokens)
     }
@@ -1078,4 +1106,32 @@ mod tests {
         //println!("------------------------------");
         assert_eq!(expected, actual);
     }
+
+    fn compare_with_location(expected: Vec<TokenWithLocation>, actual: Vec<TokenWithLocation>) {
+        //println!("------------------------------");
+        //println!("tokens = {:?}", actual);
+        //println!("expected = {:?}", expected);
+        //println!("------------------------------");
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_location_newlines() {
+        let sql = String::from("line1\nline2\rline3\r\nline4\r");
+
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize_with_location().unwrap();
+        let expected = vec![
+            TokenWithLocation::new(Token::make_word("line1", None), 1, 1),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 1, 6),
+            TokenWithLocation::new(Token::make_word("line2", None), 2, 1),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 2, 6),
+            TokenWithLocation::new(Token::make_word("line3", None), 3, 1),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 3, 6),
+            TokenWithLocation::new(Token::make_word("line4", None), 4, 1),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 4, 6),
+        ];
+        compare_with_location(expected, tokens);
+    }
 }

From 3de151ce843e7782154f871ebb8a61edcedafffc Mon Sep 17 00:00:00 2001
From: Vasily Kulikov
Date: Fri, 8 Jan 2021 21:35:27 +0300
Subject: [PATCH 3/9] Clone

---
 Cargo.toml       | 2 +-
 src/tokenizer.rs | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 231d7fee5..a26156953 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "sqlparser"
 description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011"
-version = "0.7.1-alpha.0"
+version = "0.7.1"
 authors = ["Andy Grove "]
 homepage = "https://github.com/ballista-compute/sqlparser-rs"
 documentation = "https://docs.rs/sqlparser/"

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 017f243c9..39728b0c2 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -254,14 +254,16 @@ impl fmt::Display for Whitespace {
 }
 
 /// Location in input string
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub struct Location {
+    /// Line number, starting from 1
     pub line: u64,
+    /// Line column, starting from 1
     pub column: u64,
 }
 
 /// A [Token] with [Location] attached to it
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub struct TokenWithLocation {
     pub token: Token,
     pub location: Location,

From 6a04ed35ae673b4b710225aa5fcbaa20595a1918 Mon Sep 17 00:00:00 2001
From: Vasily Kulikov
Date: Fri, 8 Jan 2021 22:48:06 +0300
Subject: [PATCH 4/9] work

---
 src/tokenizer.rs | 295 ++++++++++++++++++++++++++---------------------
 1 file changed, 165 insertions(+), 130 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 39728b0c2..4055e39aa 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -286,12 +286,44 @@ pub struct TokenizerError {
     pub col: u64,
 }
 
+struct State<'a> {
+    peekable: Peekable<Chars<'a>>,
+    pub line: u64,
+    pub col: u64,
+}
+
+impl<'a> State<'a> {
+    pub fn next(&mut self) -> Option<char> {
+        match self.peekable.next() {
+            None => None,
+            Some(s) => {
+                if s == '\n' {
+                    self.line += 1;
+                    self.col = 1;
+                } else {
+                    self.col += 1;
+                }
+                Some(s)
+            }
+        }
+    }
+
+    pub fn peek(&mut self) -> Option<&char> {
+        self.peekable.peek()
+    }
+
+    pub fn location(&self) -> Location {
+        Location {
+            line: self.line,
+            column: self.col,
+        }
+    }
+}
+
 /// SQL Tokenizer
 pub struct Tokenizer<'a> {
     dialect: &'a dyn Dialect,
     pub query: String,
-    pub line: u64,
-    pub col: u64,
 }
 
 impl<'a> Tokenizer<'a> {
@@ -300,8 +332,6 @@ impl<'a> Tokenizer<'a> {
         Self {
             dialect,
             query: query.to_string(),
-            line: 1,
-            col: 1,
         }
     }
 
@@ -319,28 +349,19 @@ impl<'a> Tokenizer<'a> {
     /// Tokenize the statement and produce a vector of tokens
     pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithLocation>, TokenizerError> {
-        let mut peekable = self.query.chars().peekable();
+        let mut state = State {
+            peekable: self.query.chars().peekable(),
+            line: 1,
+            col: 1,
+        };
 
         let mut tokens: Vec<TokenWithLocation> = vec![];
 
-        while let Some(token) = self.next_token(&mut peekable)? {
+        while let Some(token) = self.next_token(&mut state)? {
             let location = Location {
-                line: self.line,
-                column: self.col,
+                line: state.line,
+                column: state.col,
             };
-            match &token {
-                Token::Whitespace(Whitespace::Newline) => {
-                    self.line += 1;
-                    self.col = 1;
-                }
-
-                Token::Whitespace(Whitespace::Tab) => self.col += 4,
-                Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64,
-                Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
-                Token::Number(s) => self.col += s.len() as u64,
-                Token::SingleQuotedString(s) => self.col += s.len() as u64,
-                _ => self.col += 1,
-            }
 
             tokens.push(TokenWithLocation {
                 token: token,
@@ -351,32 +372,32 @@ impl<'a> Tokenizer<'a> {
     }
 
     /// Get the next token or return None
-    fn next_token(&self, chars: &mut Peekable<Chars<'_>>) -> Result<Option<Token>, TokenizerError> {
+    fn next_token(&self, state: &mut State) -> Result<Option<Token>, TokenizerError> {
         //println!("next_token: {:?}", chars.peek());
-        match chars.peek() {
+        match state.peekable.peek() {
             Some(&ch) => match ch {
-                ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
-                '\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
-                '\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
+                ' ' => self.consume_and_return(state, Token::Whitespace(Whitespace::Space)),
+                '\t' => self.consume_and_return(state, Token::Whitespace(Whitespace::Tab)),
+                '\n' => self.consume_and_return(state, Token::Whitespace(Whitespace::Newline)),
                 '\r' => {
                     // Emit a single Whitespace::Newline token for \r and \r\n
-                    chars.next();
-                    if let Some('\n') = chars.peek() {
-                        chars.next();
+                    state.next();
+                    if let Some('\n') = state.peek() {
+                        state.next();
                     }
                     Ok(Some(Token::Whitespace(Whitespace::Newline)))
                 }
                 'N' => {
-                    chars.next(); // consume, to check the next char
-                    match chars.peek() {
+                    state.next(); // consume, to check the next char
+                    match state.peek() {
                         Some('\'') => {
                             // N'...' - a <national character string literal>
-                            let s = self.tokenize_single_quoted_string(chars)?;
+                            let s = self.tokenize_single_quoted_string(state)?;
                             Ok(Some(Token::NationalStringLiteral(s)))
                         }
                         _ => {
                             // regular identifier starting with an "N"
-                            let s = self.tokenize_word('N', chars);
+                            let s = self.tokenize_word('N', state);
                             Ok(Some(Token::make_word(&s, None)))
                         }
                     }
                 }
                 // The spec only allows an uppercase 'X' to introduce a hex
                 // string, but PostgreSQL, at least, allows a lowercase 'x' too.
                 x @ 'x' | x @ 'X' => {
-                    chars.next(); // consume, to check the next char
-                    match chars.peek() {
+                    state.next(); // consume, to check the next char
+                    match state.peek() {
                         Some('\'') => {
                             // X'...' - a <hexadecimal string literal>
-                            let s = self.tokenize_single_quoted_string(chars)?;
+                            let s = self.tokenize_single_quoted_string(state)?;
                             Ok(Some(Token::HexStringLiteral(s)))
                         }
                         _ => {
                             // regular identifier starting with an "X"
-                            let s = self.tokenize_word(x, chars);
+                            let s = self.tokenize_word(x, state);
                             Ok(Some(Token::make_word(&s, None)))
                         }
                     }
                 }
                 // identifier or keyword
                 ch if self.dialect.is_identifier_start(ch) => {
-                    chars.next(); // consume the first char
-                    let s = self.tokenize_word(ch, chars);
+                    state.next(); // consume the first char
+                    let s = self.tokenize_word(ch, state);
                     Ok(Some(Token::make_word(&s, None)))
                 }
                 // string
                 '\'' => {
-                    let s = self.tokenize_single_quoted_string(chars)?;
+                    let s = self.tokenize_single_quoted_string(state)?;
                     Ok(Some(Token::SingleQuotedString(s)))
                 }
                 // delimited (quoted) identifier
                 quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
-                    chars.next(); // consume the opening quote
+                    state.next(); // consume the opening quote
                     let quote_end = Word::matching_end_quote(quote_start);
-                    let s = peeking_take_while(chars, |ch| ch != quote_end);
-                    if chars.next() == Some(quote_end) {
+                    let s = peeking_take_while(state, |ch| ch != quote_end);
+                    if state.next() == Some(quote_end) {
                         Ok(Some(Token::make_word(&s, Some(quote_start))))
                     } else {
                         self.tokenizer_error(
+                            state,
                             format!("Expected close delimiter '{}' before EOF.", quote_end)
                                 .as_str(),
                         )
@@ -426,20 +448,20 @@ impl<'a> Tokenizer<'a> {
                 // numbers
                 '0'..='9' => {
                     // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal
-                    let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.'));
+                    let s = peeking_take_while(state, |ch| matches!(ch, '0'..='9' | '.'));
                     Ok(Some(Token::Number(s)))
                 }
                 // punctuation
-                '(' => self.consume_and_return(chars, Token::LParen),
-                ')' => self.consume_and_return(chars, Token::RParen),
-                ',' => self.consume_and_return(chars, Token::Comma),
+                '(' => self.consume_and_return(state, Token::LParen),
+                ')' => self.consume_and_return(state, Token::RParen),
+                ',' => self.consume_and_return(state, Token::Comma),
                 // operators
                 '-' => {
-                    chars.next(); // consume the '-'
-                    match chars.peek() {
+                    state.next(); // consume the '-'
+                    match state.peek() {
                         Some('-') => {
-                            chars.next(); // consume the second '-', starting a single-line comment
-                            let comment = self.tokenize_single_line_comment(chars);
+                            state.next(); // consume the second '-', starting a single-line comment
+                            let comment = self.tokenize_single_line_comment(state);
                             Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
                                 prefix: "--".to_owned(),
                                 comment,
@@ -450,15 +472,15 @@ impl<'a> Tokenizer<'a> {
                     }
                 }
                 '/' => {
-                    chars.next(); // consume the '/'
-                    match chars.peek() {
+                    state.next(); // consume the '/'
+                    match state.peek() {
                         Some('*') => {
-                            chars.next(); // consume the '*', starting a multi-line comment
-                            self.tokenize_multiline_comment(chars)
+                            state.next(); // consume the '*', starting a multi-line comment
+                            self.tokenize_multiline_comment(state)
                         }
                         Some('/') if dialect_of!(self is SnowflakeDialect) => {
-                            chars.next(); // consume the second '/', starting a snowflake single-line comment
-                            let comment = self.tokenize_single_line_comment(chars);
+                            state.next(); // consume the second '/', starting a snowflake single-line comment
+                            let comment = self.tokenize_single_line_comment(state);
                             Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
                                 prefix: "//".to_owned(),
                                 comment,
                         _ => Ok(Some(Token::Div)),
                     }
                 }
-                '+' => self.consume_and_return(chars, Token::Plus),
-                '*' => self.consume_and_return(chars, Token::Mult),
-                '%' => self.consume_and_return(chars, Token::Mod),
+                '+' => self.consume_and_return(state, Token::Plus),
+                '*' => self.consume_and_return(state, Token::Mult),
+                '%' => self.consume_and_return(state, Token::Mod),
                 '|' => {
-                    chars.next(); // consume the '|'
-                    match chars.peek() {
-                        Some('/') => self.consume_and_return(chars, Token::PGSquareRoot),
+                    state.next(); // consume the '|'
+                    match state.peek() {
+                        Some('/') => self.consume_and_return(state, Token::PGSquareRoot),
                         Some('|') => {
-                            chars.next(); // consume the second '|'
-                            match chars.peek() {
-                                Some('/') => self.consume_and_return(chars, Token::PGCubeRoot),
+                            state.next(); // consume the second '|'
+                            match state.peek() {
+                                Some('/') => self.consume_and_return(state, Token::PGCubeRoot),
                                 _ => Ok(Some(Token::StringConcat)),
                             }
                         }
                     }
                 }
                 '=' => {
-                    chars.next(); // consume
-                    match chars.peek() {
-                        Some('>') => self.consume_and_return(chars, Token::RArrow),
+                    state.next(); // consume
+                    match state.peek() {
+                        Some('>') => self.consume_and_return(state, Token::RArrow),
                         _ => Ok(Some(Token::Eq)),
                     }
                 }
-                '.' => self.consume_and_return(chars, Token::Period),
+                '.' => self.consume_and_return(state, Token::Period),
                 '!' => {
-                    chars.next(); // consume
-                    match chars.peek() {
-                        Some('=') => self.consume_and_return(chars, Token::Neq),
-                        Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
+                    state.next(); // consume
+                    match state.peek() {
+                        Some('=') => self.consume_and_return(state, Token::Neq),
+                        Some('!') => self.consume_and_return(state, Token::DoubleExclamationMark),
                         _ => Ok(Some(Token::ExclamationMark)),
                     }
                 }
                 '<' => {
-                    chars.next(); // consume
-                    match chars.peek() {
-                        Some('=') => self.consume_and_return(chars, Token::LtEq),
-                        Some('>') => self.consume_and_return(chars, Token::Neq),
-                        Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
+                    state.next(); // consume
+                    match state.peek() {
+                        Some('=') => self.consume_and_return(state, Token::LtEq),
+                        Some('>') => self.consume_and_return(state, Token::Neq),
+                        Some('<') => self.consume_and_return(state, Token::ShiftLeft),
                         _ => Ok(Some(Token::Lt)),
                     }
                 }
                 '>' => {
-                    chars.next(); // consume
-                    match chars.peek() {
-                        Some('=') => self.consume_and_return(chars, Token::GtEq),
-                        Some('>') => self.consume_and_return(chars, Token::ShiftRight),
+                    state.next(); // consume
+                    match state.peek() {
+                        Some('=') => self.consume_and_return(state, Token::GtEq),
+                        Some('>') => self.consume_and_return(state, Token::ShiftRight),
                         _ => Ok(Some(Token::Gt)),
                     }
                 }
                 ':' => {
-                    chars.next();
-                    match chars.peek() {
-                        Some(':') => self.consume_and_return(chars, Token::DoubleColon),
+                    state.next();
+                    match state.peek() {
+                        Some(':') => self.consume_and_return(state, Token::DoubleColon),
                         _ => Ok(Some(Token::Colon)),
                     }
                 }
-                ';' => self.consume_and_return(chars, Token::SemiColon),
+                ';' => self.consume_and_return(state, Token::SemiColon),
-                '\\' => self.consume_and_return(chars, Token::Backslash),
-                '[' => self.consume_and_return(chars, Token::LBracket),
-                ']' => self.consume_and_return(chars, Token::RBracket),
-                '&' => self.consume_and_return(chars, Token::Ampersand),
-                '^' => self.consume_and_return(chars, Token::Caret),
-                '{' => self.consume_and_return(chars, Token::LBrace),
-                '}' => self.consume_and_return(chars, Token::RBrace),
+                '\\' => self.consume_and_return(state, Token::Backslash),
+                '[' => self.consume_and_return(state, Token::LBracket),
+                ']' => self.consume_and_return(state, Token::RBracket),
+                '&' => self.consume_and_return(state, Token::Ampersand),
+                '^' => self.consume_and_return(state, Token::Caret),
+                '{' => self.consume_and_return(state, Token::LBrace),
+                '}' => self.consume_and_return(state, Token::RBrace),
                 '#' if dialect_of!(self is SnowflakeDialect) => {
-                    chars.next(); // consume the '#', starting a snowflake single-line comment
-                    let comment = self.tokenize_single_line_comment(chars);
+                    state.next(); // consume the '#', starting a snowflake single-line comment
+                    let comment = self.tokenize_single_line_comment(state);
                     Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
                         prefix: "#".to_owned(),
                         comment,
                     })))
                 }
-                '~' => self.consume_and_return(chars, Token::Tilde),
-                '#' => self.consume_and_return(chars, Token::Sharp),
-                '@' => self.consume_and_return(chars, Token::AtSign),
-                other => self.consume_and_return(chars, Token::Char(other)),
+                '~' => self.consume_and_return(state, Token::Tilde),
+                '#' => self.consume_and_return(state, Token::Sharp),
+                '@' => self.consume_and_return(state, Token::AtSign),
+                other => self.consume_and_return(state, Token::Char(other)),
             },
             None => Ok(None),
         }
     }
 
-    fn tokenizer_error<R>(&self, message: &str) -> Result<R, TokenizerError> {
+    fn tokenizer_error<R>(&self, state: &State, message: &str) -> Result<R, TokenizerError> {
+        let loc = state.location();
         Err(TokenizerError {
             message: message.to_string(),
-            col: self.col,
-            line: self.line,
+            col: loc.column,
+            line: loc.line,
         })
     }
 
     // Consume characters until newline
-    fn tokenize_single_line_comment(&self, chars: &mut Peekable<Chars<'_>>) -> String {
-        let mut comment = peeking_take_while(chars, |ch| ch != '\n');
-        if let Some(ch) = chars.next() {
+    fn tokenize_single_line_comment(&self, state: &mut State) -> String {
+        let mut comment = peeking_take_while(state, |ch| ch != '\n');
+        if let Some(ch) = state.next() {
             assert_eq!(ch, '\n');
             comment.push(ch);
         }
@@ -570,51 +593,48 @@ impl<'a> Tokenizer<'a> {
     }
 
     /// Tokenize an identifier or keyword, after the first char is already consumed.
-    fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
+    fn tokenize_word(&self, first_char: char, state: &mut State) -> String {
         let mut s = first_char.to_string();
-        s.push_str(&peeking_take_while(chars, |ch| {
+        s.push_str(&peeking_take_while(state, |ch| {
             self.dialect.is_identifier_part(ch)
         }));
         s
     }
 
     /// Read a single quoted string, starting with the opening quote.
-    fn tokenize_single_quoted_string(
-        &self,
-        chars: &mut Peekable<Chars<'_>>,
-    ) -> Result<String, TokenizerError> {
+    fn tokenize_single_quoted_string(&self, state: &mut State) -> Result<String, TokenizerError> {
         let mut s = String::new();
-        chars.next(); // consume the opening quote
-        while let Some(&ch) = chars.peek() {
+        state.next(); // consume the opening quote
+        while let Some(&ch) = state.peek() {
             match ch {
                 '\'' => {
-                    chars.next(); // consume
-                    let escaped_quote = chars.peek().map(|c| *c == '\'').unwrap_or(false);
+                    state.next(); // consume
+                    let escaped_quote = state.peek().map(|c| *c == '\'').unwrap_or(false);
                     if escaped_quote {
                         s.push('\'');
-                        chars.next();
+                        state.next();
                     } else {
                         return Ok(s);
                     }
                 }
                 _ => {
-                    chars.next(); // consume
+                    state.next(); // consume
                     s.push(ch);
                 }
             }
         }
-        self.tokenizer_error("Unterminated string literal")
+        self.tokenizer_error(state, "Unterminated string literal")
     }
 
     fn tokenize_multiline_comment(
         &self,
-        chars: &mut Peekable<Chars<'_>>,
+        state: &mut State,
     ) -> Result<Option<Token>, TokenizerError> {
         let mut s = String::new();
         let mut maybe_closing_comment = false;
         // TODO: deal with nested comments
         loop {
-            match chars.next() {
+            match state.next() {
                 Some(ch) => {
                     if maybe_closing_comment {
                         if ch == '/' {
@@ -628,17 +648,20 @@ impl<'a> Tokenizer<'a> {
                         s.push(ch);
                     }
                 }
-                None => break self.tokenizer_error("Unexpected EOF while in a multi-line comment"),
+                None => {
+                    break self
+                        .tokenizer_error(state, "Unexpected EOF while in a multi-line comment")
+                }
             }
         }
     }
 
     fn consume_and_return(
         &self,
-        chars: &mut Peekable<Chars<'_>>,
+        state: &mut State,
         t: Token,
     ) -> Result<Option<Token>, TokenizerError> {
-        chars.next();
+        state.next();
         Ok(Some(t))
     }
 }
 
 /// Read from `chars` until `predicate` returns `false` or EOF is hit.
 /// Return the characters read as String, and keep the first non-matching
 /// char available as `chars.next()`.
-fn peeking_take_while(
-    chars: &mut Peekable<Chars<'_>>,
-    mut predicate: impl FnMut(char) -> bool,
-) -> String {
+fn peeking_take_while(state: &mut State, mut predicate: impl FnMut(char) -> bool) -> String {
     let mut s = String::new();
-    while let Some(&ch) = chars.peek() {
+    while let Some(&ch) = state.peek() {
         if predicate(ch) {
-            chars.next(); // consume
+            state.peekable.next(); // consume
             s.push(ch);
         } else {
             break;
@@ -1117,6 +1137,21 @@ mod tests {
         assert_eq!(expected, actual);
     }
 
+    #[test]
+    fn tokenize_location_multiline_string_literal() {
+        let sql = String::from("'some\nthing' foo");
+
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize_with_location().unwrap();
+        let expected = vec![
+            TokenWithLocation::new(Token::make_word("some\nthing", None), 1, 1),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 2, 1),
+            TokenWithLocation::new(Token::make_word("foo", None), 2, 2),
+        ];
+        compare_with_location(expected, tokens);
+    }
+
     #[test]
     fn tokenize_location_newlines() {
         let sql = String::from("line1\nline2\rline3\r\nline4\r");

From 04276b4877abca62ac8c340dbe2fbb5d448831e3 Mon Sep 17 00:00:00 2001
From: Vasily Kulikov
Date: Fri, 8 Jan 2021 23:04:22 +0300
Subject: [PATCH 5/9] fix

---
 src/tokenizer.rs | 44 +++++++++++---------------------------------
 1 file changed, 11 insertions(+), 33 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 4055e39aa..f2ce01544 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -357,16 +357,14 @@ impl<'a> Tokenizer<'a> {
 
         let mut tokens: Vec<TokenWithLocation> = vec![];
 
+        let mut location = state.location();
         while let Some(token) = self.next_token(&mut state)? {
{ - let location = Location { - line: state.line, - column: state.col, - }; - tokens.push(TokenWithLocation { token: token, - location: location, + location: location.clone(), }); + + location = state.location(); } Ok(tokens) } @@ -374,7 +372,7 @@ impl<'a> Tokenizer<'a> { /// Get the next token or return None fn next_token(&self, state: &mut State) -> Result, TokenizerError> { //println!("next_token: {:?}", chars.peek()); - match state.peekable.peek() { + match state.peek() { Some(&ch) => match ch { ' ' => self.consume_and_return(state, Token::Whitespace(Whitespace::Space)), '\t' => self.consume_and_return(state, Token::Whitespace(Whitespace::Tab)), @@ -673,7 +671,7 @@ fn peeking_take_while(state: &mut State, mut predicate: impl FnMut(char) -> bool let mut s = String::new(); while let Some(&ch) = state.peek() { if predicate(ch) { - state.peekable.next(); // consume + state.next(); // consume s.push(ch); } else { break; @@ -930,7 +928,7 @@ mod tests { Err(TokenizerError { message: "Unterminated string literal".to_string(), line: 1, - col: 8 + col: 12 }) ); } @@ -1074,7 +1072,7 @@ mod tests { Err(TokenizerError { message: "Expected close delimiter '\"' before EOF.".to_string(), line: 1, - col: 1 + col: 5, }) ); } @@ -1145,29 +1143,9 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize_with_location().unwrap(); let expected = vec![ - TokenWithLocation::new(Token::make_word("some\nthing", None), 1, 1), - TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 2, 1), - TokenWithLocation::new(Token::make_word("foo", None), 2, 2), - ]; - compare_with_location(expected, tokens); - } - - #[test] - fn tokenize_location_newlines() { - let sql = String::from("line1\nline2\rline3\r\nline4\r"); - - let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize_with_location().unwrap(); - let expected = vec![ - TokenWithLocation::new(Token::make_word("line1", None), 1, 1), - TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 1, 6), - TokenWithLocation::new(Token::make_word("line2", None), 2, 1), - TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 2, 6), - TokenWithLocation::new(Token::make_word("line3", None), 3, 1), - TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 3, 6), - TokenWithLocation::new(Token::make_word("line4", None), 4, 1), - TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 4, 6), + TokenWithLocation::new(Token::SingleQuotedString("some\nthing".to_string()), 1, 1), + TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 2, 7), + TokenWithLocation::new(Token::make_word("foo", None), 2, 8), ]; compare_with_location(expected, tokens); } From da2395411fed51c7aa645df05f0425ef3e6c646a Mon Sep 17 00:00:00 2001 From: Vasily Kulikov Date: Fri, 8 Jan 2021 23:43:23 +0300 Subject: [PATCH 6/9] do not copy input str --- src/tokenizer.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f2ce01544..0fcaf922b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -323,16 +323,13 @@ impl<'a> State<'a> { /// SQL Tokenizer pub struct Tokenizer<'a> { dialect: &'a dyn Dialect, - pub query: String, + pub query: &'a str, } impl<'a> Tokenizer<'a> { /// Create a new SQL tokenizer for the specified SQL statement - pub fn new(dialect: &'a dyn Dialect, query: &str) -> Self { - Self { - dialect, - query: query.to_string(), - } + pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> 
Self { + Self { dialect, query } } /// Tokenize the statement and produce a vector of tokens From 5ff9614be3799fa511ba905d8e23a8ba914f4c06 Mon Sep 17 00:00:00 2001 From: Vasily Kulikov Date: Fri, 8 Jan 2021 23:44:10 +0300 Subject: [PATCH 7/9] reset version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a26156953..231d7fee5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.7.1" +version = "0.7.1-alpha.0" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 0287038983d0b3768c31ea271af58d0530a8bb0d Mon Sep 17 00:00:00 2001 From: Vasily Kulikov Date: Sat, 9 Jan 2021 00:07:20 +0300 Subject: [PATCH 8/9] fix clippy --- src/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 0fcaf922b..9d92fe681 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -357,7 +357,7 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = self.next_token(&mut state)? { tokens.push(TokenWithLocation { - token: token, + token, location: location.clone(), }); From ee257fd478a20f7ce1cbc37d6b998d7b6148072e Mon Sep 17 00:00:00 2001 From: Vasily Kulikov Date: Sat, 9 Jan 2021 00:16:36 +0300 Subject: [PATCH 9/9] +docs --- src/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9d92fe681..44dfea3dd 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -344,7 +344,7 @@ impl<'a> Tokenizer<'a> { Ok(tokens) } - /// Tokenize the statement and produce a vector of tokens + /// Tokenize the statement and produce a vector of tokens with location information pub fn tokenize_with_location(&mut self) -> Result, TokenizerError> { let mut state = State { peekable: self.query.chars().peekable(),