Skip to content

Commit

Permalink
in progress parser updates
Browse files Browse the repository at this point in the history
  • Loading branch information
devinjdangelo committed Feb 19, 2024
1 parent b2a0451 commit dc03282
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 27 deletions.
9 changes: 7 additions & 2 deletions datafusion/common/src/file_options/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,16 @@ use self::{
csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions,
};

/// The value half of an [`OptionTuple`]: either one string or a list of
/// strings.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OptionValue {
    /// A single string value.
    Single(String),
    /// A list of string values.
    List(Vec<String>),
}
/// Represents a single arbitrary setting in a
/// [StatementOptions] where OptionTuple.0 determines
/// the specific setting to be modified and OptionTuple.1
/// determines the value which should be applied
pub type OptionTuple = (String, String);
pub type OptionTuple = (String, OptionValue);

/// Represents arbitrary tuples of options passed as String
/// tuples from SQL statements. As in the following statement:
Expand All @@ -66,7 +71,7 @@ impl From<&HashMap<String, String>> for StatementOptions {
Self {
options: value
.iter()
.map(|(k, v)| (k.to_owned(), v.to_owned()))
.map(|(k, v)| (k.to_owned(), OptionValue::Single(v.to_owned())))
.collect::<Vec<OptionTuple>>(),
}
}
Expand Down
90 changes: 70 additions & 20 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,32 @@ pub struct CopyToStatement {
/// The URL to where the data is heading
pub target: String,
/// Target specific options
pub options: Vec<(String, Value)>,
pub options: Vec<(String, CopyToOptionValue)>,
}

/// The value of one entry in [`CopyToStatement::options`]: either a single
/// [Value] or a parenthesized list of strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CopyToOptionValue {
/// A single [Value], e.g. (format parquet)
Single(Value),
/// A list of strings (stored as `String`, not [Value]),
/// e.g. (partition_by ("a", "b", "c"))
List(Vec<String>),
}

impl fmt::Display for CopyToOptionValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self {
CopyToOptionValue::Single(val) => write!(f, "{val}")?,
CopyToOptionValue::List(vals) => write!(
f,
"({})",
vals.iter()
.map(|v| v.to_string())
.collect::<Vec<_>>()
.join(", ")
)?,
}
Ok(())
}
}

impl fmt::Display for CopyToStatement {
Expand Down Expand Up @@ -336,7 +361,7 @@ impl<'a> DFParser<'a> {
self.parse_copy()
}
Keyword::EXPLAIN => {
// (TODO parse all supported statements)
// (TODO parse all supported statements)
self.parser.next_token(); // EXPLAIN
self.parse_explain()
}
Expand All @@ -359,6 +384,7 @@ impl<'a> DFParser<'a> {

/// Parse a SQL `COPY TO` statement
pub fn parse_copy(&mut self) -> Result<Statement, ParserError> {
println!("deleteme... parse_copy called");
// parse as a query
let source = if self.parser.consume_token(&Token::LParen) {
let query = self.parser.parse_query()?;
Expand Down Expand Up @@ -411,20 +437,37 @@ impl<'a> DFParser<'a> {
/// word or keyword in this location.
///
/// [`parse_value`]: sqlparser::parser::Parser::parse_value
pub fn parse_option_value(&mut self) -> Result<Value, ParserError> {
let next_token = self.parser.next_token();
pub fn parse_option_value(&mut self) -> Result<CopyToOptionValue, ParserError> {
let next_token = self.parser.peek_token();
match next_token.token {
Token::Word(Word { value, .. }) => Ok(Value::UnQuotedString(value)),
Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)),
Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)),
Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)),
Token::Number(ref n, l) => match n.parse() {
Ok(n) => Ok(Value::Number(n, l)),
// The tokenizer should have ensured `n` is an integer
// so this should not be possible
Err(e) => parser_err!(format!(
"Unexpected error: could not parse '{n}' as number: {e}"
)),
Token::Word(Word { value, .. }) => {
self.parser.next_token();
Ok(CopyToOptionValue::Single(Value::UnQuotedString(value)))
},
Token::SingleQuotedString(s) => {
self.parser.next_token();
Ok(CopyToOptionValue::Single(Value::SingleQuotedString(s)))
},
Token::DoubleQuotedString(s) => {
self.parser.next_token();
Ok(CopyToOptionValue::Single(Value::DoubleQuotedString(s)))
},
Token::EscapedStringLiteral(s) => {
self.parser.next_token();
Ok(CopyToOptionValue::Single(Value::EscapedStringLiteral(s)))
},
Token::Number(ref n, l) => {
self.parser.next_token();
match n.parse() {
Ok(n) => Ok(CopyToOptionValue::Single(Value::Number(n, l))),
// The tokenizer should have ensured `n` is an integer
// so this should not be possible
Err(e) => parser_err!(format!(
"Unexpected error: could not parse '{n}' as number: {e}"
)),
}},
Token::LParen => {
Ok(CopyToOptionValue::List(self.parse_partitions()?))
},
_ => self.parser.expected("string or numeric value", next_token),
}
Expand Down Expand Up @@ -457,13 +500,19 @@ impl<'a> DFParser<'a> {

fn parse_partitions(&mut self) -> Result<Vec<String>, ParserError> {
let mut partitions: Vec<String> = vec![];
println!("parse parts");
for i in 0..10{
println!("{}",self.parser.peek_nth_token(i));
}
if !self.parser.consume_token(&Token::LParen)
|| self.parser.consume_token(&Token::RParen)
{
println!("exit parse parts early");
return Ok(partitions);
}

loop {
println!("loop parse parts");
if let Token::Word(_) = self.parser.peek_token().token {
let identifier = self.parser.parse_identifier(false)?;
partitions.push(identifier.to_string());
Expand Down Expand Up @@ -781,14 +830,15 @@ impl<'a> DFParser<'a> {
/// Unlike [`Self::parse_string_options`], this method supports
/// keywords as key names as well as multiple value types such as
/// Numbers as well as Strings.
fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, ParserError> {
fn parse_value_options(&mut self) -> Result<Vec<(String, CopyToOptionValue)>, ParserError> {
let mut options = vec![];
self.parser.expect_token(&Token::LParen)?;

loop {
let key = self.parse_option_key()?;
let value = self.parse_option_value()?;
options.push((key, value));
println!("deleteme options: {options:?}");
let comma = self.parser.consume_token(&Token::Comma);
if self.parser.consume_token(&Token::RParen) {
// allow a trailing comma, even though it's not in standard
Expand Down Expand Up @@ -1393,7 +1443,7 @@ mod tests {
target: "bar".to_string(),
options: vec![(
"row_group_size".to_string(),
Value::Number("55".to_string(), false),
CopyToOptionValue::Single(Value::Number("55".to_string(), false)),
)],
});
assert_eq!(verified_stmt(sql), expected);
Expand All @@ -1409,15 +1459,15 @@ mod tests {
let expected_options = vec![
(
"format".to_string(),
Value::UnQuotedString("parquet".to_string()),
CopyToOptionValue::Single(Value::UnQuotedString("parquet".to_string())),
),
(
"row_group_size".to_string(),
Value::Number("55".to_string(), false),
CopyToOptionValue::Single(Value::Number("55".to_string(), false)),
),
(
"compression".to_string(),
Value::UnQuotedString("snappy".to_string()),
CopyToOptionValue::Single(Value::UnQuotedString("snappy".to_string())),
),
];

Expand Down
12 changes: 8 additions & 4 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;

use crate::parser::{
CopyToSource, CopyToStatement, CreateExternalTable, DFParser, ExplainStatement,
LexOrdering, Statement as DFStatement,
CopyToOptionValue, CopyToSource, CopyToStatement, CreateExternalTable, DFParser, ExplainStatement, LexOrdering, Statement as DFStatement
};
use crate::planner::{
object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel,
Expand Down Expand Up @@ -713,8 +712,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let options = statement
.options
.iter()
.map(|(s, v)| (s.to_owned(), v.to_string()))
.collect::<Vec<(String, String)>>();
.map(|(s, value)| {
match value{
CopyToOptionValue::Single(v) =>(s.to_owned(), v.to_string()),
CopyToOptionValue::List(v)=>(s, v.to_owned())
}
})
.collect::<Vec<(_, _)>>();

let mut statement_options = StatementOptions::new(options);
let file_format = statement_options.try_infer_file_type(&statement.target)?;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/copy.slt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ select * from validate_partitioned_parquet_bar order by col1;
# Copy to directory as partitioned files
query ITT
COPY (values (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table2/'
(format parquet, compression 'zstd(10)', partition_by 'column2, column3');
(format parquet, compression 'zstd(10)', partition_by (column2, column3));
----
3

Expand Down

0 comments on commit dc03282

Please sign in to comment.