Skip to content

Commit

Permalink
truly flexible csv/tsv parsing (nushell#14399)
Browse files Browse the repository at this point in the history
- fixes nushell#14398

I will properly fill out this PR and fix any tests that might break when
I have the time; this was a quick fix.

# Description

This PR makes `from csv` and `from tsv`, with the `--flexible` flag,
stop dropping extra/unexpected columns.

# User-Facing Changes

`$text`'s contents
```csv
value
1,aaa
2,bbb
3
4,ddd
5,eee,extra
```

Old behavior
```nushell
> $text | from csv --flexible --noheaders 
╭─#─┬─column0─╮
│ 0 │ value   │
│ 1 │       1 │
│ 2 │       2 │
│ 3 │       3 │
│ 4 │       4 │
│ 5 │       5 │
╰─#─┴─column0─╯
```

New behavior
```nushell
> $text | from csv --flexible --noheaders 
╭─#─┬─column0─┬─column1─┬─column2─╮
│ 0 │ value   │   ❎    │   ❎    │
│ 1 │       1 │ aaa     │   ❎    │
│ 2 │       2 │ bbb     │   ❎    │
│ 3 │       3 │   ❎    │   ❎    │
│ 4 │       4 │ ddd     │   ❎    │
│ 5 │       5 │ eee     │ extra   │
╰─#─┴─column0─┴─column1─┴─column2─╯
```

- The first line in a csv (or tsv) document no longer limits the number
of columns
- Missing values in columns are no longer automatically filled with `null`
with this change, as a later row can introduce new columns. **BREAKING
CHANGE**

Because missing columns are different from empty columns, operations on
possibly missing columns will have to use optional access syntax, e.g.
`get foo` => `get foo?`
  
# Tests + Formatting
Added examples that run as tests and adjusted existing tests to confirm
the new behavior.

# After Submitting

Update the fish completer workaround mentioned
[here](https://www.nushell.sh/cookbook/external_completers.html#fish-completer)
  • Loading branch information
Bahex authored Nov 21, 2024
1 parent 2a90cb7 commit 5f7082f
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 30 deletions.
25 changes: 24 additions & 1 deletion crates/nu-command/src/formats/from/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ impl Command for FromCsv {

fn signature(&self) -> Signature {
Signature::build("from csv")
.input_output_types(vec![(Type::String, Type::table())])
.input_output_types(vec![
(Type::String, Type::table()),
(Type::String, Type::list(Type::Any)),
])
.named(
"separator",
SyntaxShape::String,
Expand Down Expand Up @@ -82,6 +85,26 @@ impl Command for FromCsv {
})],
))
},
Example {
description: "Convert comma-separated data to a table, allowing variable number of columns per row",
example: "\"ColA,ColB\n1,2\n3,4,5\n6\" | from csv --flexible",
result: Some(Value::test_list (
vec![
Value::test_record(record! {
"ColA" => Value::test_int(1),
"ColB" => Value::test_int(2),
}),
Value::test_record(record! {
"ColA" => Value::test_int(3),
"ColB" => Value::test_int(4),
"column2" => Value::test_int(5),
}),
Value::test_record(record! {
"ColA" => Value::test_int(6),
}),
],
))
},
Example {
description: "Convert comma-separated data to a table, ignoring headers",
example: "open data.txt | from csv --noheaders",
Expand Down
43 changes: 17 additions & 26 deletions crates/nu-command/src/formats/from/delimited.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,7 @@ fn from_delimited_stream(
.from_reader(input_reader);

let headers = if noheaders {
(0..reader
.headers()
.map_err(|err| from_csv_error(err, span))?
.len())
.map(|i| format!("column{i}"))
.collect::<Vec<String>>()
vec![]
} else {
reader
.headers()
Expand All @@ -54,32 +49,28 @@ fn from_delimited_stream(
.collect()
};

let n = headers.len();
let columns = headers
.into_iter()
.chain((n..).map(|i| format!("column{i}")));
let iter = reader.into_records().map(move |row| {
let row = match row {
Ok(row) => row,
Err(err) => return Value::error(from_csv_error(err, span), span),
};
let columns = headers.iter().cloned();
let values = row
.into_iter()
.map(|s| {
if no_infer {
Value::string(s, span)
} else if let Ok(i) = s.parse() {
Value::int(i, span)
} else if let Ok(f) = s.parse() {
Value::float(f, span)
} else {
Value::string(s, span)
}
})
.chain(std::iter::repeat(Value::nothing(span)));
let columns = columns.clone();
let values = row.into_iter().map(|s| {
if no_infer {
Value::string(s, span)
} else if let Ok(i) = s.parse() {
Value::int(i, span)
} else if let Ok(f) = s.parse() {
Value::float(f, span)
} else {
Value::string(s, span)
}
});

// If there are more values than the number of headers,
// then the remaining values are ignored.
//
// Otherwise, if there are less values than headers,
// then `Value::nothing(span)` is used to fill the remaining columns.
Value::record(columns.zip(values).collect(), span)
});

Expand Down
20 changes: 19 additions & 1 deletion crates/nu-command/src/formats/from/tsv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ impl Command for FromTsv {

fn signature(&self) -> Signature {
Signature::build("from tsv")
.input_output_types(vec![(Type::String, Type::table())])
.input_output_types(vec![
(Type::String, Type::table()),
(Type::String, Type::list(Type::Any)),
])
.named(
"comment",
SyntaxShape::String,
Expand Down Expand Up @@ -76,6 +79,21 @@ impl Command for FromTsv {
})],
))
},
Example {
description: "Convert comma-separated data to a table, allowing variable number of columns per row and ignoring headers",
example: "\"value 1\nvalue 2\tdescription 2\" | from tsv --flexible --noheaders",
result: Some(Value::test_list (
vec![
Value::test_record(record! {
"column0" => Value::test_string("value 1"),
}),
Value::test_record(record! {
"column0" => Value::test_string("value 2"),
"column1" => Value::test_string("description 2"),
}),
],
))
},
Example {
description: "Create a tsv file with header columns and open it",
example: r#"$'c1(char tab)c2(char tab)c3(char nl)1(char tab)2(char tab)3' | save tsv-data | open tsv-data | from tsv"#,
Expand Down
4 changes: 2 additions & 2 deletions crates/nu-command/tests/format_conversions/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ fn from_csv_test_flexible_extra_vals() {
echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, 2]");
assert_eq!(actual.out, "[1, 2, 3]");
}

#[test]
Expand All @@ -479,5 +479,5 @@ fn from_csv_test_flexible_missing_vals() {
echo "a,b\n1" | from csv --flexible | first | values | to nuon
"#
));
assert_eq!(actual.out, "[1, null]");
assert_eq!(actual.out, "[1]");
}

0 comments on commit 5f7082f

Please sign in to comment.