Skip to content

Commit

Permalink
[file-based cdk] Remove CSV quoting_behavior config option (#29388)
Browse files Browse the repository at this point in the history
* remove CSV quoting_behavior config option

* cleanup after getting latest master
  • Loading branch information
brianjlai authored Aug 15, 2023
1 parent 82b8274 commit 5908b85
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@
from typing_extensions import Literal


class QuotingBehavior(Enum):
    """Quoting modes that can be selected for a CSV format configuration.

    Each member's value is the human-readable label that appears in the
    user-supplied config (for example ``"quoting_behavior": "Quote All"``),
    so a member can be looked up from its label with
    ``QuotingBehavior("Quote All")``.
    """

    QUOTE_ALL = "Quote All"
    QUOTE_SPECIAL_CHARACTERS = "Quote Special Characters"
    QUOTE_NONNUMERIC = "Quote Non-numeric"
    QUOTE_NONE = "Quote None"


class InferenceType(Enum):
NONE = "None"
PRIMITIVE_TYPES_ONLY = "Primitive Types Only"
Expand Down Expand Up @@ -53,11 +46,6 @@ class Config:
double_quote: bool = Field(
title="Double Quote", default=True, description="Whether two quotes in a quoted CSV value denote a single quote in the data."
)
quoting_behavior: QuotingBehavior = Field(
title="Quoting Behavior",
default=QuotingBehavior.QUOTE_SPECIAL_CHARACTERS,
description="The quoting behavior determines when a value in a row should have quote marks added around it. For example, if Quote Non-numeric is specified, while reading, quotes are expected for row values that do not contain numbers. Or for Quote All, every row value will be expecting quotes.",
)
null_values: Set[str] = Field(
title="Null Values",
default=[],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from io import IOBase
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set

from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat, InferenceType, QuotingBehavior
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat, InferenceType
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
Expand All @@ -21,13 +21,6 @@

DIALECT_NAME = "_config_dialect"

# Lookup table from each user-facing QuotingBehavior option to the matching
# `quoting` constant of the standard-library csv module.
config_to_quoting: Mapping[QuotingBehavior, int] = {
    QuotingBehavior.QUOTE_ALL: csv.QUOTE_ALL,
    # "Quote Special Characters" is the csv module's minimal-quoting mode.
    QuotingBehavior.QUOTE_SPECIAL_CHARACTERS: csv.QUOTE_MINIMAL,
    QuotingBehavior.QUOTE_NONNUMERIC: csv.QUOTE_NONNUMERIC,
    QuotingBehavior.QUOTE_NONE: csv.QUOTE_NONE,
}


class _CsvReader:
def read_data(
Expand All @@ -50,7 +43,7 @@ def read_data(
quotechar=config_format.quote_char,
escapechar=config_format.escape_char,
doublequote=config_format.double_quote,
quoting=config_to_quoting.get(config_format.quoting_behavior, csv.QUOTE_MINIMAL),
quoting=csv.QUOTE_MINIMAL,
)
with stream_reader.open_file(file, file_read_mode, config_format.encoding, logger) as fp:
headers = self._get_headers(fp, config_format, dialect_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,23 @@
from typing import Any, Mapping, Type

import pytest as pytest
from airbyte_cdk.sources.file_based.config.csv_format import QuotingBehavior
from airbyte_cdk.sources.file_based.config.file_based_stream_config import CsvFormat, FileBasedStreamConfig
from pydantic import ValidationError


@pytest.mark.parametrize(
"file_type, input_format, expected_format, expected_error",
[
pytest.param("csv", {"filetype": "csv", "delimiter": "d", "quote_char": "q", "escape_char": "e", "encoding": "ascii", "double_quote": True, "quoting_behavior": "Quote All"}, {"filetype": "csv", "delimiter": "d", "quote_char": "q", "escape_char": "e", "encoding": "ascii", "double_quote": True, "quoting_behavior": QuotingBehavior.QUOTE_ALL}, None, id="test_valid_format"),
pytest.param("csv", {"filetype": "csv", "double_quote": False}, {"delimiter": ",", "quote_char": "\"", "encoding": "utf8", "double_quote": False, "quoting_behavior": QuotingBehavior.QUOTE_SPECIAL_CHARACTERS}, None, id="test_default_format_values"),
pytest.param("csv", {"filetype": "csv", "delimiter": "d", "quote_char": "q", "escape_char": "e", "encoding": "ascii", "double_quote": True}, {"filetype": "csv", "delimiter": "d", "quote_char": "q", "escape_char": "e", "encoding": "ascii", "double_quote": True}, None, id="test_valid_format"),
pytest.param("csv", {"filetype": "csv", "double_quote": False}, {"delimiter": ",", "quote_char": "\"", "encoding": "utf8", "double_quote": False}, None, id="test_default_format_values"),
pytest.param("csv", {"filetype": "csv", "delimiter": "nope", "double_quote": True}, None, ValidationError, id="test_invalid_delimiter"),
pytest.param("csv", {"filetype": "csv", "quote_char": "nope", "double_quote": True}, None, ValidationError, id="test_invalid_quote_char"),
pytest.param("csv", {"filetype": "csv", "escape_char": "nope", "double_quote": True}, None, ValidationError, id="test_invalid_escape_char"),
pytest.param("csv", {"filetype": "csv", "delimiter": ",", "quote_char": "\"", "encoding": "not_a_format", "double_quote": True}, {}, ValidationError, id="test_invalid_encoding_type"),
pytest.param("csv", {"filetype": "csv", "double_quote": True, "quoting_behavior": "Quote Invalid"}, None, ValidationError, id="test_invalid_quoting_behavior"),
pytest.param("invalid", {"filetype": "invalid", "double_quote": False}, {}, ValidationError, id="test_config_format_file_type_mismatch"),
]
)
def test_csv_config(file_type: str, input_format: Mapping[str, Any], expected_format: Mapping[str, QuotingBehavior], expected_error: Type[Exception]) -> None:
def test_csv_config(file_type: str, input_format: Mapping[str, Any], expected_format: Mapping[str, Any], expected_error: Type[Exception]) -> None:
stream_config = {
"name": "stream1",
"file_type": file_type,
Expand Down Expand Up @@ -58,7 +56,6 @@ def test_invalid_validation_policy() -> None:
"escape_char": "e",
"encoding": "ascii",
"double_quote": True,
"quoting_behavior": "Quote All"
},
}
with pytest.raises(ValidationError):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,6 @@
"default": True,
"type": "boolean",
},
"quoting_behavior": {
"title": "Quoting Behavior",
"description": "The quoting behavior determines when a value in a row should have quote marks added around it. For example, if Quote Non-numeric is specified, while reading, quotes are expected for row values that do not contain numbers. Or for Quote All, every row value will be expecting quotes.",
"default": "Quote Special Characters",
"enum": ["Quote All", "Quote Special Characters", "Quote Non-numeric", "Quote None"],
},
"null_values": {
"title": "Null Values",
"description": "A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.",
Expand Down Expand Up @@ -781,7 +775,6 @@
"quote_char": "|",
"escape_char": "!",
"double_quote": True,
"quoting_behavior": "Quote Special Characters",
},
}
]
Expand Down Expand Up @@ -896,7 +889,6 @@
"escape_char": "@",
"double_quote": True,
"newlines_in_values": False,
"quoting_behavior": "Quote All",
},
},
]
Expand Down Expand Up @@ -1733,7 +1725,9 @@
"file_type": "csv",
"globs": ["*"],
"validation_policy": "Emit Record",
"format": {"filetype": "csv", "quoting_behavior": "Quote All"},
"format": {
"filetype": "csv",
}
}
],
"start_date": "2023-06-04T03:54:07.000000Z",
Expand Down Expand Up @@ -1883,8 +1877,7 @@
"quote_char": '"',
"delimiter": ",",
"escape_char": "\\",
"quoting_behavior": "Quote All",
},
}
}
],
"start_date": "2023-06-04T03:54:07.000000Z",
Expand Down Expand Up @@ -1955,8 +1948,7 @@
"double_quotes": True,
"quote_char": '"',
"delimiter": ",",
"quoting_behavior": "Quote All",
},
}
}
],
"start_date": "2023-06-04T03:54:07.000000Z",
Expand Down

0 comments on commit 5908b85

Please sign in to comment.