Skip to content

Commit

Permalink
feat: forbid schema_sample_rows=0 (#304)
Browse files Browse the repository at this point in the history
* fix: forbid schema_sample_rows=0

closes #236

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>

* docs: update docstrings

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>

---------

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
  • Loading branch information
lukapeschke authored Oct 15, 2024
1 parent 152ff50 commit 17f0186
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
6 changes: 4 additions & 2 deletions python/fastexcel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ def load_sheet(
- if `skip_rows` is a number, it skips the specified number
of rows from the start of the sheet.
:param schema_sample_rows: Specifies how many rows should be used to determine
the dtype of a column.
the dtype of a column. Cannot be 0. A specific dtype can be
enforced for some or all columns through the `dtypes` parameter.
If `None`, all rows will be used.
:param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
will try to coerce different dtypes in a column to the same one,
Expand Down Expand Up @@ -336,7 +337,8 @@ def load_table(
If `header_row` is `None`, it skips the number of rows from the
start of the sheet.
:param schema_sample_rows: Specifies how many rows should be used to determine
the dtype of a column.
the dtype of a column. Cannot be 0. A specific dtype can be
enforced for some or all columns through the `dtypes` parameter.
If `None`, all rows will be used.
:param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
will try to coerce different dtypes in a column to the same one,
Expand Down
16 changes: 16 additions & 0 deletions python/tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,19 @@ def test_sheet_name_not_found_error() -> None:
)
def test_docstrings(exc_class: type[Exception], expected_docstring: str) -> None:
assert exc_class.__doc__ == expected_docstring


def test_schema_sample_rows_must_be_nonzero() -> None:
excel_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))

with pytest.raises(
fastexcel.InvalidParametersError,
match="schema_sample_rows cannot be 0, as it would prevent dtype inferring",
):
excel_reader.load_sheet(0, schema_sample_rows=0)

with pytest.raises(
fastexcel.InvalidParametersError,
match="schema_sample_rows cannot be 0, as it would prevent dtype inferring",
):
excel_reader.load_table("my-table", schema_sample_rows=0)
16 changes: 16 additions & 0 deletions src/types/python/excelreader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,14 @@ impl ExcelReader {
eager: bool,
py: Python<'_>,
) -> PyResult<PyObject> {
// Cannot use NonZeroUsize in the parameters, as it is not supported by pyo3
if let Some(0) = schema_sample_rows {
return Err(FastExcelErrorKind::InvalidParameters(
"schema_sample_rows cannot be 0, as it would prevent dtype inferring".to_string(),
)
.into())
.into_pyresult();
}
let sheet = idx_or_name
.try_into()
.and_then(|idx_or_name| match idx_or_name {
Expand Down Expand Up @@ -420,6 +428,14 @@ impl ExcelReader {
eager: bool,
py: Python<'_>,
) -> PyResult<PyObject> {
// Cannot use NonZeroUsize in the parameters, as it is not supported by pyo3
if let Some(0) = schema_sample_rows {
return Err(FastExcelErrorKind::InvalidParameters(
"schema_sample_rows cannot be 0, as it would prevent dtype inferring".to_string(),
)
.into())
.into_pyresult();
}
self.build_table(
name.to_string(),
header_row,
Expand Down

0 comments on commit 17f0186

Please sign in to comment.