feat: forbid schema_sample_rows=0 (#304)

* fix: forbid schema_sample_rows=0

  closes #236

  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>

* docs: update docstrings

  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>

---------

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
ToucanToco · Oct 15, 2024 · 17f0186
1 parent 152ff50
commit 17f0186
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 2 deletions.
diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
@@ -233,7 +233,8 @@ def load_sheet(
                             - if `skip_rows` is a number, it skips the specified number
                             of rows from the start of the sheet.
         :param schema_sample_rows: Specifies how many rows should be used to determine
-                                   the dtype of a column.
+                                   the dtype of a column. Cannot be 0. A specific dtype can be
+                                   enforced for some or all columns through the `dtypes` parameter.
                                    If `None`, all rows will be used.
         :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
                                will try to coerce different dtypes in a column to the same one,
@@ -336,7 +337,8 @@ def load_table(
                           If `header_row` is `None`, it skips the number of rows from the
                           start of the sheet.
         :param schema_sample_rows: Specifies how many rows should be used to determine
-                                   the dtype of a column.
+                                   the dtype of a column. Cannot be 0. A specific dtype can be
+                                   enforced for some or all columns through the `dtypes` parameter.
                                    If `None`, all rows will be used.
         :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
                                will try to coerce different dtypes in a column to the same one,

diff --git a/python/tests/test_errors.py b/python/tests/test_errors.py
@@ -78,3 +78,19 @@ def test_sheet_name_not_found_error() -> None:
 )
 def test_docstrings(exc_class: type[Exception], expected_docstring: str) -> None:
     assert exc_class.__doc__ == expected_docstring
+
+
+def test_schema_sample_rows_must_be_nonzero() -> None:
+    excel_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
+
+    with pytest.raises(
+        fastexcel.InvalidParametersError,
+        match="schema_sample_rows cannot be 0, as it would prevent dtype inferring",
+    ):
+        excel_reader.load_sheet(0, schema_sample_rows=0)
+
+    with pytest.raises(
+        fastexcel.InvalidParametersError,
+        match="schema_sample_rows cannot be 0, as it would prevent dtype inferring",
+    ):
+        excel_reader.load_table("my-table", schema_sample_rows=0)
diff --git a/src/types/python/excelreader.rs b/src/types/python/excelreader.rs
@@ -350,6 +350,14 @@ impl ExcelReader {
         eager: bool,
         py: Python<'_>,
     ) -> PyResult<PyObject> {
+        // Cannot use NonZeroUsize in the parameters, as it is not supported by pyo3
+        if let Some(0) = schema_sample_rows {
+            return Err(FastExcelErrorKind::InvalidParameters(
+                "schema_sample_rows cannot be 0, as it would prevent dtype inferring".to_string(),
+            )
+            .into())
+            .into_pyresult();
+        }
         let sheet = idx_or_name
             .try_into()
             .and_then(|idx_or_name| match idx_or_name {
@@ -420,6 +428,14 @@ impl ExcelReader {
         eager: bool,
         py: Python<'_>,
     ) -> PyResult<PyObject> {
+        // Cannot use NonZeroUsize in the parameters, as it is not supported by pyo3
+        if let Some(0) = schema_sample_rows {
+            return Err(FastExcelErrorKind::InvalidParameters(
+                "schema_sample_rows cannot be 0, as it would prevent dtype inferring".to_string(),
+            )
+            .into())
+            .into_pyresult();
+        }
         self.build_table(
             name.to_string(),
             header_row,