raulcd · raulcd · Nov 14, 2025 · Nov 13, 2025 · Nov 13, 2025 · Nov 13, 2025
diff --git a/src/datanomy/cli.py b/src/datanomy/cli.py
@@ -19,10 +19,6 @@ def main(file: Path) -> None:
     ----------
         file: Path to a Parquet file to inspect
     """
-    if not file.suffix.lower() == ".parquet":
-        click.echo(f"Error: {file} does not appear to be a Parquet file", err=True)
-        sys.exit(1)
-
     try:
         reader = ParquetReader(file)
         app = DatanomyApp(reader)

diff --git a/src/datanomy/reader/parquet.py b/src/datanomy/reader/parquet.py
@@ -4,6 +4,7 @@
 from typing import Any
 
 import pyarrow.parquet as pq
+from pyarrow.lib import ArrowInvalid
 
 
 class ParquetReader:
@@ -16,9 +17,22 @@ def __init__(self, file_path: Path) -> None:
         Parameters
         ----------
             file_path: Path to the Parquet file
-        """
-        self.file_path = file_path
-        self.parquet_file = pq.ParquetFile(file_path)
+
+        Raises
+        ------
+            FileNotFoundError: If the file does not exist
+            ArrowInvalid: If the file is not a valid Parquet file
+        """
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        try:
+            self.file_path = file_path
+            self.parquet_file = pq.ParquetFile(file_path)
+        except ArrowInvalid as e:
+            raise ArrowInvalid(
+                f"{file_path} does not appear to be a Parquet file"
+            ) from e
 
     @property
     def schema_arrow(self) -> Any:

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -106,3 +106,33 @@ def large_schema_parquet(tmp_path: Path) -> Path:
     file_path = tmp_path / "large_schema.parquet"
     pq.write_table(table, file_path)
     return file_path
+
+
+@pytest.fixture
+def parquet_without_extension(tmp_path: Path) -> Path:
+    """Write a small, valid Parquet file whose name has no extension.
+
+    Returns:
+        Path to the extension-less Parquet file
+    """
+    # Two columns with three rows each.
+    columns = {
+        "id": [1, 2, 3],
+        "name": ["a", "b", "c"],
+    }
+    target = tmp_path.joinpath("data_file")
+    # The content is genuine Parquet; only the file name lacks a suffix.
+    pq.write_table(pa.table(columns), target)
+    return target
+
+
+@pytest.fixture
+def invalid_parquet_file(tmp_path: Path) -> Path:
+    """Write a plain-text file whose content is not valid Parquet data.
+
+    Returns:
+        Path to the non-Parquet file
+    """
+    bogus = tmp_path.joinpath("not_a_parquet.dat")
+    bogus.write_text("This is not a Parquet file")
+    return bogus
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -8,18 +8,6 @@
 from datanomy.cli import main
 
 
-def test_cli_rejects_non_parquet_extension(tmp_path: Path) -> None:
-    """Test that CLI rejects files without .parquet extension."""
-    bad_file = tmp_path / "test.txt"
-    bad_file.write_text("not parquet")
-
-    runner = CliRunner()
-    result = runner.invoke(main, [str(bad_file)])
-
-    assert result.exit_code == 1
-    assert "does not appear to be a Parquet file" in result.output
-
-
 def test_cli_rejects_nonexistent_file() -> None:
     """Test that CLI rejects files that don't exist."""
     runner = CliRunner()
@@ -60,19 +48,13 @@ def test_cli_creates_reader(
     mock_reader.assert_called_once_with(simple_parquet)
 
 
-def test_cli_case_insensitive_extension(tmp_path: Path) -> None:
-    """Test that CLI accepts .PARQUET extension (case insensitive)."""
-    # Create a valid parquet file with uppercase extension
-    import pyarrow as pa
-    import pyarrow.parquet as pq
-
-    file_path = tmp_path / "test.PARQUET"
-    table = pa.table({"id": [1, 2, 3]})
-    pq.write_table(table, file_path)
-
+def test_cli_accepts_parquet_without_extension(
+    parquet_without_extension: Path,
+) -> None:
+    """Test that CLI accepts a valid Parquet file that has no file extension."""
     with patch("datanomy.cli.DatanomyApp"):
         runner = CliRunner()
-        result = runner.invoke(main, [str(file_path)])
+        result = runner.invoke(main, [str(parquet_without_extension)])
 
-        # Should accept uppercase extension
+        # Should be accepted based on content; the missing extension must not matter
         assert result.exit_code == 0
diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -2,6 +2,9 @@
 
 from pathlib import Path
 
+import pytest
+from pyarrow.lib import ArrowInvalid
+
 from datanomy.reader.parquet import ParquetReader
 
 
@@ -59,3 +62,27 @@ def test_reader_large_schema(large_schema_parquet: Path) -> None:
     field_names = [field.name for field in reader.schema_arrow]
     for i in range(50):
         assert f"col_{i}" in field_names
+
+
+def test_reader_nonexistent_file(tmp_path: Path) -> None:
+    """A path that does not exist makes ParquetReader raise FileNotFoundError."""
+    missing_path = tmp_path.joinpath("nonexistent.parquet")
+    expectation = pytest.raises(FileNotFoundError, match="File not found")
+    with expectation:
+        ParquetReader(missing_path)
+
+
+def test_reader_invalid_parquet_file(invalid_parquet_file: Path) -> None:
+    """Test that non-Parquet content raises ArrowInvalid with a clear message."""
+    with pytest.raises(ArrowInvalid, match="does not appear to be a Parquet file"):
+        ParquetReader(invalid_parquet_file)
+
+
+def test_reader_accepts_file_without_parquet_extension(
+    parquet_without_extension: Path,
+) -> None:
+    """A valid Parquet file with no extension is read like any other."""
+    loaded = ParquetReader(parquet_without_extension)
+    # The fixture holds three rows in two columns.
+    assert loaded.num_rows == 3
+    assert len(loaded.schema_arrow) == 2