Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

float casting issue #75

Merged
merged 1 commit into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions assets/coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 21 additions & 2 deletions src/shmessy/types/float.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import locale
import logging
from typing import Optional
from typing import Any, Optional, Tuple

from numpy import ndarray
from pandas import Series, to_numeric
from pandas.api.types import is_numeric_dtype

from ..exceptions import FieldCastingException
from ..schema import InferredField
from .base import BaseType

Expand All @@ -30,7 +31,25 @@ def validate(self, data: ndarray) -> Optional[InferredField]:
def fix(self, column: Series, inferred_field: InferredField) -> Series:
    """Cast ``column`` to a numeric dtype using locale-aware float parsing.

    A column that already has a numeric dtype is returned unchanged.
    Otherwise every value is parsed with ``locale.atof`` and the result is
    passed through ``pandas.to_numeric``.

    Args:
        column: The column to convert.
        inferred_field: Not used by this implementation.

    Returns:
        The original column when it is already numeric, otherwise the
        converted column.

    Raises:
        FieldCastingException: When the conversion fails; carries the
            position and value reported by ``_extract_bad_value``.
        NotImplementedError: Propagated from ``_extract_bad_value`` when it
            cannot single out an offending value.
    """
    if is_numeric_dtype(column):
        return column

    # The conversion must live inside the ``try``: an unconditional return
    # ahead of it would make the error reporting below unreachable.
    try:
        return to_numeric(column.apply(locale.atof))
    except Exception as e:
        logger.debug(f"Couldn't cast column to type {self.name}: {e}")
        line_number, bad_value = self._extract_bad_value(column)
        # Chain the original error so the root cause stays in the traceback.
        raise FieldCastingException(
            type_=self.name, line_number=line_number, bad_value=bad_value
        ) from e

@staticmethod
def _extract_bad_value(column: Series) -> Tuple[int, Any]:
    """Locate the first value in ``column`` that cannot be parsed as a float.

    String values are probed with ``locale.atof``, the same parser ``fix``
    uses for the conversion, so a value that is valid under the active
    locale (for example ``"1,5"`` with a comma decimal point) is not
    reported as the offender. Non-string values are probed with ``float``.

    Args:
        column: The column whose values are checked in iteration order.

    Returns:
        A ``(position, value)`` tuple for the first value that fails to
        parse. ``position`` is the zero-based iteration position, not the
        index label.

    Raises:
        NotImplementedError: When every value parses successfully, i.e. no
            single offending value can be identified.
    """
    for idx, row in enumerate(column):
        try:
            if isinstance(row, str):
                locale.atof(row)
            else:
                float(row)  # noqa
        except Exception:  # noqa
            return idx, row

    # Every value parsed on its own, so there is nothing to report here
    # (the column is probably a generic object dtype) - do nothing!
    raise NotImplementedError()


def get_type() -> FloatType:
Expand Down
Loading