From f5191b8affff08a66baadde175f0b1152896491d Mon Sep 17 00:00:00 2001 From: ohadmata Date: Tue, 30 Jan 2024 14:12:24 +0200 Subject: [PATCH] float casting issue --- assets/coverage.svg | 4 ++-- src/shmessy/types/float.py | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/assets/coverage.svg b/assets/coverage.svg index 3438732..ee07d4c 100644 --- a/assets/coverage.svg +++ b/assets/coverage.svg @@ -15,7 +15,7 @@ coverage coverage - 97% - 97% + 96% + 96% diff --git a/src/shmessy/types/float.py b/src/shmessy/types/float.py index f82fe8c..f567573 100644 --- a/src/shmessy/types/float.py +++ b/src/shmessy/types/float.py @@ -1,11 +1,12 @@ import locale import logging -from typing import Optional +from typing import Any, Optional, Tuple from numpy import ndarray from pandas import Series, to_numeric from pandas.api.types import is_numeric_dtype +from ..exceptions import FieldCastingException from ..schema import InferredField from .base import BaseType @@ -30,7 +31,25 @@ def validate(self, data: ndarray) -> Optional[InferredField]: def fix(self, column: Series, inferred_field: InferredField) -> Series: if is_numeric_dtype(column): return column - return to_numeric(column.apply(locale.atof)) + try: + return to_numeric(column.apply(locale.atof)) + except Exception as e: + logger.debug(f"Couldn't cast column to type {self.name}: {e}") + line_number, bad_value = self._extract_bad_value(column) + raise FieldCastingException( + type_=self.name, line_number=line_number, bad_value=bad_value + ) + + @staticmethod + def _extract_bad_value(column: Series) -> Tuple[int, Any]: + for idx, row in enumerate(column): + try: + float(row) # noqa + except Exception: # noqa + return idx, row + + # If we reached this piece of code - The dtype is probably an object - do nothing! + raise NotImplementedError() def get_type() -> FloatType: