Skip to content

Commit

Permalink
remove fixed schema parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
ohadmata committed May 29, 2024
1 parent ee7bea3 commit 2175ba1
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 14 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,7 @@ shmessy.infer_schema(df: Dataframe) -> ShmessySchema

### fix_schema
```python
shmessy.fix_schema(
df: Dataframe,
fixed_schema: Optional[ShmessySchema] = None, # Fix the given DF according to this schema
) -> DataFrame
shmessy.fix_schema(df: DataFrame) -> DataFrame
```

### get_inferred_schema
Expand Down
2 changes: 1 addition & 1 deletion examples/read_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
init_logger()
shmessy = Shmessy()
start_time = time.time()
df = shmessy.read_csv('../tests/data/data_1.csv')
df = shmessy.read_csv('../tests/data/data_30.csv')
inferred_schema = shmessy.get_inferred_schema()
pretty_print_df(df=df, inferred_schema=inferred_schema)
print(f"Duration: {int((time.time() - start_time) * 1000)}")
11 changes: 2 additions & 9 deletions src/shmessy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,10 @@ def infer_schema(self, df: DataFrame) -> ShmessySchema:
self.__inferred_schema = inferred_schema
return inferred_schema

def fix_schema(
self,
df: DataFrame,
*,
fixed_schema: Optional[ShmessySchema] = None,
) -> DataFrame:
def fix_schema(self, df: DataFrame) -> DataFrame:
try:
_check_number_of_columns(df=df, max_columns_num=self.__max_columns_num)

if fixed_schema is None:
fixed_schema = self.infer_schema(df)
fixed_schema = self.infer_schema(df)

for column in fixed_schema.columns:
df[column.field_name] = self.__types_handler.fix_field(
Expand Down

0 comments on commit 2175ba1

Please sign in to comment.