diff --git a/src/data_selector/cli.py b/src/data_selector/cli.py index 49c6be2..7a473b0 100644 --- a/src/data_selector/cli.py +++ b/src/data_selector/cli.py @@ -67,6 +67,15 @@ def version(): default=',', help="File separator (csv).", ) +@click.option( + "-r", + "--nbOfRows", + "nb_rows", + type=int, + required=False, + default=10, + help="Number of rows to import from input_file.", +) @click.option( "-S", "--select", @@ -101,6 +110,7 @@ def select_cli( path_columns_to_delete: str, path_to_data_and_columns: str, file_sep: str, + nb_rows: int, data_frame=None ): """Start service to select Data to Keep/Delete""" @@ -110,11 +120,12 @@ def select_cli( overwrite, file_format_in, file_format_out, + nb_rows, path_columns_to_keep, path_columns_to_delete, path_to_data_and_columns, file_sep, - data_frame + data_frame=data_frame ) diff --git a/src/data_selector/selector.py b/src/data_selector/selector.py index 3950367..489638a 100644 --- a/src/data_selector/selector.py +++ b/src/data_selector/selector.py @@ -12,6 +12,7 @@ def select( overwrite: bool, input_format: str, format_choice: str, + nb_rows=1000, path_columns_to_keep=None, path_columns_to_delete=None, path_to_data_and_columns=None, @@ -26,7 +27,7 @@ def select( user for the choices. """ if data_frame is None: - data_frame = pd.read_csv(input_file) + data_frame = pd.read_csv(input_file, nrows=nb_rows, engine='python', sep=file_sep) print("\nINPUT :\n" + str(len(data_frame)) + " rows") print(str(len(data_frame.columns)) + " columns") @@ -184,6 +185,7 @@ def select_data_and_column( for column in param_dict['column_names'].keys(): for val in param_dict["column_names"][column]['value']: list_inter_value.append(data_frame[data_frame[column] == val]) + print(val) list_inter_column.append(pd.concat(list_inter_value)) list_inter_value = []