From b5c50a6d00e84253f10ea0905eb9a682873d409b Mon Sep 17 00:00:00 2001 From: rdeshmukh15 <107538720+rdeshmukh15@users.noreply.github.com> Date: Tue, 12 Mar 2024 14:48:44 +0530 Subject: [PATCH] removes date datatype support from the tap code (#95) * Bump version 3.0.0 * Removes date support from the tap code * Fixes unit tests * Fixes datatype integration test * Update null time unittest --------- Co-authored-by: RushiT0122 --- CHANGELOG.md | 3 +++ README.md | 8 ++++---- setup.py | 2 +- tap_google_sheets/schema.py | 14 ++++---------- tap_google_sheets/transform.py | 17 ----------------- tests/test_google_sheets_datatypes.py | 4 +--- tests/unittests/test_null_cell_format.py | 2 +- 7 files changed, 14 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8ba0d2..7ab2220 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 3.0.0 + * Remove support for date datatype [#95](https://github.com/singer-io/tap-google-sheets/pull/95) + ## 2.1.0 * Updates to run on python 3.11.7 [#94](https://github.com/singer-io/tap-google-sheets/pull/94) diff --git a/README.md b/README.md index b813b5f..772c28d 100644 --- a/README.md +++ b/README.md @@ -48,13 +48,13 @@ This tap: - Invalid types: formulaValue, errorValue - Then check: - [effectiveFormat.numberFormat.type](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType) - - Valid types: UNEPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, DATE, TIME, DATE_TIME, SCIENTIFIC + - Valid types: UNSPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, TIME, DATE_TIME, SCIENTIFIC - Determine JSON schema column data type based on the value and the above cell metadata settings. 
- - If DATE, DATE_TIME, or TIME, set JSON schema format accordingly + - If DATE_TIME or TIME, set JSON schema format accordingly [**values (GET)**](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/get) - Endpoint: https://sheets.googleapis.com/v4/spreadsheets/${spreadsheet_id}/values/'${sheet_name}'!${row_range}?dateTimeRenderOption=SERIAL_NUMBER&valueRenderOption=UNFORMATTED_VALUE&majorDimension=ROWS -- This endpoint loops through sheets and row ranges to get the [unformatted values](https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption) (effective values only), dates and datetimes as [serial numbers](https://developers.google.com/sheets/api/reference/rest/v4/DateTimeRenderOption) +- This endpoint loops through sheets and row ranges to get the [unformatted values](https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption) (effective values only), datetimes as [serial numbers](https://developers.google.com/sheets/api/reference/rest/v4/DateTimeRenderOption) - Primary keys: _sdc_row - Replication strategy: Full (GET file audit data for spreadsheet_id in config) - Process/Transformations: @@ -62,7 +62,7 @@ This tap: - Send metadata for sheet - Loop through ALL columns for columns having a column header - Loop through ranges of rows for ALL rows in sheet available area max row (from sheet metadata) - - Transform values, if necessary (dates, date-times, times, boolean). + - Transform values, if necessary (date-times, times, boolean). - Date/time serial numbers converted to date, date-time, and time strings. Google Sheets uses Lotus 1-2-3 [Serial Number](https://developers.google.com/sheets/api/reference/rest/v4/DateTimeRenderOption) format for date/times. These are converted to normal UTC date-time strings. 
- Process/send records to target diff --git a/setup.py b/setup.py index 5ce7c58..3f42238 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup(name='tap-google-sheets', - version='2.1.0', + version='3.0.0', description='Singer.io tap for extracting data from the Google Sheets v4 API', author='jeff.huth@bytecode.io', classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py index d5d0676..c7f79a1 100644 --- a/tap_google_sheets/schema.py +++ b/tap_google_sheets/schema.py @@ -123,7 +123,7 @@ def get_sheet_schema_columns(sheet): # INVALID: errorType, formulaType # https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ExtendedValue # - # column_number_format_type = UNEPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, DATE, + # column_number_format_type = UNEPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, # TIME, DATE_TIME, SCIENTIFIC # https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType # @@ -136,18 +136,12 @@ def get_sheet_schema_columns(sheet): col_properties = {'type': ['null', 'boolean', 'string']} column_gs_type = 'boolValue' elif column_effective_value_type == 'numberValue': - if column_number_format_type == 'DATE_TIME': + if column_number_format_type in ['DATE_TIME', 'DATE']: col_properties = { 'type': ['null', 'string'], 'format': 'date-time' } column_gs_type = 'numberType.DATE_TIME' - elif column_number_format_type == 'DATE': - col_properties = { - 'type': ['null', 'string'], - 'format': 'date' - } - column_gs_type = 'numberType.DATE' elif column_number_format_type == 'TIME': col_properties = { 'type': ['null', 'string'], @@ -215,11 +209,11 @@ def get_sheet_schema_columns(sheet): } columns.append(column) - if column_gs_type in {'numberType.DATE_TIME', 'numberType.DATE', 'numberType.TIME', 'numberType'}: + if column_gs_type in {'numberType.DATE_TIME', 'numberType.TIME', 'numberType'}: 
col_properties = { 'anyOf': [ col_properties, - {'type': ['null', 'string']} # all the date, time has string types in schema + {'type': ['null', 'string']} # all the time has string types in schema ] } # add the column properties in the `properties` in json schema for the respective column name diff --git a/tap_google_sheets/transform.py b/tap_google_sheets/transform.py index 47f2f36..cebeca2 100644 --- a/tap_google_sheets/transform.py +++ b/tap_google_sheets/transform.py @@ -80,19 +80,6 @@ def transform_sheet_datetime_data(value, unformatted_value, sheet_title, col_nam sheet_title, col_name, col_letter, row_num, col_type)) return str(value) -# transform date values in the sheet -def transform_sheet_date_data(value, unformatted_value, sheet_title, col_name, col_letter, row_num, col_type): - if isinstance(unformatted_value, (int, float)): - # passing both the formatted as well as the unformatted value, so we can use the string value in - # case of any errors while date transform - date_str, is_error = excel_to_dttm_str(value, unformatted_value) - return_str = date_str if is_error else date_str[:10] - return return_str - else: - LOGGER.info('WARNING: POSSIBLE DATA TYPE ERROR; SHEET: {}, COL: {}, CELL: {}{}, TYPE: {}'.format( - sheet_title, col_name, col_letter, row_num, col_type)) - return str(value) - # transform time values in the sheet def transform_sheet_time_data(value, unformatted_value, sheet_title, col_name, col_letter, row_num, col_type): if isinstance(unformatted_value, (int, float)): @@ -231,10 +218,6 @@ def get_column_value(value, unformatted_value, sheet_title, col_name, col_letter elif col_type == 'numberType.DATE_TIME': return transform_sheet_datetime_data(value, unformatted_value, sheet_title, col_name, col_letter, row_num, col_type) - # DATE - elif col_type == 'numberType.DATE': - return transform_sheet_date_data(value, unformatted_value, sheet_title, col_name, col_letter, row_num, col_type) - # TIME ONLY (NO DATE) elif col_type == 'numberType.TIME': 
return transform_sheet_time_data(value, unformatted_value, sheet_title, col_name, col_letter, row_num, col_type) diff --git a/tests/test_google_sheets_datatypes.py b/tests/test_google_sheets_datatypes.py index 20f8858..74a352d 100644 --- a/tests/test_google_sheets_datatypes.py +++ b/tests/test_google_sheets_datatypes.py @@ -164,8 +164,7 @@ def test_run(self): } string_column_formats = { "Datetime": "%Y-%m-%dT%H:%M:%S.%fZ", - "Time": "%H:%M:%S", - "Date": "%Y-%m-%d", + "Time": "%H:%M:%S" } for record in record_data: @@ -207,7 +206,6 @@ def test_run(self): "Currency": "stringValue", "Datetime": "numberType.DATE_TIME", "Time": "numberType.TIME", - "Date": "numberType.DATE", "String": "stringValue", "Number": "numberType", "Boolean": "boolValue", diff --git a/tests/unittests/test_null_cell_format.py b/tests/unittests/test_null_cell_format.py index 65d5260..0b2f4e1 100644 --- a/tests/unittests/test_null_cell_format.py +++ b/tests/unittests/test_null_cell_format.py @@ -74,7 +74,7 @@ def test_null_date_effectiveFormat(self): "null", "string" ], - "format": "date" + "format": "date-time" } sheet_json_schema, columns = schema.get_sheet_schema_columns(sheet)