From 28db934698842903dae936e746a1c72ead1e579a Mon Sep 17 00:00:00 2001 From: yutopp Date: Thu, 31 Aug 2023 15:28:12 +0900 Subject: [PATCH] Google Spreadsheet Data Source: support to find a worksheet by title (#5334) * Support to find a worksheet by the title for google_spreadsheets * Add tests for parse_query * Fix types * Add tests when finding a worksheet by a title is failed * Proxy by a wrapper instead of using spreadsheet directly. Do not use gspread in tests * Add tests. Fix format of quoted strings for titles * Fix an error format * Add a newline * Fix formatting --------- Co-authored-by: Justin Clift --- redash/query_runner/google_spreadsheets.py | 64 +++++++++++++++---- .../query_runner/test_google_spreadsheets.py | 43 ++++++++++++- 2 files changed, 92 insertions(+), 15 deletions(-) diff --git a/redash/query_runner/google_spreadsheets.py b/redash/query_runner/google_spreadsheets.py index 03b59c9631..4c6b9fc19a 100644 --- a/redash/query_runner/google_spreadsheets.py +++ b/redash/query_runner/google_spreadsheets.py @@ -1,4 +1,5 @@ import logging +import re from base64 import b64decode from dateutil import parser @@ -22,6 +23,7 @@ try: import gspread from gspread.exceptions import APIError + from gspread.exceptions import WorksheetNotFound as GSWorksheetNotFound from oauth2client.service_account import ServiceAccountCredentials enabled = True @@ -88,14 +90,27 @@ def __init__(self, worksheet_num, worksheet_count): super(WorksheetNotFoundError, self).__init__(message) +class WorksheetNotFoundByTitleError(Exception): + def __init__(self, worksheet_title): + message = "Worksheet title '{}' not found.".format(worksheet_title) + super(WorksheetNotFoundByTitleError, self).__init__(message) + + def parse_query(query): values = query.split("|") key = values[0] # key of the spreadsheet - worksheet_num = ( - 0 if len(values) != 2 else int(values[1]) - ) # if spreadsheet contains more than one worksheet - this is the number of it + worksheet_num_or_title = 0 # A default value for when a number of inputs is invalid + if len(values) == 2: + s = values[1].strip() + if len(s) > 0: + if re.match(r"^\"(.*?)\"$", s): + # A string quoted by " means a title of worksheet + worksheet_num_or_title = s[1:-1] + else: + # if spreadsheet contains more than one worksheet - this is the number of it + worksheet_num_or_title = int(s) - return key, worksheet_num + return key, worksheet_num_or_title def parse_worksheet(worksheet): @@ -115,15 +130,21 @@ def parse_worksheet(worksheet): return data -def parse_spreadsheet(spreadsheet, worksheet_num): - worksheets = spreadsheet.worksheets() - worksheet_count = len(worksheets) - if worksheet_num >= worksheet_count: - raise WorksheetNotFoundError(worksheet_num, worksheet_count) +def parse_spreadsheet(spreadsheet, worksheet_num_or_title): + worksheet = None + if type(worksheet_num_or_title) is int: + worksheet = spreadsheet.get_worksheet_by_index(worksheet_num_or_title) + if worksheet is None: + worksheet_count = len(spreadsheet.worksheets()) + raise WorksheetNotFoundError(worksheet_num_or_title, worksheet_count) + elif type(worksheet_num_or_title) is str: + worksheet = spreadsheet.get_worksheet_by_title(worksheet_num_or_title) + if worksheet is None: + raise WorksheetNotFoundByTitleError(worksheet_num_or_title) - worksheet = worksheets[worksheet_num].get_all_values() + worksheet_values = worksheet.get_all_values() - return parse_worksheet(worksheet) + return parse_worksheet(worksheet_values) def is_url_key(key): @@ -141,6 +162,23 @@ def parse_api_error(error): return message +class SpreadsheetWrapper: + def __init__(self, spreadsheet): + self.spreadsheet = spreadsheet + + def worksheets(self): + return self.spreadsheet.worksheets() + + def get_worksheet_by_index(self, index): + return self.spreadsheet.get_worksheet(index) + + def get_worksheet_by_title(self, title): + try: + return self.spreadsheet.worksheet(title) + except GSWorksheetNotFound: + return None + + class TimeoutSession(Session): def request(self, *args, **kwargs): kwargs.setdefault("timeout", 300) @@ -198,7 +236,7 @@ def test_connection(self): def run_query(self, query, user): logger.debug("Spreadsheet is about to execute query: %s", query) - key, worksheet_num = parse_query(query) + key, worksheet_num_or_title = parse_query(query) try: spreadsheet_service = self._get_spreadsheet_service() @@ -208,7 +246,7 @@ def run_query(self, query, user): else: spreadsheet = spreadsheet_service.open_by_key(key) - data = parse_spreadsheet(spreadsheet, worksheet_num) + data = parse_spreadsheet(SpreadsheetWrapper(spreadsheet), worksheet_num_or_title) return json_dumps(data), None except gspread.SpreadsheetNotFound: diff --git a/tests/query_runner/test_google_spreadsheets.py b/tests/query_runner/test_google_spreadsheets.py index e9eca55950..6f02e10e1a 100644 --- a/tests/query_runner/test_google_spreadsheets.py +++ b/tests/query_runner/test_google_spreadsheets.py @@ -7,6 +7,7 @@ from redash.query_runner.google_spreadsheets import ( TYPE_BOOLEAN, TYPE_STRING, + WorksheetNotFoundByTitleError, WorksheetNotFoundError, _get_columns_and_column_names, _value_eval_list, @@ -51,10 +52,14 @@ def test_returns_meaningful_error_for_missing_worksheet(self): spreadsheet = MagicMock() spreadsheet.worksheets = MagicMock(return_value=[]) + spreadsheet.get_worksheet_by_index = MagicMock(return_value=None) self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 0) - spreadsheet.worksheets = MagicMock(return_value=[1, 2]) - self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 2) + def test_returns_meaningful_error_for_missing_worksheet_by_title(self): + spreadsheet = MagicMock() + + spreadsheet.get_worksheet_by_title = MagicMock(return_value=None) + self.assertRaises(WorksheetNotFoundByTitleError, parse_spreadsheet, spreadsheet, "a") empty_worksheet = [] @@ -104,6 +109,40 @@ def test_parse_query(self): parsed = parse_query("key|0") self.assertEqual(("key", 0), parsed) + def test_parse_query_ignored(self): + parsed = parse_query("key") + self.assertEqual(("key", 0), parsed) + + parsed = parse_query("key|") + self.assertEqual(("key", 0), parsed) + + parsed = parse_query("key|1|") + self.assertEqual(("key", 0), parsed) + + def test_parse_query_title(self): + parsed = parse_query('key|""') + self.assertEqual(("key", ""), parsed) + + parsed = parse_query('key|"1"') + self.assertEqual(("key", "1"), parsed) + + parsed = parse_query('key|"abc"') + self.assertEqual(("key", "abc"), parsed) + + parsed = parse_query('key|"あ"') + self.assertEqual(("key", "あ"), parsed) + + parsed = parse_query('key|"1""') + self.assertEqual(("key", '1"'), parsed) + + parsed = parse_query('key|""') + self.assertEqual(("key", ""), parsed) + + def test_parse_query_failed(self): + self.assertRaises(ValueError, parse_query, "key|0x01") + self.assertRaises(ValueError, parse_query, "key|a") + self.assertRaises(ValueError, parse_query, 'key|""a') + class TestGetColumnsAndColumnNames(TestCase): def test_get_columns(self):