Skip to content
This repository has been archived by the owner on Aug 19, 2024. It is now read-only.

Commit

Permalink
Google Spreadsheet Data Source: support to find a worksheet by title (g…
Browse files Browse the repository at this point in the history
…etredash#5334)

* Support to find a worksheet by the title for google_spreadsheets

* Add tests for parse_query

* Fix types

* Add tests when finding a worksheet by a title is failed

* Proxy by a wrapper instead of using spreadsheet directly. Do not use gspread in tests

* Add tests. Fix format of quoted strings for titles

* Fix an error format

* Add a newline

* Fix formatting

---------

Co-authored-by: Justin Clift <justin@postgresql.org>
  • Loading branch information
yutopp and justinclift authored Aug 31, 2023
1 parent 182d842 commit 28db934
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 15 deletions.
64 changes: 51 additions & 13 deletions redash/query_runner/google_spreadsheets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
from base64 import b64decode

from dateutil import parser
Expand All @@ -22,6 +23,7 @@
try:
import gspread
from gspread.exceptions import APIError
from gspread.exceptions import WorksheetNotFound as GSWorksheetNotFound
from oauth2client.service_account import ServiceAccountCredentials

enabled = True
Expand Down Expand Up @@ -88,14 +90,27 @@ def __init__(self, worksheet_num, worksheet_count):
super(WorksheetNotFoundError, self).__init__(message)


class WorksheetNotFoundByTitleError(Exception):
def __init__(self, worksheet_title):
message = "Worksheet title '{}' not found.".format(worksheet_title)
super(WorksheetNotFoundByTitleError, self).__init__(message)


def parse_query(query):
values = query.split("|")
key = values[0] # key of the spreadsheet
worksheet_num = (
0 if len(values) != 2 else int(values[1])
) # if spreadsheet contains more than one worksheet - this is the number of it
worksheet_num_or_title = 0 # A default value for when a number of inputs is invalid
if len(values) == 2:
s = values[1].strip()
if len(s) > 0:
if re.match(r"^\"(.*?)\"$", s):
# A string quoted by " means a title of worksheet
worksheet_num_or_title = s[1:-1]
else:
# if spreadsheet contains more than one worksheet - this is the number of it
worksheet_num_or_title = int(s)

return key, worksheet_num
return key, worksheet_num_or_title


def parse_worksheet(worksheet):
Expand All @@ -115,15 +130,21 @@ def parse_worksheet(worksheet):
return data


def parse_spreadsheet(spreadsheet, worksheet_num):
worksheets = spreadsheet.worksheets()
worksheet_count = len(worksheets)
if worksheet_num >= worksheet_count:
raise WorksheetNotFoundError(worksheet_num, worksheet_count)
def parse_spreadsheet(spreadsheet, worksheet_num_or_title):
worksheet = None
if type(worksheet_num_or_title) is int:
worksheet = spreadsheet.get_worksheet_by_index(worksheet_num_or_title)
if worksheet is None:
worksheet_count = len(spreadsheet.worksheets())
raise WorksheetNotFoundError(worksheet_num_or_title, worksheet_count)
elif type(worksheet_num_or_title) is str:
worksheet = spreadsheet.get_worksheet_by_title(worksheet_num_or_title)
if worksheet is None:
raise WorksheetNotFoundByTitleError(worksheet_num_or_title)

worksheet = worksheets[worksheet_num].get_all_values()
worksheet_values = worksheet.get_all_values()

return parse_worksheet(worksheet)
return parse_worksheet(worksheet_values)


def is_url_key(key):
Expand All @@ -141,6 +162,23 @@ def parse_api_error(error):
return message


class SpreadsheetWrapper:
def __init__(self, spreadsheet):
self.spreadsheet = spreadsheet

def worksheets(self):
return self.spreadsheet.worksheets()

def get_worksheet_by_index(self, index):
return self.spreadsheet.get_worksheet(index)

def get_worksheet_by_title(self, title):
try:
return self.spreadsheet.worksheet(title)
except GSWorksheetNotFound:
return None


class TimeoutSession(Session):
def request(self, *args, **kwargs):
kwargs.setdefault("timeout", 300)
Expand Down Expand Up @@ -198,7 +236,7 @@ def test_connection(self):

def run_query(self, query, user):
logger.debug("Spreadsheet is about to execute query: %s", query)
key, worksheet_num = parse_query(query)
key, worksheet_num_or_title = parse_query(query)

try:
spreadsheet_service = self._get_spreadsheet_service()
Expand All @@ -208,7 +246,7 @@ def run_query(self, query, user):
else:
spreadsheet = spreadsheet_service.open_by_key(key)

data = parse_spreadsheet(spreadsheet, worksheet_num)
data = parse_spreadsheet(SpreadsheetWrapper(spreadsheet), worksheet_num_or_title)

return json_dumps(data), None
except gspread.SpreadsheetNotFound:
Expand Down
43 changes: 41 additions & 2 deletions tests/query_runner/test_google_spreadsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from redash.query_runner.google_spreadsheets import (
TYPE_BOOLEAN,
TYPE_STRING,
WorksheetNotFoundByTitleError,
WorksheetNotFoundError,
_get_columns_and_column_names,
_value_eval_list,
Expand Down Expand Up @@ -51,10 +52,14 @@ def test_returns_meaningful_error_for_missing_worksheet(self):
spreadsheet = MagicMock()

spreadsheet.worksheets = MagicMock(return_value=[])
spreadsheet.get_worksheet_by_index = MagicMock(return_value=None)
self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 0)

spreadsheet.worksheets = MagicMock(return_value=[1, 2])
self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 2)
def test_returns_meaningful_error_for_missing_worksheet_by_title(self):
spreadsheet = MagicMock()

spreadsheet.get_worksheet_by_title = MagicMock(return_value=None)
self.assertRaises(WorksheetNotFoundByTitleError, parse_spreadsheet, spreadsheet, "a")


empty_worksheet = []
Expand Down Expand Up @@ -104,6 +109,40 @@ def test_parse_query(self):
parsed = parse_query("key|0")
self.assertEqual(("key", 0), parsed)

def test_parse_query_ignored(self):
parsed = parse_query("key")
self.assertEqual(("key", 0), parsed)

parsed = parse_query("key|")
self.assertEqual(("key", 0), parsed)

parsed = parse_query("key|1|")
self.assertEqual(("key", 0), parsed)

def test_parse_query_title(self):
parsed = parse_query('key|""')
self.assertEqual(("key", ""), parsed)

parsed = parse_query('key|"1"')
self.assertEqual(("key", "1"), parsed)

parsed = parse_query('key|"abc"')
self.assertEqual(("key", "abc"), parsed)

parsed = parse_query('key|"あ"')
self.assertEqual(("key", "あ"), parsed)

parsed = parse_query('key|"1""')
self.assertEqual(("key", '1"'), parsed)

parsed = parse_query('key|""')
self.assertEqual(("key", ""), parsed)

def test_parse_query_failed(self):
self.assertRaises(ValueError, parse_query, "key|0x01")
self.assertRaises(ValueError, parse_query, "key|a")
self.assertRaises(ValueError, parse_query, 'key|""a')


class TestGetColumnsAndColumnNames(TestCase):
def test_get_columns(self):
Expand Down

0 comments on commit 28db934

Please sign in to comment.