From 327be026192410a9f46cf37316746d59250381d3 Mon Sep 17 00:00:00 2001 From: Guillermo Carrasco Date: Fri, 19 Oct 2018 16:47:35 +0200 Subject: [PATCH 1/8] Add --params option to %%bigquery magic --- bigquery/google/cloud/bigquery/magics.py | 19 ++++++++++++++++ bigquery/tests/unit/test_magics.py | 28 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index ed54d9c04b59..488fabccf9cf 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -39,6 +39,9 @@ amount of time for the query to complete will not be cleared after the query is finished. By default, this information will be displayed but will be cleared after the query is finished. + * ``--params `` (optional, line argument): + If present, the argument will be parsed into a dictionary. This dictionary + will be used to format values enclosed within {} in the query. * ```` (required, cell argument): SQL query to run. @@ -100,8 +103,10 @@ from __future__ import print_function +import json import time from concurrent import futures +from json import JSONDecodeError try: import IPython @@ -249,6 +254,12 @@ def _run_query(client, query, job_config=None): 'amount of time for the query to finish. By default, this ' 'information will be displayed as the query runs, but will be ' 'cleared after the query is finished.')) +@magic_arguments.argument( + '--params', + default=None, + help=('Parameters to format the query string. If present, it should be a ' + 'parsable JSON string. The parsed dictionary will be used for string' + 'replacement in the query')) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -265,6 +276,14 @@ def _cell_magic(line, query): """ args = magic_arguments.parse_argstring(_cell_magic, line) + if args.params is not None: + try: + params = json.loads(args.params) + except JSONDecodeError as e: + raise JSONDecodeError('--params is not a correctly formatted JSON string', e.doc, e.pos) + + query = query.format(**params) + project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) job_config = bigquery.job.QueryJobConfig() diff --git a/bigquery/tests/unit/test_magics.py b/bigquery/tests/unit/test_magics.py index 800edf2918bc..731837b5c597 100644 --- a/bigquery/tests/unit/test_magics.py +++ b/bigquery/tests/unit/test_magics.py @@ -259,3 +259,31 @@ def test_bigquery_magic_with_project(): assert client_used.project == 'specific-project' # context project should not change assert magics.context.project == 'general-project' + + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_with_formatting_params(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT {num} AS num' + result = pandas.DataFrame([17], columns=['num']) + assert 'myvariable' not in ip.user_ns + + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + ip.run_cell_magic('bigquery', 'df --params {"num":17}', sql) + + assert 'df' in ip.user_ns # verify that variable exists + df = ip.user_ns['df'] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names From d4c2944cc35bff66f7953466adcb77b5664f5cee Mon Sep 17 00:00:00 2001 From: Guillermo Carrasco Date: Sun, 21 Oct 2018 19:46:23 +0200 Subject: [PATCH 2/8] Clarify --params docs and add assertion to ensure sql string is formatted --- bigquery/google/cloud/bigquery/magics.py | 2 +- bigquery/tests/unit/test_magics.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index 488fabccf9cf..84ee18bd2c37 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -40,7 +40,7 @@ query is finished. By default, this information will be displayed but will be cleared after the query is finished. * ``--params `` (optional, line argument): - If present, the argument will be parsed into a dictionary. This dictionary + If present, the argument must be a parsable JSON string. This dictionary will be used to format values enclosed within {} in the query. * ```` (required, cell argument): SQL query to run. diff --git a/bigquery/tests/unit/test_magics.py b/bigquery/tests/unit/test_magics.py index 731837b5c597..386e17303082 100644 --- a/bigquery/tests/unit/test_magics.py +++ b/bigquery/tests/unit/test_magics.py @@ -282,8 +282,10 @@ def test_bigquery_magic_with_formatting_params(): run_query_mock.return_value = query_job_mock ip.run_cell_magic('bigquery', 'df --params {"num":17}', sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) assert 'df' in ip.user_ns # verify that variable exists df = ip.user_ns['df'] assert len(df) == len(result) # verify row count assert list(df) == list(result) # verify column names + From ca181263e497f647d44589999f5a09f0674386b7 Mon Sep 17 00:00:00 2001 From: Guillermo Carrasco Date: Fri, 26 Oct 2018 18:05:11 +0200 Subject: [PATCH 3/8] Use ast instead of json for parsing --- bigquery/google/cloud/bigquery/magics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index 84ee18bd2c37..3e1604701f05 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -103,10 +103,9 @@ from __future__ import print_function -import json +import ast import time from concurrent import futures -from json import JSONDecodeError try: import IPython @@ -256,6 +255,7 @@ def _run_query(client, query, job_config=None): 'cleared after the query is finished.')) @magic_arguments.argument( '--params', + nargs='+', default=None, help=('Parameters to format the query string. If present, it should be a ' 'parsable JSON string. The parsed dictionary will be used for string' @@ -278,9 +278,9 @@ def _cell_magic(line, query): if args.params is not None: try: - params = json.loads(args.params) - except JSONDecodeError as e: - raise JSONDecodeError('--params is not a correctly formatted JSON string', e.doc, e.pos) + params = ast.literal_eval(''.join(args.params)) + except Exception: + raise SyntaxError('--params is not a correctly formatted JSON string') query = query.format(**params) From 0baa02188652c5a99265ce928a663e45dff0853d Mon Sep 17 00:00:00 2001 From: Guillermo Carrasco Date: Fri, 26 Oct 2018 18:45:16 +0200 Subject: [PATCH 4/8] Make use of to_query_parameters and BQ parametrized syntax --- bigquery/google/cloud/bigquery/magics.py | 7 ++++--- bigquery/tests/unit/test_magics.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index 3e1604701f05..7756eaf7751a 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -116,6 +116,7 @@ import google.auth from google.cloud import bigquery +from google.cloud.bigquery.dbapi._helpers import to_query_parameters class Context(object): @@ -276,17 +277,17 @@ def _cell_magic(line, query): """ args = magic_arguments.parse_argstring(_cell_magic, line) + params = [] if args.params is not None: try: - params = ast.literal_eval(''.join(args.params)) + params = to_query_parameters(ast.literal_eval(''.join(args.params))) except Exception: raise SyntaxError('--params is not a correctly formatted JSON string') - query = query.format(**params) - project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) job_config = bigquery.job.QueryJobConfig() + job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql query_job = _run_query(client, query, job_config) diff --git a/bigquery/tests/unit/test_magics.py b/bigquery/tests/unit/test_magics.py index 386e17303082..25fc1feffb73 100644 --- a/bigquery/tests/unit/test_magics.py +++ b/bigquery/tests/unit/test_magics.py @@ -269,7 +269,7 @@ def test_bigquery_magic_with_formatting_params(): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True) - sql = 'SELECT {num} AS num' + sql = 'SELECT @num AS num' result = pandas.DataFrame([17], columns=['num']) assert 'myvariable' not in ip.user_ns From 2c17f199771dc586a106589f5b52aaaa14802e96 Mon Sep 17 00:00:00 2001 From: Guillermo Carrasco Date: Mon, 29 Oct 2018 15:39:44 +0100 Subject: [PATCH 5/8] Add test for expanding a dictionary. Add usage examples --- bigquery/google/cloud/bigquery/magics.py | 18 ++++++++-- bigquery/tests/unit/test_magics.py | 43 ++++++++++++++++++++---- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index 7756eaf7751a..35a89e1de28f 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -41,7 +41,7 @@ will be cleared after the query is finished. * ``--params `` (optional, line argument): If present, the argument must be a parsable JSON string. This dictionary - will be used to format values enclosed within {} in the query. + will be used to format values preceded by ``@`` in the query. * ```` (required, cell argument): SQL query to run. @@ -99,6 +99,18 @@ ...: 1 Patricia 1568495 ...: 2 Elizabeth 1519946 + In [5]: %%bigquery df --params {"num": 17} + ...: SELECT @num AS num + Out[5]: + ...: num + ...: 0 17 + In [6]: # Expand a dictionary instead of writing it's string value + In [6]: params = {"num": 17} + In [7]: %%bigquery df --params $params + ...: SELECT @num AS num + Out[7]: + ...: num + ...: 0 17 """ from __future__ import print_function @@ -116,7 +128,7 @@ import google.auth from google.cloud import bigquery -from google.cloud.bigquery.dbapi._helpers import to_query_parameters +from google.cloud.bigquery.dbapi import _helpers class Context(object): @@ -280,7 +292,7 @@ def _cell_magic(line, query): params = [] if args.params is not None: try: - params = to_query_parameters(ast.literal_eval(''.join(args.params))) + params = _helpers.to_query_parameters(ast.literal_eval(''.join(args.params))) except Exception: raise SyntaxError('--params is not a correctly formatted JSON string') diff --git a/bigquery/tests/unit/test_magics.py b/bigquery/tests/unit/test_magics.py index 25fc1feffb73..daaf002270a1 100644 --- a/bigquery/tests/unit/test_magics.py +++ b/bigquery/tests/unit/test_magics.py @@ -186,7 +186,7 @@ def test_bigquery_magic_with_result_saved_to_variable(): sql = 'SELECT 17 AS num' result = pandas.DataFrame([17], columns=['num']) - assert 'myvariable' not in ip.user_ns + assert 'df' not in ip.user_ns run_query_patch = mock.patch( 'google.cloud.bigquery.magics._run_query', autospec=True) @@ -263,7 +263,7 @@ def test_bigquery_magic_with_project(): @pytest.mark.usefixtures('ipython_interactive') @pytest.mark.skipif(pandas is None, reason='Requires `pandas`') -def test_bigquery_magic_with_formatting_params(): +def test_bigquery_magic_with_formatting_params_with_string(): ip = IPython.get_ipython() ip.extension_manager.load_extension('google.cloud.bigquery') magics.context.credentials = mock.create_autospec( @@ -271,7 +271,7 @@ def test_bigquery_magic_with_formatting_params(): sql = 'SELECT @num AS num' result = pandas.DataFrame([17], columns=['num']) - assert 'myvariable' not in ip.user_ns + assert 'params_string_df' not in ip.user_ns run_query_patch = mock.patch( 'google.cloud.bigquery.magics._run_query', autospec=True) @@ -281,11 +281,42 @@ def test_bigquery_magic_with_formatting_params(): with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - ip.run_cell_magic('bigquery', 'df --params {"num":17}', sql) + ip.run_cell_magic('bigquery', 'params_string_df --params {"num":17}', sql) run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) - assert 'df' in ip.user_ns # verify that variable exists - df = ip.user_ns['df'] + assert 'params_string_df' in ip.user_ns # verify that variable exists + df = ip.user_ns['params_string_df'] assert len(df) == len(result) # verify row count assert list(df) == list(result) # verify column names + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_with_formatting_params_with_expanded_dict(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT @num AS num' + result = pandas.DataFrame([17], columns=['num']) + assert 'params_dict_df' not in ip.user_ns + + run_query_patch = mock.patch( + 'google.cloud.bigquery.magics._run_query', autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + params = {"num": 17} + # Insert dictionary into user namespace so that it can be expanded + ip.user_ns['params'] = params + ip.run_cell_magic('bigquery', 'params_dict_df --params $params', sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) + + assert 'params_dict_df' in ip.user_ns # verify that variable exists + df = ip.user_ns['params_dict_df'] + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names From 396381ba19a62322aba37ef4ad852ac5aa870386 Mon Sep 17 00:00:00 2001 From: Guillermo Carrasco Date: Tue, 30 Oct 2018 11:37:40 +0100 Subject: [PATCH 6/8] clarify --params docstring --- bigquery/google/cloud/bigquery/magics.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index 35a89e1de28f..1b4e91ce76de 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -40,8 +40,9 @@ query is finished. By default, this information will be displayed but will be cleared after the query is finished. * ``--params `` (optional, line argument): - If present, the argument must be a parsable JSON string. This dictionary - will be used to format values preceded by ``@`` in the query. + If present, the argument must be a parsable JSON string or a reference to dictionary + which is serializable to JSON (preceding the dictionary with $). + This dictionary will be used to format values preceded by ``@`` in the query. * ```` (required, cell argument): SQL query to run. @@ -104,13 +105,6 @@ Out[5]: ...: num ...: 0 17 - In [6]: # Expand a dictionary instead of writing it's string value - In [6]: params = {"num": 17} - In [7]: %%bigquery df --params $params - ...: SELECT @num AS num - Out[7]: - ...: num - ...: 0 17 """ from __future__ import print_function From e3e969755bdd0b1a2b060afa81fb52f16d1e30e4 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 30 Oct 2018 11:01:42 -0700 Subject: [PATCH 7/8] update documentation and fix lint --- bigquery/google/cloud/bigquery/magics.py | 54 ++++++++++++++++++------ bigquery/tests/unit/test_magics.py | 21 +++++---- 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index 1b4e91ce76de..05e8e52c7ffa 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -21,7 +21,7 @@ .. code-block:: python %%bigquery [] [--project ] [--use_legacy_sql] - [--verbose] + [--verbose] [--params ] Parameters: @@ -39,10 +39,20 @@ amount of time for the query to complete will not be cleared after the query is finished. By default, this information will be displayed but will be cleared after the query is finished. - * ``--params `` (optional, line argument): - If present, the argument must be a parsable JSON string or a reference to dictionary - which is serializable to JSON (preceding the dictionary with $). - This dictionary will be used to format values preceded by ``@`` in the query. + * ``--params `` (optional, line argument): + If present, the argument following the ``--params`` flag must be + either: + + * :class:`str` - A JSON string representation of a dictionary in the + format ``{"param_name": "param_value"}`` (ex. ``{"num": 17}``). Use + of the parameter in the query should be indicated with + ``@param_name``. See ``In[5]`` in the Examples section below. + + * :class:`dict` reference - A reference to a ``dict`` in the format + ``{"param_name": "param_value"}``, where the value types must be JSON + serializable. The variable reference is indicated by a ``$`` before + the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]`` + in the Examples section below. * ```` (required, cell argument): SQL query to run. @@ -58,7 +68,7 @@ the bigquery IPython extension (see ``In[1]``) and setting up Application Default Credentials. - .. code-block:: python + .. code-block:: none In [1]: %load_ext google.cloud.bigquery @@ -102,9 +112,19 @@ In [5]: %%bigquery df --params {"num": 17} ...: SELECT @num AS num - Out[5]: - ...: num - ...: 0 17 + + Out[5]: num + ...: ------- + ...: 0 17 + + In [6]: params = {"num": 17} + + In [7]: %%bigquery df --params $params + ...: SELECT @num AS num + + Out[7]: num + ...: ------- + ...: 0 17 """ from __future__ import print_function @@ -264,9 +284,12 @@ def _run_query(client, query, job_config=None): '--params', nargs='+', default=None, - help=('Parameters to format the query string. If present, it should be a ' - 'parsable JSON string. The parsed dictionary will be used for string' - 'replacement in the query')) + help=('Parameters to format the query string. If present, the --params ' + 'flag should be followed by a string representation of a dictionary ' + 'in the format {\'param_name\': \'param_value\'} (ex. {"num": 17}), ' + 'or a reference to a dictionary in the same format. The dictionary ' + 'reference can be made by including a \'$\' before the variable ' + 'name (ex. $my_dict_var).')) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -286,9 +309,12 @@ def _cell_magic(line, query): params = [] if args.params is not None: try: - params = _helpers.to_query_parameters(ast.literal_eval(''.join(args.params))) + params = _helpers.to_query_parameters( + ast.literal_eval(''.join(args.params))) except Exception: - raise SyntaxError('--params is not a correctly formatted JSON string') + raise SyntaxError( + '--params is not a correctly formatted JSON string or a JSON ' + 'serializable dictionary') project = args.project or context.project client = bigquery.Client(project=project, credentials=context.credentials) diff --git a/bigquery/tests/unit/test_magics.py b/bigquery/tests/unit/test_magics.py index daaf002270a1..e8c0a629b4d2 100644 --- a/bigquery/tests/unit/test_magics.py +++ b/bigquery/tests/unit/test_magics.py @@ -281,13 +281,15 @@ def test_bigquery_magic_with_formatting_params_with_string(): with run_query_patch as run_query_mock: run_query_mock.return_value = query_job_mock - ip.run_cell_magic('bigquery', 'params_string_df --params {"num":17}', sql) - run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) + ip.run_cell_magic( + 'bigquery', 'params_string_df --params {"num":17}', sql) + run_query_mock.assert_called_once_with( + mock.ANY, sql.format(num=17), mock.ANY) - assert 'params_string_df' in ip.user_ns # verify that variable exists + assert 'params_string_df' in ip.user_ns # verify that the variable exists df = ip.user_ns['params_string_df'] - assert len(df) == len(result) # verify row count - assert list(df) == list(result) # verify column names + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names @pytest.mark.usefixtures('ipython_interactive') @@ -314,9 +316,10 @@ def test_bigquery_magic_with_formatting_params_with_expanded_dict(): # Insert dictionary into user namespace so that it can be expanded ip.user_ns['params'] = params ip.run_cell_magic('bigquery', 'params_dict_df --params $params', sql) - run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) + run_query_mock.assert_called_once_with( + mock.ANY, sql.format(num=17), mock.ANY) - assert 'params_dict_df' in ip.user_ns # verify that variable exists + assert 'params_dict_df' in ip.user_ns # verify that the variable exists df = ip.user_ns['params_dict_df'] - assert len(df) == len(result) # verify row count - assert list(df) == list(result) # verify column names + assert len(df) == len(result) # verify row count + assert list(df) == list(result) # verify column names From 914187389a24b9854335a1b492ba97e3cd5804fe Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Tue, 30 Oct 2018 11:25:17 -0700 Subject: [PATCH 8/8] fix coverage --- bigquery/tests/unit/test_magics.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/bigquery/tests/unit/test_magics.py b/bigquery/tests/unit/test_magics.py index e8c0a629b4d2..b0e08661ca00 100644 --- a/bigquery/tests/unit/test_magics.py +++ b/bigquery/tests/unit/test_magics.py @@ -263,7 +263,7 @@ def test_bigquery_magic_with_project(): @pytest.mark.usefixtures('ipython_interactive') @pytest.mark.skipif(pandas is None, reason='Requires `pandas`') -def test_bigquery_magic_with_formatting_params_with_string(): +def test_bigquery_magic_with_string_params(): ip = IPython.get_ipython() ip.extension_manager.load_extension('google.cloud.bigquery') magics.context.credentials = mock.create_autospec( @@ -294,7 +294,7 @@ def test_bigquery_magic_with_formatting_params_with_string(): @pytest.mark.usefixtures('ipython_interactive') @pytest.mark.skipif(pandas is None, reason='Requires `pandas`') -def test_bigquery_magic_with_formatting_params_with_expanded_dict(): +def test_bigquery_magic_with_dict_params(): ip = IPython.get_ipython() ip.extension_manager.load_extension('google.cloud.bigquery') magics.context.credentials = mock.create_autospec( @@ -323,3 +323,18 @@ def test_bigquery_magic_with_formatting_params_with_expanded_dict(): df = ip.user_ns['params_dict_df'] assert len(df) == len(result) # verify row count assert list(df) == list(result) # verify column names + + +@pytest.mark.usefixtures('ipython_interactive') +@pytest.mark.skipif(pandas is None, reason='Requires `pandas`') +def test_bigquery_magic_with_improperly_formatted_params(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension('google.cloud.bigquery') + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True) + + sql = 'SELECT @num AS num' + + with pytest.raises(SyntaxError): + ip.run_cell_magic( + 'bigquery', '--params {17}', sql)