From 320ced14b915d41db5f1f1b894314da62573f82e Mon Sep 17 00:00:00 2001
From: qimingj
Date: Thu, 8 Dec 2016 17:38:33 -0800
Subject: [PATCH 01/19] Add gcs_copy_file() that is missing but is referenced in a couple of places. (#110)

* Add gcs_copy_file() that is missing but is referenced in a couple of places.
* Add DataFlow to pydatalab dependency list.
* Fix travis test errors by reimplementing gcs copy.
* Remove unnecessary shutil import.
---
 datalab/utils/__init__.py |  2 +-
 datalab/utils/_utils.py   | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/datalab/utils/__init__.py b/datalab/utils/__init__.py
index bcf33ea40..fa5434992 100644
--- a/datalab/utils/__init__.py
+++ b/datalab/utils/__init__.py
@@ -21,4 +21,4 @@
 from ._lru_cache import LRUCache
 from ._lambda_job import LambdaJob
 from ._utils import print_exception_with_last_stack, get_item, compare_datetimes, \
-  pick_unused_port, is_http_running_on
+  pick_unused_port, is_http_running_on, gcs_copy_file
diff --git a/datalab/utils/_utils.py b/datalab/utils/_utils.py
index 3e453e647..53ba94409 100644
--- a/datalab/utils/_utils.py
+++ b/datalab/utils/_utils.py
@@ -22,6 +22,7 @@
 import httplib
 import pytz
+import subprocess
 import socket
 import traceback
 import types
@@ -110,3 +111,13 @@ def is_http_running_on(port):
     return True
   except Exception as e:
     return False
+
+
+def gcs_copy_file(source, dest):
+  """ Copy file from source to destination. The paths can be GCS or local.
+
+  Args:
+    source: the source file path.
+    dest: the destination file path.
+  """
+  subprocess.check_call(['gsutil', '-q', 'cp', source, dest])

From 7320b39553c75fa0b5624eb861a92e800436a326 Mon Sep 17 00:00:00 2001
From: Anthonios Partheniou
Date: Sun, 11 Dec 2016 12:42:07 -0500
Subject: [PATCH 02/19] Flake8 configuration. Set max line length to 100. Ignore E111, E114 (#102)

---
 setup.cfg | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/setup.cfg b/setup.cfg
index 8c2826799..970f3f031 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,11 @@
 [metadata]
 description-file = README.md
 
+[flake8]
+max-line-length = 100
+ignore =
+  # Indentation is not a multiple of four
+  E111,
+  # Indentation is not a multiple of four (comment)
+  E114
+

From f77abd65214af32635f63abafb193136c415bc46 Mon Sep 17 00:00:00 2001
From: qimingj
Date: Mon, 12 Dec 2016 18:19:52 -0800
Subject: [PATCH 03/19] Add datalab user agent to CloudML trainer and predictor requests. (#112)

---
 datalab/mlalpha/_cloud_predictor.py | 1 +
 datalab/mlalpha/_cloud_runner.py    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/datalab/mlalpha/_cloud_predictor.py b/datalab/mlalpha/_cloud_predictor.py
index 6ecc357e6..8209d77a1 100644
--- a/datalab/mlalpha/_cloud_predictor.py
+++ b/datalab/mlalpha/_cloud_predictor.py
@@ -81,6 +81,7 @@ def predict(self, data):
     request = self._api.projects().predict(body={'instances': data},
                                            name=self._full_version_name)
+    request.headers['user-agent'] = 'GoogleCloudDataLab/1.0'
     result = request.execute()
     if 'predictions' not in result:
       raise Exception('Invalid response from service. Cannot find "predictions" in response.')
diff --git a/datalab/mlalpha/_cloud_runner.py b/datalab/mlalpha/_cloud_runner.py
index db3630e0a..5da4958d8 100644
--- a/datalab/mlalpha/_cloud_runner.py
+++ b/datalab/mlalpha/_cloud_runner.py
@@ -86,4 +86,5 @@ def run(self, job_id=None):
                            discoveryServiceUrl=_CLOUDML_DISCOVERY_URL)
     request = cloudml.projects().jobs().create(body=job,
                                                parent='projects/' + context.project_id)
+    request.headers['user-agent'] = 'GoogleCloudDataLab/1.0'
     return request.execute()

From 9124c5e37ad9954807de403017d36157fde3fef0 Mon Sep 17 00:00:00 2001
From: Anthonios Partheniou
Date: Mon, 19 Dec 2016 18:58:41 -0500
Subject: [PATCH 04/19] Update oauth2client to 2.2.0 to satisfy cloudml in Cloud Datalab (#111)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 203783dba..27d32b21f 100644
--- a/setup.py
+++ b/setup.py
@@ -72,7 +72,7 @@
     'futures==3.0.5',
     'google-cloud==0.19.0',
     'httplib2==0.9.2',
-    'oauth2client==2.0.2',
+    'oauth2client==2.2.0',
     'pandas>=0.17.1',
     'pandas-profiling>=1.0.0a2',
     'python-dateutil==2.5.0',

From 7a3399bd49c7944ecc2ba6542895a9a31ea27f16 Mon Sep 17 00:00:00 2001
From: Yasser Elsayed
Date: Sat, 31 Dec 2016 01:55:53 -0800
Subject: [PATCH 05/19] Update README.md (#114)

Added docs link.
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index ffa63d075..c1c860ad4 100644
--- a/README.md
+++ b/README.md
@@ -60,3 +60,5 @@ You will also need to set the project ID to use; either set a `PROJECT_ID`
 environment variable to the project name, or call
 `set_datalab_project_id(name)` from within your notebook.
 
+## Documentation
+You can read the Sphinx generated docs at: [http://googledatalab.github.io/pydatalab/](http://googledatalab.github.io/pydatalab/)

From 4c18e19187f483bc339d7eaa5aa6bdf08262f8cc Mon Sep 17 00:00:00 2001
From: Yasser Elsayed
Date: Sat, 31 Dec 2016 01:59:13 -0800
Subject: [PATCH 06/19] Generate reST documentation for magic commands (#113)

Auto generate docs for any added magics by searching through the source
files for lines with register_line_cell_magic, capturing the names for
those magics, and calling them inside an ipython kernel with the -h
argument, then storing that output into a generated datalab.magics.rst
file.
---
 .gitignore             |  1 +
 docs/Makefile          | 54 +++++++++++++++++++++++-------------------
 docs/README            |  9 ++++---
 docs/conf.py           |  2 +-
 docs/gen-magic-rst.ipy | 39 ++++++++++++++++++++++++++++++
 docs/index.rst         |  1 +
 setup.py               |  2 +-
 7 files changed, 76 insertions(+), 32 deletions(-)
 create mode 100644 docs/gen-magic-rst.ipy

diff --git a/.gitignore b/.gitignore
index fa647c89e..aa484f7a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ MANIFEST
 build
 .coverage
 dist
+datalab.magics.rst
diff --git a/docs/Makefile b/docs/Makefile
index d10cd1f8d..49388854a 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -55,38 +55,42 @@ help:
 clean:
 	rm -rf $(BUILDDIR)/*
 
-html:
+pre-build:
+	@echo "Generate reST for magic commands:"
+	ipython gen-magic-rst.ipy
+
+html: pre-build
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 
-dirhtml:
+dirhtml: pre-build
 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 
-singlehtml:
+singlehtml: pre-build
 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 	@echo
 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 
-pickle:
+pickle: pre-build
 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 	@echo
 	@echo "Build finished; now you can process the pickle files."
 
-json:
+json: pre-build
 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 	@echo
 	@echo "Build finished; now you can process the JSON files."
 
-htmlhelp:
+htmlhelp: pre-build
 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 	@echo
 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 
-qthelp:
+qthelp: pre-build
 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 	@echo
 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/api.qhcp"
 	@echo "To view the help file:"
 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/api.qhc"
 
-applehelp:
+applehelp: pre-build
 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
 	@echo
 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
 	@echo "N.B. You won't be able to view it unless you put it in" \
 	      "~/Library/Documentation/Help or install it in your application" \
 	      "bundle."
 
-devhelp:
+devhelp: pre-build
 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 	@echo
 	@echo "Build finished."
 	@echo "To view the help file:"
 	@echo "# mkdir -p $$HOME/.local/share/devhelp/api"
 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/api"
 	@echo "# devhelp"
 
-epub:
+epub: pre-build
 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 	@echo
 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 
-latex:
+latex: pre-build
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo
 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
 	      "(use \`make latexpdf' here to do that automatically)."
 
-latexpdf:
+latexpdf: pre-build
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo "Running LaTeX files through pdflatex..."
 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 
-latexpdfja:
+latexpdfja: pre-build
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo "Running LaTeX files through platex and dvipdfmx..."
 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 
-text:
+text: pre-build
 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
 	@echo
 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
 
-man:
+man: pre-build
 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
 	@echo
 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
 
-texinfo:
+texinfo: pre-build
 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
 	@echo
 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
 	@echo "Run \`make' in that directory to run these through makeinfo" \
 	      "(use \`make info' here to do that automatically)."
 
-info:
+info: pre-build
 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
 	@echo "Running Texinfo files through makeinfo..."
 	make -C $(BUILDDIR)/texinfo info
 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
 
-gettext:
+gettext: pre-build
 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
 	@echo
 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
 
-changes:
+changes: pre-build
 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
 	@echo
 	@echo "The overview file is in $(BUILDDIR)/changes."
 
-linkcheck:
+linkcheck: pre-build
 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
 	@echo
 	@echo "Link check complete; look for any errors in the above output " \
 	      "or in $(BUILDDIR)/linkcheck/output.txt."
 
-doctest:
+doctest: pre-build
 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
 	@echo "Testing of doctests in the sources finished, look at the " \
 	      "results in $(BUILDDIR)/doctest/output.txt."
 
-coverage:
+coverage: pre-build
 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
 	@echo "Testing of coverage in the sources finished, look at the " \
 	      "results in $(BUILDDIR)/coverage/python.txt."
 
-xml:
+xml: pre-build
 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
 	@echo
 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
 
-pseudoxml:
+pseudoxml: pre-build
 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
 	@echo
 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
@@ -202,7 +206,7 @@ prepublish:
 	cd ../../datalab-docs && git clone https://github.com/GoogleCloudPlatform/datalab.git html && \
 	git checkout gh-pages
 
-publish:
+publish: pre-build
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	cd ../../datalab-docs/html && git add . && git commit -m "Updated" && git push --force origin gh-pages
diff --git a/docs/README b/docs/README
index dcb5afe1f..a4d51aca2 100644
--- a/docs/README
+++ b/docs/README
@@ -1,9 +1,8 @@
-To use, install the prerequisites:
+To use, install the prerequisites and the pydatalab module:
 
 	pip install sphinx sphinx_rtd_theme sphinxcontrib-napoleon
+	pip install .. # from docs directory
 
+then in the docs directory, do 'make html' (or epub, or text, etc).
-then in the docs directory, do 'make html' (or epub, or pdf, etc).
-
-Output will be in the docs/_build directory.
-
+Output will be in $BUILDDIR, defaulting to ../../datalab-docs.
diff --git a/docs/conf.py b/docs/conf.py
index 470e1874d..9f1c1355e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -145,7 +145,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+#html_static_path = []
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
diff --git a/docs/gen-magic-rst.ipy b/docs/gen-magic-rst.ipy
new file mode 100644
index 000000000..a44d6cec0
--- /dev/null
+++ b/docs/gen-magic-rst.ipy
@@ -0,0 +1,39 @@
+import subprocess, pkgutil, importlib, sys
+from cStringIO import StringIO
+
+# ignore mlalpha and tensorboard for now because of their tensorflow dependency
+# until tensorboard is pip installable and can be listed as a pydatalab dependency
+IGNORED_MAGICS = ['mlalpha', 'tensorboard']
+
+# import submodules
+submodules = [s for _,s,_ in pkgutil.iter_modules(['../datalab'])]
+
+for m in submodules:
+  name = 'datalab.' + m + '.commands'
+  try:
+    importlib.import_module(name)
+  except:
+    sys.stderr.write('WARNING, could not find module ' + name + '. Ignoring..\n')
+
+magic_regex = "find ../datalab -name '*.py' -exec perl -e '$f=join(\"\",<>); print \"$1\n\" if $f=~/register_line_cell_magic\ndef ([^\(]+)/m' {} \;"
+magics = subprocess.check_output(magic_regex, shell=True)
+
+reSTfile = open('datalab.magics.rst', 'w')
+indent = '\n '
+
+reSTfile.write('datalab.magics\n')
+reSTfile.write('=================\n\n')
+
+for m in magics.split():
+  if m in IGNORED_MAGICS:
+    sys.stderr.write('Ignoring magic ' + m + '\n')
+  else:
+    reSTfile.write('.. attribute:: ' + m + '\n')
+    reSTfile.write('.. parsed-literal::\n')
+    # hijack stdout since the ipython kernel call writes to stdout/err directly
+    # and does not return its output
+    tmpStdout, sys.stdout = sys.stdout, StringIO()
+    get_ipython().magic(m + ' -h')
+    resultout = sys.stdout.getvalue().splitlines()
+    sys.stdout = tmpStdout
+    reSTfile.writelines(indent + indent.join(resultout) + '\n\n')
diff --git a/docs/index.rst b/docs/index.rst
index afc3d6f96..1f0167284 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -16,6 +16,7 @@
    datalab.data
    datalab.stackdriver.monitoring
    datalab.storage
+   datalab.magics
 
 
 Indices and tables
diff --git a/setup.py b/setup.py
index 27d32b21f..d37838bb3 100644
--- a/setup.py
+++ b/setup.py
@@ -79,8 +79,8 @@
     'pytz>=2015.4',
     'pyyaml==3.11',
     'requests==2.9.1',
-    'scikit-learn==0.17.1',
     'scipy==0.18.0',
+    'scikit-learn==0.17.1',
     'ipykernel==4.4.1',
   ],
   package_data={

From 94c731320fb048dd36bf0fcbc1c9e3b097a7f15f Mon Sep 17 00:00:00 2001
From: qimingj
Date: Sat, 31 Dec 2016 21:47:14 -0800
Subject: [PATCH 07/19] Fix an issue that %%chart failed with UDF query. (#116)

* Fix an issue that %%chart failed with UDF query. The problem is that the
  query is submitted to BQ without replacing variable values from user
  namespace.
* Fix chart tests by adding ip.user_ns mock.
* Fix charting test.
* Add missing import "mock".
* Fix chart tests.
---
 datalab/utils/commands/_utils.py | 3 ++-
 tests/kernel/chart_data_tests.py | 1 +
 tests/kernel/chart_tests.py      | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/datalab/utils/commands/_utils.py b/datalab/utils/commands/_utils.py
index 4035ae6b8..95b11f26f 100644
--- a/datalab/utils/commands/_utils.py
+++ b/datalab/utils/commands/_utils.py
@@ -207,10 +207,11 @@ def get_data(source, fields='*', env=None, first_row=0, count=-1, schema=None):
     Exception if the request could not be fulfilled.
""" + ipy = IPython.get_ipython() if env is None: env = {} + env.update(ipy.user_ns) if isinstance(source, basestring): - ipy = IPython.get_ipython() source = datalab.utils.get_item(ipy.user_ns, source, source) if isinstance(source, basestring): source = datalab.bigquery.Table(source) diff --git a/tests/kernel/chart_data_tests.py b/tests/kernel/chart_data_tests.py index be02564d8..52f34562d 100644 --- a/tests/kernel/chart_data_tests.py +++ b/tests/kernel/chart_data_tests.py @@ -37,6 +37,7 @@ class TestCases(unittest.TestCase): @mock.patch('datalab.utils.get_item') def test_get_chart_data(self, mock_get_item): + IPython.get_ipython().user_ns = {} t = [ {'country': 'US', 'quantity': 100}, {'country': 'ZA', 'quantity': 50}, diff --git a/tests/kernel/chart_tests.py b/tests/kernel/chart_tests.py index 260d1a615..0f6a16dc3 100644 --- a/tests/kernel/chart_tests.py +++ b/tests/kernel/chart_tests.py @@ -28,7 +28,6 @@ def noop_decorator(func): IPython.core.display.HTML = lambda x: x IPython.core.display.JSON = lambda x: x - import datalab.utils.commands @@ -36,6 +35,7 @@ class TestCases(unittest.TestCase): def test_chart_cell(self): t = [{'country': 'US', 'quantity': 100}, {'country': 'ZA', 'quantity': 50}] + IPython.get_ipython().user_ns = {} chart = datalab.utils.commands._chart._chart_cell({'chart': 'geo', 'data': t, 'fields': None}, '') self.assertTrue(chart.find('charts.render(') > 0) self.assertTrue(chart.find('\'geo\'') > 0) From 23e9e21ba9c3b083e0ef20c16865f0531dd8557f Mon Sep 17 00:00:00 2001 From: qimingj Date: Wed, 4 Jan 2017 11:15:04 -0800 Subject: [PATCH 08/19] Fix "%%bigquery schema" issue -- the command generates nothing in output. (#119) --- datalab/bigquery/commands/_bigquery.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datalab/bigquery/commands/_bigquery.py b/datalab/bigquery/commands/_bigquery.py index 9731965ab..032f1e56c 100644 --- a/datalab/bigquery/commands/_bigquery.py +++ b/datalab/bigquery/commands/_bigquery.py @@ -1009,6 +1009,14 @@ def _repr_html_table_schema(schema): _HTML_TEMPLATE = """