From 4d5e2ccf07f722889e9ceedeb65cc8a534826eee Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Thu, 23 Jun 2016 09:11:43 -0400 Subject: [PATCH 1/9] Added shell for accessing odbc to supply data to Jupyter. Fixed a few issues by pulling in newer versions of rpy2 and ipywidgets. 1) Added freetds to allow access to database instances using odbc. 2) Created a shell .odbc.ini file that can be filled out to access a database instance. 3) Updated the version of rpy2, since the %R and %%R magics wouldn't work from the Python kernels, so that R can be used in cells within a Python sheet. 4) Updated ipywidgets so that the widgets can be used within a Python3 sheet. Without this update it wouldn't show the controls in a Python3 sheet after running the sheet. 5) Pinned conda to jpeg 8 since the ggplot2 graphs wouldn't show up in R sheets. See https://github.com/jupyter/docker-stacks/issues/210 --- datascience-notebook/Dockerfile | 64 ++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index f2b81237d6..913d56afa2 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -11,6 +11,10 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ fonts-dejavu \ gfortran \ + unixodbc-dev \ + libtool-bin \ + autoconf \ + automake \ gcc && apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -24,6 +28,8 @@ RUN apt-get update && \ USER jovyan # R packages including IRKernel which gets installed globally. +# Note that there is a bug with rpy2 where the %R and %%R don't work. +# We need an newer version not yet available on conda to fix this so uninstall and install new version at end. 
RUN conda config --add channels r && \ conda install --quiet --yes \ 'rpy2=2.7*' \ @@ -43,8 +49,13 @@ RUN conda config --add channels r && \ 'r-nycflights13=0.1*' \ 'r-caret=6.0*' \ 'r-rcurl=1.95*' \ + 'r-dbi=*' \ + 'r-scales=*' \ 'r-randomforest=4.6*' && conda clean -tipsy +# Install additional non-conda R packages +RUN R -e 'install.packages(c("ggthemes", "RODBC", "sendmailR", "tis"), repos="http://cran.utstat.utoronto.ca/")' + # Install IJulia packages as jovyan and then move the kernelspec out # to the system share location. Avoids problems with runtime UID change not # taking effect properly on the .local folder in the jovyan home dir. @@ -56,4 +67,55 @@ RUN julia -e 'Pkg.add("IJulia")' && \ # Show Julia where conda libraries are # Add essential packages RUN echo 'push!(Sys.DL_LOAD_PATH, "/opt/conda/lib")' > /home/$NB_USER/.juliarc.jl && \ - julia -e 'Pkg.add("Gadfly")' && julia -e 'Pkg.add("RDatasets")' && julia -F -e 'Pkg.add("HDF5")' +julia -e 'Pkg.add("Gadfly")' && julia -e 'Pkg.add("RDatasets")' && julia -F -e 'Pkg.add("HDF5")' + +# Fix the graphing issue with ggplot2 +# from: https://github.com/jupyter/docker-stacks/issues/210 +RUN echo "jpeg 8*" >> /opt/conda/conda-meta/pinned +RUN conda update --all -y + +# Add odbc drivers to the install +RUN cd /home/jovyan && \ + wget ftp://ftp.freetds.org/pub/freetds/stable/freetds-patched.tar.gz && \ + tar xvzf freetds-patched.tar.gz && \ + cd /home/jovyan/freetds-1.00.6 && \ + ./configure --prefix=/home/jovyan/odbcdriver && \ + make && \ + make install + +# Add ODBC configuration file +RUN echo "[ODBC Data Sources]" >> ~/.odbc.ini && \ + echo "SQL_DB = Sample Database Configuration" >> ~/.odbc.ini && \ + echo "" >> ~/.odbc.ini && \ + echo "[Default]" >> ~/.odbc.ini && \ + echo "" >> ~/.odbc.ini && \ + echo "[SQL_DB]" >> ~/.odbc.ini && \ + echo "Description = Sample Database Configuration" >> ~/.odbc.ini && \ + echo "Driver = /home/jovyan/odbcdriver/lib/libtdsodbc.so" >> ~/.odbc.ini && \ + echo "Trace = No" >> 
~/.odbc.ini && \ + echo "TraceFile = /home/jovyan/prodms.log" >> ~/.odbc.ini && \ + echo "Server = MachineIPAddress >> ~/.odbc.ini && \ + echo "Host = MachineHostName >> ~/.odbc.ini && \ + echo "Port = MachineDBPort" >> ~/.odbc.ini && \ + echo "Database = DatabaseInstanceName" >> ~/.odbc.ini && \ + echo "UID = DatabaseUserName" >> ~/.odbc.ini && \ + echo "PWD = DatabasePassword >> ~/.odbc.ini && \ + echo "Protocol =" >> ~/.odbc.ini && \ + echo "ReadOnly = No" >> ~/.odbc.ini && \ + echo "RowVersioning = No" >> ~/.odbc.ini && \ + echo "ShowSystemTables = No" >> ~/.odbc.ini && \ + echo "ShowOidColumn = No" >> ~/.odbc.ini && \ + echo "FakeOidIndex = No" >> ~/.odbc.ini && \ + echo "ConnSettings =" >> ~/.odbc.ini && \ + echo "TDS_Version = 7.0" >> ~/.odbc.ini + +# Fix the version of rpy2 since the 2.7 is broken. See note above +RUN conda uninstall rpy2 && \ + conda install -c bioconda rpy2=2.7.8 + +# Fix the ipywidgets since the base image has an older version of ipywidgets. +# This older version works with the Python2 instance but not the Python3 instance. +# Installing a newer version allows the widgets to work with both Python versions. +RUN conda uninstall ipywidgets && \ + conda install -c conda-forge ipywidgets=5.1.5 + From 9206a00ee7803efa3e43cdab0a5eaccc06b04744 Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Thu, 23 Jun 2016 11:14:18 -0400 Subject: [PATCH 2/9] Moved fixes earlier in the stack. 
--- base-notebook/Dockerfile | 5 +++++ datascience-notebook/Dockerfile | 23 +++++------------------ scipy-notebook/Dockerfile | 6 +++++- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/base-notebook/Dockerfile b/base-notebook/Dockerfile index f8b5aa1da3..da52a70a15 100644 --- a/base-notebook/Dockerfile +++ b/base-notebook/Dockerfile @@ -64,6 +64,11 @@ RUN cd /tmp && \ $CONDA_DIR/bin/conda config --system --add channels conda-forge && \ conda clean -tipsy +# Fix the graphing issue with ggplot2 +# from: https://github.com/jupyter/docker-stacks/issues/210 +RUN mkdir -p $CONDA_DIR/conda-meta && \ + echo "jpeg 8*" >> $CONDA_DIR/conda-meta/pinned + # Install Jupyter notebook as jovyan RUN conda install --quiet --yes \ 'notebook=4.2*' \ diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index 913d56afa2..117c4dc59f 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -27,12 +27,14 @@ RUN apt-get update && \ USER jovyan -# R packages including IRKernel which gets installed globally. +# Fix the version of rpy2 since the 2.7 is broken. # Note that there is a bug with rpy2 where the %R and %%R don't work. -# We need an newer version not yet available on conda to fix this so uninstall and install new version at end. +# We need an newer version not yet available on the default for conda to fix this. +RUN conda install -c bioconda rpy2=2.7.8 + +# R packages including IRKernel which gets installed globally. 
RUN conda config --add channels r && \ conda install --quiet --yes \ - 'rpy2=2.7*' \ 'r-base=3.2*' \ 'r-irkernel=0.5*' \ 'r-plyr=1.8*' \ @@ -69,11 +71,6 @@ RUN julia -e 'Pkg.add("IJulia")' && \ RUN echo 'push!(Sys.DL_LOAD_PATH, "/opt/conda/lib")' > /home/$NB_USER/.juliarc.jl && \ julia -e 'Pkg.add("Gadfly")' && julia -e 'Pkg.add("RDatasets")' && julia -F -e 'Pkg.add("HDF5")' -# Fix the graphing issue with ggplot2 -# from: https://github.com/jupyter/docker-stacks/issues/210 -RUN echo "jpeg 8*" >> /opt/conda/conda-meta/pinned -RUN conda update --all -y - # Add odbc drivers to the install RUN cd /home/jovyan && \ wget ftp://ftp.freetds.org/pub/freetds/stable/freetds-patched.tar.gz && \ @@ -109,13 +106,3 @@ RUN echo "[ODBC Data Sources]" >> ~/.odbc.ini && \ echo "ConnSettings =" >> ~/.odbc.ini && \ echo "TDS_Version = 7.0" >> ~/.odbc.ini -# Fix the version of rpy2 since the 2.7 is broken. See note above -RUN conda uninstall rpy2 && \ - conda install -c bioconda rpy2=2.7.8 - -# Fix the ipywidgets since the base image has an older version of ipywidgets. -# This older version works with the Python2 instance but not the Python3 instance. -# Installing a newer version allows the widgets to work with both Python versions. -RUN conda uninstall ipywidgets && \ - conda install -c conda-forge ipywidgets=5.1.5 - diff --git a/scipy-notebook/Dockerfile b/scipy-notebook/Dockerfile index f8be0803ee..7786947dc3 100644 --- a/scipy-notebook/Dockerfile +++ b/scipy-notebook/Dockerfile @@ -14,9 +14,13 @@ RUN apt-get update && \ USER jovyan +# Fix the ipywidgets since the base image has an older version of ipywidgets. +# The older version works with the Python2 instance but not the Python3 instance. +# Installing a newer version allows the widgets to work with both Python versions. 
+RUN conda install -c conda-forge ipywidgets=5.1.5 + # Install Python 3 packages RUN conda install --quiet --yes \ - 'ipywidgets=5.1*' \ 'pandas=0.17*' \ 'numexpr=2.5*' \ 'matplotlib=1.5*' \ From 9730ff0b152729e90c617541664be6e6f52f4654 Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Thu, 23 Jun 2016 11:53:36 -0400 Subject: [PATCH 3/9] Fixed build by moving r-base install above rpy2 --- datascience-notebook/Dockerfile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index 117c4dc59f..b01b0d18ed 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -27,15 +27,18 @@ RUN apt-get update && \ USER jovyan +# R packages including IRKernel which gets installed globally. +RUN conda config --add channels r && \ + conda install --quiet --yes \ + 'r-base=3.2*' + # Fix the version of rpy2 since the 2.7 is broken. # Note that there is a bug with rpy2 where the %R and %%R don't work. # We need an newer version not yet available on the default for conda to fix this. RUN conda install -c bioconda rpy2=2.7.8 # R packages including IRKernel which gets installed globally. -RUN conda config --add channels r && \ - conda install --quiet --yes \ - 'r-base=3.2*' \ +RUN conda install --quiet --yes \ 'r-irkernel=0.5*' \ 'r-plyr=1.8*' \ 'r-devtools=1.9*' \ From b2fe4247de586630aa62c540cc319ed49f3bc7e1 Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Thu, 23 Jun 2016 16:07:33 -0400 Subject: [PATCH 4/9] Fixed string termination error. 
--- datascience-notebook/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index b01b0d18ed..3bd841faf8 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -94,12 +94,12 @@ RUN echo "[ODBC Data Sources]" >> ~/.odbc.ini && \ echo "Driver = /home/jovyan/odbcdriver/lib/libtdsodbc.so" >> ~/.odbc.ini && \ echo "Trace = No" >> ~/.odbc.ini && \ echo "TraceFile = /home/jovyan/prodms.log" >> ~/.odbc.ini && \ - echo "Server = MachineIPAddress >> ~/.odbc.ini && \ - echo "Host = MachineHostName >> ~/.odbc.ini && \ + echo "Server = MachineIPAddress" >> ~/.odbc.ini && \ + echo "Host = MachineHostName" >> ~/.odbc.ini && \ echo "Port = MachineDBPort" >> ~/.odbc.ini && \ echo "Database = DatabaseInstanceName" >> ~/.odbc.ini && \ echo "UID = DatabaseUserName" >> ~/.odbc.ini && \ - echo "PWD = DatabasePassword >> ~/.odbc.ini && \ + echo "PWD = DatabasePassword" >> ~/.odbc.ini && \ echo "Protocol =" >> ~/.odbc.ini && \ echo "ReadOnly = No" >> ~/.odbc.ini && \ echo "RowVersioning = No" >> ~/.odbc.ini && \ From 89b16bf36740ed8eb21d576f5727af13833738a9 Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Fri, 24 Jun 2016 14:07:19 -0400 Subject: [PATCH 5/9] Added python access to odbc using pyodbc. --- datascience-notebook/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index 3bd841faf8..6169bf721b 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -18,6 +18,9 @@ RUN apt-get update && \ gcc && apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Python pyodbc +RUN pip install pyodbc + # Julia dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ From 6c95068aa8a4941f0edcee0578da238488d35bc6 Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Wed, 29 Jun 2016 08:05:59 -0400 Subject: [PATCH 6/9] Added checksum for tar file. 
--- datascience-notebook/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index 6169bf721b..07795c6b7e 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -80,6 +80,7 @@ julia -e 'Pkg.add("Gadfly")' && julia -e 'Pkg.add("RDatasets")' && julia -F -e ' # Add odbc drivers to the install RUN cd /home/jovyan && \ wget ftp://ftp.freetds.org/pub/freetds/stable/freetds-patched.tar.gz && \ + echo "8dbcc50fd1cc3bc9aeab85667bd3516d16c9891381b92cf53eb5547bc8299323 freetds-patched.tar.gz" | sha256sum -c - && \ tar xvzf freetds-patched.tar.gz && \ cd /home/jovyan/freetds-1.00.6 && \ ./configure --prefix=/home/jovyan/odbcdriver && \ From c7b1eb17ddc68d4a89c3648da71c3b024388194a Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Wed, 29 Jun 2016 08:10:11 -0400 Subject: [PATCH 7/9] Added pinning for R packages dbi and scales --- datascience-notebook/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index 07795c6b7e..a5d86ad3b6 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -57,8 +57,8 @@ RUN conda install --quiet --yes \ 'r-nycflights13=0.1*' \ 'r-caret=6.0*' \ 'r-rcurl=1.95*' \ - 'r-dbi=*' \ - 'r-scales=*' \ + 'r-dbi=0.3*' \ + 'r-scales=0.3*' \ 'r-randomforest=4.6*' && conda clean -tipsy # Install additional non-conda R packages From 114d1bc148ae0aa0933b77fa6a4766db48834c28 Mon Sep 17 00:00:00 2001 From: Andrew Stern Date: Wed, 29 Jun 2016 09:05:53 -0400 Subject: [PATCH 8/9] Replaced patched version of freetds with current released version. 
--- datascience-notebook/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index e53e5316c4..93937ac89d 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -78,8 +78,8 @@ RUN echo "push!(Sys.DL_LOAD_PATH, \"$CONDA_DIR/lib\")" > /home/$NB_USER/.juliarc # Add odbc drivers to the install RUN cd /home/jovyan && \ - wget ftp://ftp.freetds.org/pub/freetds/stable/freetds-patched.tar.gz && \ - echo "8dbcc50fd1cc3bc9aeab85667bd3516d16c9891381b92cf53eb5547bc8299323 freetds-patched.tar.gz" | sha256sum -c - && \ + wget ftp://ftp.freetds.org/pub/freetds/stable/freetds-1.00.9.tar.gz && \ + echo "dcd5e7589f955ced31269de63fb554562806da0133cb7f930b117588313eaf18 freetds-1.00.9.tar.gz" | sha256sum -c - && \ tar xvzf freetds-patched.tar.gz && \ cd /home/jovyan/freetds-1.00.6 && \ ./configure --prefix=/home/jovyan/odbcdriver && \ From 5cddd1b5dcb1427c567665fc94f2c670af0576ca Mon Sep 17 00:00:00 2001 From: SternAndrew Date: Thu, 30 Jun 2016 17:49:00 -0400 Subject: [PATCH 9/9] Fixed untar --- datascience-notebook/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index 93937ac89d..862b8cf569 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -79,9 +79,9 @@ RUN echo "push!(Sys.DL_LOAD_PATH, \"$CONDA_DIR/lib\")" > /home/$NB_USER/.juliarc # Add odbc drivers to the install RUN cd /home/jovyan && \ wget ftp://ftp.freetds.org/pub/freetds/stable/freetds-1.00.9.tar.gz && \ - echo "dcd5e7589f955ced31269de63fb554562806da0133cb7f930b117588313eaf18 freetds-1.00.9.tar.gz" | sha256sum -c - && \ - tar xvzf freetds-patched.tar.gz && \ - cd /home/jovyan/freetds-1.00.6 && \ + echo "dcd5e7589f955ced31269de63fb554562806da0133cb7f930b117588313eaf18 freetds-1.00.9.tar.gz" | sha256sum -c - && \ + tar xvzf freetds-1.00.9.tar.gz && \ + cd 
/home/jovyan/freetds-1.00.9 && \ ./configure --prefix=/home/jovyan/odbcdriver && \ make && \ make install