Skip to content

Commit

Permalink
ARROW-6214: [R] Add R sanitizer docker image
Browse files Browse the repository at this point in the history
The PR creates a docker image to replicate R sanitizer errors. One can iterate with arrow local sources without installing CRAN arrow package.

A real undefined behaviour was fixed in `array_to_vector.cpp`. The other errors were false positives (to best of my knowledge).

Closes #5408 from fsaintjacques/ARROW-6214-r-sanitizer and squashes the following commits:

e85c7e3 <François Saint-Jacques> Add docker-r-sanitizer to nightly tasks
7209ca9 <François Saint-Jacques> Fix UBSAN pointer casting error
6b425b4 <François Saint-Jacques> ARROW-6214:  Add sanitizer docker image

Authored-by: François Saint-Jacques <fsaintjacques@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
  • Loading branch information
fsaintjacques authored and nealrichardson committed Sep 19, 2019
1 parent 19d1d0a commit c2e8323
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 18 deletions.
1 change: 1 addition & 0 deletions .hadolint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ ignored:
- DL3018
- DL3015 # Avoid additional packages by specifying `--no-install-recommends`
- DL3028 # Ruby gem version pinning
- DL3007 # r-sanitizer must use latest
2 changes: 1 addition & 1 deletion Makefile.docker
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# To run the test suite
# $ make -f Makefile.docker cpp

LANGUAGES = cpp cpp-alpine cpp-cmake32 c_glib go java js python python-alpine rust r
LANGUAGES = cpp cpp-alpine cpp-cmake32 c_glib go java js python python-alpine rust r r-sanitizer
MISC = lint iwyu clang-format docs pandas-master
TESTS = dask hdfs-integration spark-integration python-nopandas

Expand Down
12 changes: 9 additions & 3 deletions ci/docker_build_r.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ set -e

export ARROW_HOME=$CONDA_PREFIX

: ${R_BIN:=R}
source /arrow/ci/docker_install_r_deps.sh

# Build arrow
pushd /arrow/r

R CMD build --keep-empty-dirs .
R CMD INSTALL $(ls | grep arrow_*.tar.gz)
install_deps

make clean
${R_BIN} CMD build --keep-empty-dirs .
${R_BIN} CMD INSTALL $(ls | grep arrow_*.tar.gz)

export _R_CHECK_FORCE_SUGGESTS_=false
R CMD check $(ls | grep arrow_*.tar.gz) --as-cran --no-manual
${R_BIN} CMD check $(ls | grep arrow_*.tar.gz) --as-cran --no-manual

popd
39 changes: 39 additions & 0 deletions ci/docker_build_r_sanitizer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -e

: ${R_BIN:=RDsan}
source /arrow/ci/docker_install_r_deps.sh

# Build arrow
pushd /arrow/r

install_deps

make clean
${R_BIN} CMD INSTALL --no-byte-compile .

pushd tests

export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
${R_BIN} < testthat.R

popd

popd
14 changes: 5 additions & 9 deletions ci/docker_install_r_deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,8 @@
# specific language governing permissions and limitations
# under the License.

set -e

pushd /arrow/r

Rscript -e "install.packages(c('remotes', 'dplyr', 'glue'))"
Rscript -e "remotes::install_deps(dependencies = TRUE)"
Rscript -e "remotes::install_github('romainfrancois/decor')"

popd
install_deps() {
${R_BIN} -e "install.packages(c('remotes', 'dplyr', 'glue'))"
${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"
${R_BIN} -e "remotes::install_github('romainfrancois/decor')"
}
11 changes: 11 additions & 0 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ groups:
docker:
- docker-r
- docker-r-conda
- docker-r-sanitizer
- docker-rust
- docker-cpp
- docker-cpp-alpine
Expand Down Expand Up @@ -164,6 +165,7 @@ groups:
- gandiva-jar-osx
- docker-r
- docker-r-conda
- docker-r-sanitizer
- docker-rust
- docker-cpp
- docker-cpp-cmake32
Expand Down Expand Up @@ -1121,6 +1123,15 @@ tasks:
- docker-compose build r-conda
- docker-compose run r-conda

docker-r-sanitizer:
ci: circle
platform: linux
template: docker-tests/circle.linux.yml
params:
commands:
- docker-compose build r-sanitizer
- docker-compose run r-sanitizer

docker-rust:
ci: circle
platform: linux
Expand Down
13 changes: 13 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,19 @@ services:
dockerfile: r/Dockerfile
volumes: *ubuntu-volumes

r-sanitizer:
# Usage:
# docker-compose build r-sanitizer
# docker-compose run r-sanitizer
image: arrow:r-sanitizer
cap_add:
# LeakSanitizer and gdb requires ptrace(2)
- SYS_PTRACE
build:
context: .
dockerfile: r/Dockerfile.sanitizer
volumes: *ubuntu-volumes

r-conda:
# Usage:
# export R_VERSION=3.5.1
Expand Down
90 changes: 90 additions & 0 deletions r/Dockerfile.sanitizer
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

FROM wch1/r-debug:latest

# Installs C++ toolchain and dependencies
RUN apt-get update -y -q && \
apt-get install -y -q --no-install-recommends \
autoconf \
bison \
ca-certificates \
ccache \
cmake \
flex \
g++ \
gcc \
git \
libbenchmark-dev \
libboost-filesystem-dev \
libboost-regex-dev \
libboost-system-dev \
libbrotli-dev \
libbz2-dev \
libdouble-conversion-dev \
libgflags-dev \
libgoogle-glog-dev \
liblz4-dev \
liblzma-dev \
libre2-dev \
libsnappy-dev \
libssl-dev \
libzstd-dev \
ninja-build \
pkg-config \
rapidjson-dev \
thrift-compiler \
tzdata && \
apt-get clean && rm -rf /var/lib/apt/lists*

# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
# - libc-ares-dev does not install CMake config files
# - flatbuffer is not packaged
# - libgtest-dev only provide sources
# - libprotobuf-dev only provide sources
# - thrift is too old
ENV CMAKE_ARGS="-DThrift_SOURCE=BUNDLED \
-DFlatbuffers_SOURCE=BUNDLED \
-DGTest_SOURCE=BUNDLED \
-Dc-ares_SOURCE=BUNDLED \
-DgRPC_SOURCE=BUNDLED \
-DProtobuf_SOURCE=BUNDLED ${CMAKE_ARGS}"

# Prioritize system packages and local installation
ENV ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_FLIGHT=OFF \
ARROW_GANDIVA=OFF \
ARROW_HDFS=OFF \
ARROW_ORC=OFF \
ARROW_PARQUET=ON \
ARROW_PLASMA=OFF \
ARROW_USE_ASAN=OFF \
ARROW_USE_UBSAN=OFF \
ARROW_NO_DEPRECATED_API=ON \
ARROW_INSTALL_NAME_RPATH=OFF \
ARROW_WITH_BZ2=OFF \
ARROW_WITH_ZSTD=OFF \
ARROW_R_DEV=TRUE

# Ensure parallel R package installation
RUN printf "options(Ncpus = parallel::detectCores(), repos = 'https://demo.rstudiopm.com/all/__linux__/bionic/latest')\n" >> /etc/R/Rprofile.site
# Also ensure parallel compilation of each individual package
RUN printf "MAKEFLAGS=-j8\n" >> /usr/lib/R/etc/Makeconf

CMD ["/bin/bash", "-c", "/arrow/ci/docker_build_cpp.sh && \
/arrow/ci/docker_build_r_sanitizer.sh"]
5 changes: 4 additions & 1 deletion r/src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
# under the License.

PKG_CPPFLAGS=@cflags@
PKG_CXXFLAGS=$(CXX_VISIBILITY)
# `-fvisibility=hidden` does not play well with UBSAN:
# https://bugs.llvm.org/show_bug.cgi?id=39191
# https://www.mail-archive.com/gcc-bugs@gcc.gnu.org/msg534862.html
# PKG_CXXFLAGS=$(CXX_VISIBILITY)
CXX_STD=CXX11
PKG_LIBS=@libs@
8 changes: 4 additions & 4 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ class Converter_Promotion : public Converter {
}
};

template <typename value_type>
template <typename value_type, typename unit_type = TimeType>
class Converter_Time : public Converter {
public:
explicit Converter_Time(const ArrayVector& arrays) : Converter(arrays) {}
Expand Down Expand Up @@ -485,7 +485,7 @@ class Converter_Time : public Converter {

private:
int TimeUnit_multiplier(const std::shared_ptr<Array>& array) const {
switch (static_cast<TimeType*>(array->type().get())->unit()) {
switch (static_cast<unit_type*>(array->type().get())->unit()) {
case TimeUnit::SECOND:
return 1;
case TimeUnit::MILLI:
Expand All @@ -501,10 +501,10 @@ class Converter_Time : public Converter {
};

template <typename value_type>
class Converter_Timestamp : public Converter_Time<value_type> {
class Converter_Timestamp : public Converter_Time<value_type, TimestampType> {
public:
explicit Converter_Timestamp(const ArrayVector& arrays)
: Converter_Time<value_type>(arrays) {}
: Converter_Time<value_type, TimestampType>(arrays) {}

SEXP Allocate(R_xlen_t n) const {
Rcpp::NumericVector data(no_init(n));
Expand Down
18 changes: 18 additions & 0 deletions r/tools/ubsan.supp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

vptr:include/c++/8/bits/shared_ptr_base.h

0 comments on commit c2e8323

Please sign in to comment.