diff --git a/r/configure b/r/configure index c957c9946fba0..5a7f0c81a947b 100755 --- a/r/configure +++ b/r/configure @@ -203,9 +203,27 @@ find_arrow () { do_bundled_build () { ${R_HOME}/bin/Rscript tools/nixlibs.R $VERSION + if [ $? -ne 0 ]; then + # If the nixlibs.R script failed, we can't continue + echo "------------------------- NOTE ---------------------------" + echo "There was an issue building the Arrow C++ libraries." + echo "See https://arrow.apache.org/docs/r/articles/install.html" + echo "---------------------------------------------------------" + exit 1 + fi + + if [ -d "libarrow/arrow-$VERSION" ]; then + _LIBARROW_FOUND="`pwd`/libarrow/arrow-${VERSION}" + else + # It's possible that the version of the libarrow binary is not identical to the + # R version, e.g. if the R build is a patch release, so find what the dir is + # actually called. If there is more than one version present, use the one + # with the highest version: + _LIBARROW_FOUND="`pwd`/libarrow/$(ls libarrow/ | grep ^arrow- | tail -n 1)" + fi + # Handle a few special cases, using what we know about the bundled build # and our ability to make edits to it since we "own" it. - _LIBARROW_FOUND="`pwd`/libarrow/arrow-${VERSION}" LIB_DIR="${_LIBARROW_FOUND}/lib" if [ -d "$LIB_DIR" ]; then if [ "${PKG_CONFIG_AVAILABLE}" = "true" ]; then diff --git a/r/configure.win b/r/configure.win index eed0c632dea13..2d9e5cdf54e44 100755 --- a/r/configure.win +++ b/r/configure.win @@ -38,8 +38,7 @@ GCS_LIBS="-lcurl -lnormaliz -lssh2 -lgdi32 -lssl -lcrypto -lcrypt32 -lwldap32 \ function configure_release() { VERSION=$(grep ^Version DESCRIPTION | sed s/Version:\ //) # Try to find/download a C++ Arrow binary, - # including possibly a local .zip file if RWINLIB_LOCAL is set - "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "tools/winlibs.R" $VERSION $RWINLIB_LOCAL + "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "tools/nixlibs.R" $VERSION # If binary not found, script exits nonzero if [ $? -ne 0 ]; then echo "Arrow C++ library was not found" diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 3d651ec6ac9ea..96f2b858cf180 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -15,35 +15,55 @@ # specific language governing permissions and limitations # under the License. -args <- commandArgs(TRUE) -VERSION <- args[1] -dst_dir <- paste0("libarrow/arrow-", VERSION) - -# TESTING is set in test-nixlibs.R; it won't be set when called from configure -test_mode <- exists("TESTING") +#### Fuctions #### check end of file for main logic +env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) -# Prevent error with binary selection during testing. -if (test_mode && is.na(VERSION)) { - VERSION <- "8.0.0.9000" +# Log messages in the style of the configure script +lg <- function(..., .indent = "***") { + cat(.indent, " ", sprintf(...), "\n", sep = "") } -dev_version <- package_version(VERSION)[1, 4] -is_release <- is.na(dev_version) || dev_version < "100" -on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" -checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") +cleanup <- function(path) { + options(.arrow.cleanup = c(getOption(".arrow.cleanup"), path)) +} -# Small dev versions are added for R-only changes during CRAN submission. -if (is_release) { - VERSION <- package_version(VERSION)[1, 1:3] - arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") -} else { - arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") +# Exit the script after logging with .status=1 instead of throwing an error +exit <- function(..., .status = 1) { + lg(...) + q(save = "no", status = .status) } -options(.arrow.cleanup = character()) # To collect dirs to rm on exit -on.exit(unlink(getOption(".arrow.cleanup"))) -env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) +# checks the nightly repo for the latest nightly version X.Y.Z.100 +find_latest_nightly <- function(description_version) { + if (!startsWith(arrow_repo, "https://nightlies.apache.org/arrow/r")) { + lg("Detected non standard dev repo: %s, not checking latest nightly version.", arrow_repo) + return(description_version) + } + + res <- try( + { + # Binaries are only uploaded if all jobs pass so can just look at the source versions. + urls <- readLines("https://nightlies.apache.org/arrow/r/src/contrib") + versions <- grep("arrow_.*\\.tar\\.gz", urls, value = TRUE) + versions <- sub(".*arrow_(.*)\\.tar\\.gz.*", "\\1", x = versions) + versions <- sapply(versions, package_version) + versions <- data.frame(do.call(rbind, versions)) + matching_major <- versions[versions$X1 == description_version[1, 1], ] + latest <- matching_major[which.max(matching_major$X4), ] + package_version(paste0(latest, collapse = ".")) + }, + silent = quietly + ) + if (inherits(res, "try-error")) { + lg("Failed to find latest nightly for %s", description_version) + latest <- description_version + } else { + lg("Found latest nightly for %s: %s", description_version, res) + latest <- res + } + latest +} try_download <- function(from_url, to_file, hush = quietly) { status <- try( @@ -56,52 +76,19 @@ try_download <- function(from_url, to_file, hush = quietly) { !inherits(status, "try-error") && status == 0 } -not_cran <- env_is("NOT_CRAN", "true") -# enable full featured builds and binaries for macOS (or if the NOT_CRAN variable has been set) -if (not_cran || on_macos) { - # Set more eager defaults - if (env_is("LIBARROW_BINARY", "")) { - Sys.setenv(LIBARROW_BINARY = "true") - } - if (env_is("LIBARROW_MINIMAL", "")) { - Sys.setenv(LIBARROW_MINIMAL = "false") - } -} - -# For local debugging, set ARROW_R_DEV=TRUE to make this script print more -quietly <- !env_is("ARROW_R_DEV", "true") - -# The default will build from source as a fallback if a binary is not found or shouldn't be used -# Set LIBARROW_BUILD=FALSE to ensure that we use a previously built libarrow -# and don't fall back to a full source build -build_ok <- !env_is("LIBARROW_BUILD", "false") - -# Check if we're authorized to download (not asked an offline build). -# (Note that cmake will still be downloaded if necessary -# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds) -download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") - -# This "tools/thirdparty_dependencies" path, within the tar file, might exist if -# create_package_with_all_dependencies() was run, or if someone has created it -# manually before running make build. -# If you change this path, you also need to edit -# `create_package_with_all_dependencies()` in install-arrow.R -thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") - - download_binary <- function(lib) { libfile <- paste0("arrow-", VERSION, ".zip") binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") if (try_download(binary_url, libfile)) { if (!quietly) { - cat(sprintf("*** Successfully retrieved C++ binaries (%s)\n", lib)) + lg("Successfully retrieved C++ binaries (%s)", lib) } } else { if (!quietly) { - cat(sprintf( - "*** Downloading libarrow binary failed for version %s (%s)\n at %s\n", + lg( + "Downloading libarrow binary failed for version %s (%s)\n at %s", VERSION, lib, binary_url - )) + ) } libfile <- NULL } @@ -109,6 +96,7 @@ download_binary <- function(lib) { # e.g. in case the included checksums are stale. skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") + checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") # validate binary checksum for CRAN release only if (!skip_checksum && dir.exists(checksum_path) && is_release || enforce_checksum) { @@ -163,6 +151,10 @@ download_binary <- function(lib) { # These string values, along with `NULL`, are the potential return values of # this function. identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro()) { + if (on_windows) { + return("windows") + } + lib <- tolower(lib) if (identical(lib, "")) { # Not specified. Check the allowlist. @@ -171,16 +163,16 @@ identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro() if (identical(lib, "false")) { # Do not download a binary - NULL + lib <- NULL } else if (!identical(lib, "true")) { # Env var provided an os-version to use, to override our logic. # We don't validate that this exists. If it doesn't, the download will fail # and the build will fall back to building from source - lib } else { # See if we can find a suitable binary - select_binary() + lib <- select_binary() } + lib } check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apache/arrow/main/r/tools/nixlibs-allowlist.txt") { @@ -209,13 +201,13 @@ select_binary <- function(os = tolower(Sys.info()[["sysname"]]), ifelse(is.null(openssl_version), NULL, paste0(os, arch, openssl_version)) }, error = function(e) { - cat("*** Unable to find libcurl and openssl\n") + lg("Unable to find libcurl and openssl") NULL } ) } else { # No binary available for arch - cat(sprintf("*** Building on %s %s\n", os, arch)) + lg("Building on %s %s", os, arch) binary <- NULL } return(binary) @@ -284,32 +276,32 @@ determine_binary_from_stderr <- function(errs) { if (is.null(attr(errs, "status"))) { # There was no error in compiling: so we found libcurl and OpenSSL >= 1.1, # openssl is < 3.0 - cat("*** Found libcurl and OpenSSL >= 1.1\n") + lg("Found libcurl and OpenSSL >= 1.1") return("openssl-1.1") # Else, check for dealbreakers: } else if (!on_macos && any(grepl("Using libc++", errs, fixed = TRUE))) { # Our linux binaries are all built with GNU stdlib so they fail with libc++ - cat("*** Linux binaries incompatible with libc++\n") + lg("Linux binaries incompatible with libc++") return(NULL) } else if (header_not_found("curl/curl", errs)) { - cat("*** libcurl not found\n") + lg("libcurl not found") return(NULL) } else if (header_not_found("openssl/opensslv", errs)) { - cat("*** OpenSSL not found\n") + lg("OpenSSL not found") return(NULL) } else if (any(grepl("OpenSSL version too old", errs))) { - cat("*** OpenSSL found but version >= 1.0.2 is required for some features\n") + lg("OpenSSL found but version >= 1.0.2 is required for some features") return(NULL) # Else, determine which other binary will work } else if (any(grepl("Using OpenSSL version 1.0", errs))) { if (on_macos) { - cat("*** OpenSSL 1.0 is not supported on macOS\n") + lg("OpenSSL 1.0 is not supported on macOS") return(NULL) } - cat("*** Found libcurl and OpenSSL < 1.1\n") + lg("Found libcurl and OpenSSL < 1.1") return("openssl-1.0") } else if (any(grepl("Using OpenSSL version 3", errs))) { - cat("*** Found libcurl and OpenSSL >= 3.0.0\n") + lg("Found libcurl and OpenSSL >= 3.0.0") return("openssl-3.0") } NULL @@ -438,7 +430,7 @@ find_local_source <- function() { ) for (cpp_dir in cpp_dir_options) { if (file.exists(file.path(cpp_dir, "src/arrow/api.h"))) { - cat(paste0("*** Found local C++ source: '", cpp_dir, "'\n")) + lg("Found local C++ source: '%s'", cpp_dir) return(cpp_dir) } } @@ -459,7 +451,7 @@ env_vars_as_string <- function(env_var_list) { if (nchar(env_var_string) > 30000) { # This could happen if the full paths in *_SOURCE_URL were *very* long. # A more formal check would look at getconf ARG_MAX, but this shouldn't matter - cat("*** Warning: Environment variables are very long. This could cause issues on some shells.\n") + lg("Warning: Environment variables are very long. This could cause issues on some shells.") } env_var_string } @@ -484,7 +476,7 @@ build_libarrow <- function(src_dir, dst_dir) { Sys.setenv(MAKEFLAGS = makeflags) } if (!quietly) { - cat("*** Building with MAKEFLAGS=", makeflags, "\n") + lg("Building with MAKEFLAGS=", makeflags) } # Check for libarrow build dependencies: # * cmake @@ -498,7 +490,7 @@ build_libarrow <- function(src_dir, dst_dir) { # But normally we'll just build in a tmp dir build_dir <- tempfile() } - options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir)) + cleanup(build_dir) env_var_list <- c( SOURCE_DIR = src_dir, @@ -523,7 +515,7 @@ build_libarrow <- function(src_dir, dst_dir) { dep_source <- Sys.getenv("ARROW_DEPENDENCY_SOURCE") if (dep_source %in% c("", "AUTO") && !nzchar(Sys.which("pkg-config"))) { - cat("**** pkg-config not installed, setting ARROW_DEPENDENCY_SOURCE=BUNDLED\n") + lg("pkg-config not installed, setting ARROW_DEPENDENCY_SOURCE=BUNDLED", .indent = "****") env_var_list <- c(env_var_list, ARROW_DEPENDENCY_SOURCE = "BUNDLED") } @@ -552,7 +544,7 @@ build_libarrow <- function(src_dir, dst_dir) { } env_vars <- env_vars_as_string(env_var_list) - cat("**** arrow", ifelse(quietly, "", paste("with", env_vars)), "\n") + lg("arrow %s", ifelse(quietly, "", paste("with", env_vars)), .indent = "****") build_log_path <- tempfile(fileext = ".log") status <- suppressWarnings(system2( @@ -565,7 +557,7 @@ build_libarrow <- function(src_dir, dst_dir) { if (status != 0) { # It failed :( - cat("**** Error building Arrow C++.", "\n") + lg("Error building Arrow C++.", .indent = "****") if (quietly) { cat( "**** Printing contents of build log because the build failed", @@ -583,7 +575,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { if (is.null(cmake)) { # If not found, download it - cat("**** cmake\n") + lg("cmake", .indent = "****") CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.26.4") if (on_macos) { postfix <- "-macos-universal.tar.gz" @@ -592,10 +584,10 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { } else if (tolower(Sys.info()[["machine"]]) == "x86_64") { postfix <- "-linux-x86_64.tar.gz" } else { - stop(paste0( + exit(paste0( "*** cmake was not found locally.\n", " Please make sure cmake >= ", cmake_minimum_required, - " is installed and available on your PATH.\n" + " is installed and available on your PATH." )) } cmake_binary_url <- paste0( @@ -606,7 +598,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { cmake_dir <- tempfile() download_successful <- try_download(cmake_binary_url, cmake_tar) if (!download_successful) { - cat(paste0( + exit(paste0( "*** cmake was not found locally and download failed.\n", " Make sure cmake >= ", cmake_minimum_required, " is installed and available on your PATH,\n", @@ -616,7 +608,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { } untar(cmake_tar, exdir = cmake_dir) unlink(cmake_tar) - options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir)) + cleanup(cmake_dir) cmake <- paste0( cmake_dir, "/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE), @@ -625,7 +617,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { } else { # Show which one we found # Full source builds will always show "cmake" in the logs - cat(sprintf("**** cmake: %s\n", cmake)) + lg("cmake: %s", cmake, .indent = "****") } cmake } @@ -796,7 +788,7 @@ with_cloud_support <- function(env_var_list) { cmake_find_package <- function(pkg, version = NULL, env_var_list) { td <- tempfile() dir.create(td) - options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td)) + cleanup(td) find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)") writeLines(find_package, file.path(td, "CMakeLists.txt")) env_vars <- env_vars_as_string(env_var_list) @@ -811,22 +803,101 @@ cmake_find_package <- function(pkg, version = NULL, env_var_list) { system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0 } -##### +############### Main logic ############# +args <- commandArgs(TRUE) +VERSION <- args[1] + +# TESTING is set in test-nixlibs.R; it won't be set when called from configure +test_mode <- exists("TESTING") + +# Prevent error with binary selection during testing. +if (test_mode && is.na(VERSION)) { + VERSION <- "8.0.0.9000" +} + +VERSION <- package_version(VERSION) +dev_version <- VERSION[1, 4] +# Small dev versions are added for R-only changes during CRAN submission +is_release <- is.na(dev_version) || dev_version < "100" + +on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" +on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" + +# For local debugging, set ARROW_R_DEV=TRUE to make this script print more +quietly <- !env_is("ARROW_R_DEV", "true") + +not_cran <- env_is("NOT_CRAN", "true") + +if (is_release) { + VERSION <- VERSION[1, 1:3] + arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") +} else { + not_cran <- TRUE + arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") + VERSION <- find_latest_nightly(VERSION) +} + +# To collect dirs to rm on exit, use del() to add dirs +# we reset it to avoid errors on reruns in the same session. +options(.arrow.cleanup = character()) +on.exit(unlink(getOption(".arrow.cleanup"), recursive = TRUE), add = TRUE) + +# enable full featured builds for macOS in case of CRAN source builds. +if (not_cran || on_macos) { + # Set more eager defaults + if (env_is("LIBARROW_BINARY", "")) { + Sys.setenv(LIBARROW_BINARY = "true") + } + if (env_is("LIBARROW_MINIMAL", "")) { + Sys.setenv(LIBARROW_MINIMAL = "false") + } +} + +# The default will build from source as a fallback if a binary is not found or shouldn't be used +# Set LIBARROW_BUILD=FALSE to ensure that we use a previously built libarrow +# and don't fall back to a full source build +build_ok <- !env_is("LIBARROW_BUILD", "false") + +# Check if we're authorized to download (not asked an offline build). +# (Note that cmake will still be downloaded if necessary +# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds) +download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") + +# This "tools/thirdparty_dependencies" path, within the tar file, might exist if +# create_package_with_all_dependencies() was run, or if someone has created it +# manually before running make build. +# If you change this path, you also need to edit +# `create_package_with_all_dependencies()` in install-arrow.R +thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") + +arrow_versioned <- paste0("arrow-", VERSION) +# configure.win uses a different libarrow dir and and the zip is already nested +if (on_windows) { + lib_dir <- "windows" + dst_dir <- lib_dir +} else { + lib_dir <- "libarrow" + dst_dir <- file.path(lib_dir, arrow_versioned) +} + +api_h <- file.path(lib_dir, arrow_versioned, "include/arrow/api.h") -if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { +if (!test_mode && !file.exists(api_h)) { # If we're working in a local checkout and have already built the libs, we # don't need to do anything. Otherwise, # (1) Look for a prebuilt binary for this version bin_file <- src_dir <- NULL - if (!identical(Sys.getenv("ARROW_DOWNLOADED_BINARIES"), "")) { - bin_zip <- Sys.getenv("ARROW_DOWNLOADED_BINARIES") - cat(sprintf("*** Using pre-downloaded zip for libarrow binaries: %s\n", bin_zip)) + # Keep backwards compatibility with winlibs.R + bin_zip <- Sys.getenv("ARROW_DOWNLOADED_BINARIES", Sys.getenv("RWINLIB_LOCAL", NA)) + + if (!is.na(bin_zip)) { + lg("Using pre-downloaded zip for libarrow binaries: %s", bin_zip) if (file.exists(bin_zip)) { bin_file <- tempfile() file.copy(bin_zip, bin_file) } else { - cat(sprintf("*** File not found: %s ($ARROW_DOWNLOADED_BINARIES)\n", bin_zip)) + lg("File not found: %s ($ARROW_DOWNLOADED_BINARIES)", bin_zip) bin_file <- NULL } } else if (download_ok) { @@ -842,7 +913,7 @@ if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE) unzip(bin_file, exdir = dst_dir) unlink(bin_file) - } else if (build_ok) { + } else if (build_ok && !on_windows) { # (2) Find source and build it src_dir <- find_local_source() if (!is.null(src_dir)) { @@ -853,9 +924,9 @@ if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { )) build_libarrow(src_dir, dst_dir) } else { - cat("*** Proceeding without libarrow (no local source)\n") + exit("Proceeding without libarrow (no local source)") } } else { - cat("*** Proceeding without libarrow (build not authorized)\n") + exit("Proceeding without libarrow (build not authorized)") } } diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R deleted file mode 100644 index 314062044dcf2..0000000000000 --- a/r/tools/winlibs.R +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -args <- commandArgs(TRUE) -VERSION <- args[1] -dev_version <- package_version(VERSION)[1, 4] -# Small dev versions are added for R-only changes during CRAN submission -is_release <- is.na(dev_version) || dev_version < "100" -env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) -# We want to log the message in the style of the configure script -# not as an R error. Use `return` to exit the script after logging. -lg <- function(...) { - cat("*** ", sprintf(...), "\n") -} - -if (is_release) { - # This is a release version, so we need to use the major.minor.patch version without - # the CRAN suffix/dev_version - VERSION <- package_version(VERSION)[1, 1:3] - # %1$s uses the first variable for both substitutions - url_template <- paste0( - getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), - "/libarrow/bin/windows/arrow-%1$s.zip" - ) -} else { - url_template <- paste0( - getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), - "/libarrow/bin/windows/arrow-%s.zip" - ) -} - -if (file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { - lg("Found local Arrow %s!", VERSION) - return() -} - -zip_file <- sprintf("arrow-%s.zip", VERSION) - -if (length(args) > 1) { - # Arg 2 would be the path/to/lib.zip - localfile <- args[2] - if (!file.exists(localfile)) { - lg("RWINLIB_LOCAL '%s' does not exist. Build will fail.", localfile) - return() - } else { - lg("Using RWINLIB_LOCAL %s", localfile) - } - file.copy(localfile, zip_file) -} else { - quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") - binary_url <- sprintf(url_template, VERSION) - try( - suppressWarnings( - download.file(binary_url, zip_file, quiet = quietly) - ), - silent = quietly - ) - - if (!file.exists(zip_file) || file.size(zip_file) == 0) { - lg("Failed to download libarrow binary from %s. Build will fail.", binary_url) - return() - } - - checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") - # Explicitly setting the env var to "false" will skip checksum validation - # e.g. in case the included checksums are stale. - skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") - enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") - # validate binary checksum for CRAN release only - if (!skip_checksum && dir.exists(checksum_path) && is_release || - enforce_checksum) { - checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) - # rtools does not have shasum with default config - checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) - - if (checksum_ok != 0) { - lg("Checksum validation failed for libarrow binary: %s", zip_file) - return() - } - lg("Checksum validated successfully for libarrow binary: %s", zip_file) - } -} - -dir.create("windows", showWarnings = FALSE) -unzip(zip_file, exdir = "windows") -unlink(zip_file) diff --git a/r/vignettes/developers/install_details.Rmd b/r/vignettes/developers/install_details.Rmd index 2f2f126b61b38..83700b196a642 100644 --- a/r/vignettes/developers/install_details.Rmd +++ b/r/vignettes/developers/install_details.Rmd @@ -5,12 +5,12 @@ description: > output: rmarkdown::html_vignette --- -This document is intended specifically for arrow _developers_ who wish to know -more about these scripts. If you are an arrow _user_ looking for help with +This document is intended specifically for arrow _developers_ who wish to know +more about these scripts. If you are an arrow _user_ looking for help with installing arrow, please see [the installation guide](../install.html) -The arrow R package requires that Arrow C++ library (also known as libarrow) to -be installed in order to work properly. There are a number of different ways +The arrow R package requires that Arrow C++ library (also known as libarrow) to +be installed in order to work properly. There are a number of different ways in which libarrow could be installed: * as part of the R package installation process @@ -21,9 +21,9 @@ Below, we discuss each of these setups in turn. # Installing libarrow during R package installation -There are a number of scripts that are triggered -when `R CMD INSTALL .` is run and for Arrow users, these should all just work -without configuration and pull in the most complete pieces (e.g. official +There are a number of scripts that are triggered +when `R CMD INSTALL .` is run and for Arrow users, these should all just work +without configuration and pull in the most complete pieces (e.g. official binaries that we host). One of the jobs of these scripts is to work out if libarrow is installed, and if not, install it. @@ -35,23 +35,22 @@ handle finding the libarrow, setting up the build variables necessary, and writing the package Makevars file that is used to compile the C++ code in the R package. -* `tools/nixlibs.R` - this script is called by `configure` on Linux +* `tools/nixlibs.R` - this script is called by `configure` on Linux and macOS (or on any non-windows OS with the environment variable -`FORCE_BUNDLED_BUILD=true`) if an existing libarrow installation cannot be found. -This sets up the build process for our bundled builds (which is the default on -linux) and checks for binaries or downloads libarrow from source depending on -dependency availability and build configuration. - -* `tools/winlibs.R` - this script is called by `configure.win` on Windows -when environment variable `ARROW_HOME` is not set. It looks for an existing libarrow -installation, and if it can't find one downloads an appropriate libarrow binary. - -* `inst/build_arrow_static.sh` - called by `tools/nixlibs.R` when libarrow +`FORCE_BUNDLED_BUILD=true`). On windows this script is called by +`configure.win` when environment variable `ARROW_HOME` is not set. +It looks for an existing libarrow installation, and if it can't find one +downloads an appropriate libarrow binary. +On non-windows if no binary could be found, the script sets up the build +process for our bundled builds (which is the default on linux) and checks +for dependencies. + +* `inst/build_arrow_static.sh` - called by `tools/nixlibs.R` when libarrow needs to be built. It builds libarrow for a bundled, static build, and mirrors the steps described in the [Arrow R developer guide](./setup.html) This build script is also what is used to generate our prebuilt binaries. -The actions taken by these scripts to resolve dependencies and install the +The actions taken by these scripts to resolve dependencies and install the correct components are described below. ## How the R package finds libarrow @@ -80,7 +79,7 @@ On Linux and macOS, the core logic is: 2. Find libarrow on the system. If it is present, make sure that its version is compatible with the R package. 3. If no suitable libarrow is found, download it (where allowed) or build it from source. -4. Determine what features this libarrow has and what other flags it requires, +4. Determine what features this libarrow has and what other flags it requires, and set them in `src/Makevars` for use when compiling the bindings. #### Finding libarrow on the system @@ -93,7 +92,7 @@ The `configure` script will look for libarrow in three places: If a libarrow build is found, it will then check that the version of that C++ library matches that of the R package. If the versions do not match, like when you've installed -a system package for a release version but you have a development version of the +a system package for a release version but you have a development version of the R package, that libarrow will not be used. If both the C++ library and R package are on development versions, you will see a warning message advising you that if you do have trouble, you should ensure that the C++ library was built from the same commit as the R @@ -101,10 +100,10 @@ package, as development version numbers do not change with every commit. #### Prebuilt binaries -If libarrow is not found on the system, the R package installation +If libarrow is not found on the system, the R package installation script will next attempt to download prebuilt libarrow binaries -that match your both your local operating system, required -dependencies (e.g. openssl version) and arrow R package version. +that match your both your local operating system, required +dependencies (e.g. openssl version) and arrow R package version. These are used automatically on many Linux distributions (x86_64 architecture only), according to the [allowlist](https://github.com/apache/arrow/blob/main/r/tools/nixlibs-allowlist.txt). @@ -115,18 +114,18 @@ downloaded and bundled when your R package compiles. #### Building from source If no suitable libarrow binary is found, it will attempt to build it locally. -First, it will also look to see if you are in a checkout of the `apache/arrow` +First, it will also look to see if you are in a checkout of the `apache/arrow` git repository and thus have the libarrow source files there. Otherwise, it builds from the source files included in the package. -Depending on your system, building libarrow from source may be slow. If +Depending on your system, building libarrow from source may be slow. If libarrow is built from source, `inst/build_arrow_static.sh` is executed. # Using the R package with libarrow installed as a system package If you are authorized to install system packages and you're installing a CRAN release, -you may want to use the official Apache Arrow release packages corresponding to -the R package version via software distribution tools such as `apt` or `yum` -(though there are some drawbacks: see the +you may want to use the official Apache Arrow release packages corresponding to +the R package version via software distribution tools such as `apt` or `yum` +(though there are some drawbacks: see the ["Troubleshooting" section in the main installation docs](../install.html#troubleshooting)). See the [Arrow project installation page](https://arrow.apache.org/install/) to find pre-compiled binary packages for some common Linux distributions, @@ -138,5 +137,5 @@ be useful because the versions will not match. # Using the R package with an existing libarrow build This setup is much more common for arrow developers, who may be needing to make -changes to both the R package and libarrow source code. See +changes to both the R package and libarrow source code. See the [developer setup docs](./setup.html) for more information.