diff --git a/doc/NEWS.Rd b/doc/NEWS.Rd index 80aef760f81..cd97248af4f 100644 --- a/doc/NEWS.Rd +++ b/doc/NEWS.Rd @@ -91,6 +91,9 @@ \item \code{md5sum()} can be used to compute an MD5 hash of a raw vector of bytes by using the \code{bytes=} argument instead of \code{files=}. The two arguments are mutually exclusive. + + \item Added \code{sha256sum()} analogous to \code{md5sum()}, + but implementing the SHA-256 hashing algorithm. } } diff --git a/src/library/tools/NAMESPACE b/src/library/tools/NAMESPACE index 38b82e3c5e2..7713cd1c734 100644 --- a/src/library/tools/NAMESPACE +++ b/src/library/tools/NAMESPACE @@ -22,7 +22,7 @@ export("Adobe_glyphs", "HTMLheader", "Rcmd", "Rd2HTML", "pkg2HTML", "Rd2ex", "makevars_site", "makevars_user", "matchConcordance", nonS3methods, - "make_translations_pkg", "md5sum", + "make_translations_pkg", "md5sum", "sha256sum", package.dependencies,# R/package.dependencies.R man/package.dependencies.Rd getDepList, pkgDepends, installFoundDepends,# R/pkgDepends.R man/getDepList.Rd man/installFoundDepends.Rd vignetteDepends, # deprecated, too as it calls getDepList() diff --git a/src/library/tools/R/sha256.R b/src/library/tools/R/sha256.R new file mode 100644 index 00000000000..f9972c58225 --- /dev/null +++ b/src/library/tools/R/sha256.R @@ -0,0 +1,109 @@ +# File src/library/tools/R/sha256.R +# Part of the R package, https://www.R-project.org +# +# Copyright (C) 1995-2024 The R Core Team +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# A copy of the GNU General Public License is available at +# https://www.R-project.org/Licenses/ + +sha256sum <- function(files, bytes) { + if (!missing(files) && !missing(bytes)) + stop("files and bytes are mutually exclusive") + if (!missing(bytes)) { + if (!is.raw(bytes)) stop("bytes must be a raw vector") + .Call(C_Rsha256, bytes) + } else { + files <- path.expand(files) + structure(.Call(C_Rsha256, files), names=files) + } +} + +# The following fns are neither used nor exported - for now. + +.installSHA256sums <- function(pkgDir, outDir = pkgDir) +{ + dot <- getwd() + if (is.null(dot)) + stop("current working directory cannot be ascertained") + setwd(pkgDir) + x <- sha256sum(dir(".", recursive=TRUE)) + setwd(dot) + x <- x[names(x) != "SHA256"] + cat(paste(x, names(x), sep=" *"), sep="\n", + file=file.path(outDir, "SHA256")) +} + +checkSHA256sums <- function(package, dir) +{ + if(missing(dir)) dir <- find.package(package, quiet = TRUE) + if(length(dir) != 1L) return(NA) + sha256file <- file.path(dir, "SHA256") + if(!file.exists(sha256file)) return(NA) + inlines <- readLines(sha256file) + ## now split on the first space. 
+ xx <- sub("^([0-9a-fA-F]*)(.*)", "\\1", inlines) + nmxx <- names(xx) <- sub("^[0-9a-fA-F]* [ |*](.*)", "\\1", inlines) + dot <- getwd() + if (is.null(dot)) + stop("current working directory cannot be ascertained") + setwd(dir) + x <- sha256sum(dir(dir, recursive = TRUE)) + setwd(dot) + x <- x[names(x) != "SHA256"] + nmx <- names(x) + res <- TRUE + not.here <- (nmxx %notin% nmx) + if(any(not.here)) { + res <- FALSE + if (sum(not.here) > 1L) + cat("files", paste(sQuote(nmxx[not.here]), collapse = ", "), + "are missing\n", sep = " ") + else + cat("file", sQuote(nmxx[not.here]), "is missing\n", sep = " ") + } + nmxx <- nmxx[!not.here] + diff <- xx[nmxx] != x[nmxx] + if(any(diff)) { + res <- FALSE + files <- nmxx[diff] + if(length(files) > 1L) + cat("files", paste(sQuote(files), collapse = ", "), + "have the wrong SHA256 checksums\n", sep = " ") + else cat("file", sQuote(files), "has the wrong SHA256 checksum\n") + } + res +} + +.hex.chars <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f") + +hex2raw <- function(x) { + if (length(x) != 1L) stop("x must be a single string") + if (!nzchar(x)) return(raw(1L)) + ## pad with 0 to full bytes + m <- match(strsplit(tolower(x),"")[[1L]], .hex.chars) + if (any(is.na(m))) stop("invalid hex string") + if (length(m) %% 2 == 1) m <- c(1L, m) ## add leading 0 for full byte + as.raw(colSums(matrix(m - 1L, 2) * c(16L, 1L))) +} + +.pad <- function(x, n) if (length(x) < n) c(x, raw(n - length(x))) else x + +hmac <- function(key, x, hash, block) { + key <- .pad(if (length(key) > block) hex2raw(hash(key)) else key, block) + # HMAC := HASH( c( key ^ 0x5c, HASH( c( key ^ 0x36, x ) ) ) ) + hash(c(xor(key, as.raw(0x5c)), + hex2raw(hash(c(xor(key, as.raw(0x36)), x))))) +} + +hmac.sha256 <- function(key, x) hmac(key, x, function(x) sha256sum(bytes=x), 64L) +hmac.md5 <- function(key, x) hmac(key, x, function(x) md5sum(bytes=x), 64L) diff --git a/src/library/tools/man/md5sum.Rd b/src/library/tools/man/md5sum.Rd 
index 437e45a0f77..2986256e444 100644 --- a/src/library/tools/man/md5sum.Rd +++ b/src/library/tools/man/md5sum.Rd @@ -1,6 +1,6 @@ % File src/library/tools/man/md5sum.Rd % Part of the R package, https://www.R-project.org -% Copyright 1995-2014 R Core Team +% Copyright 1995-2024 R Core Team % Distributed under GPL 2 or later \name{md5sum} @@ -44,7 +44,7 @@ md5sum(files, bytes) a 2001 release of \code{glibc}. } \seealso{ - \code{\link{checkMD5sums}} + \code{\link{checkMD5sums}}, \code{\link{sha256sum}} } \examples{ as.vector(md5sum(dir(R.home(), pattern = "^COPY", full.names = TRUE))) diff --git a/src/library/tools/man/sha256sum.Rd b/src/library/tools/man/sha256sum.Rd new file mode 100644 index 00000000000..c1f12d9281e --- /dev/null +++ b/src/library/tools/man/sha256sum.Rd @@ -0,0 +1,50 @@ +% File src/library/tools/man/sha256sum.Rd +% Part of the R package, https://www.R-project.org +% Copyright 1995-2024 R Core Team +% Distributed under GPL 2 or later + +\name{sha256sum} +\alias{sha256sum} +\title{Compute SHA-256 Checksums} +\description{ + Compute the 32-byte SHA-256 hashes of one or more files, or a raw vector of bytes. +} +\usage{ +sha256sum(files, bytes) +} +\arguments{ + \item{files}{character. The paths of file(s) whose contents are to be hashed.} + \item{bytes}{raw. Bytes to be hashed. + NB: \code{bytes} and \code{files} are mutually exclusive.} +} +\details{ + A SHA-256 \sQuote{hash} or \sQuote{checksum} or \sQuote{message digest} is + a 256-bit summary of the file contents represented by 64 hexadecimal + digits. + + On Windows all files are read in binary mode (as the \code{sha256sum} + utilities there do): on other OSes the files are read in the default + mode (almost always text mode where there is more than one). +} +\value{ + A character vector of the same length as \code{files}, with names + equal to \code{files} (possibly expanded). 
The elements will be + \code{NA} for non-existent or unreadable files, otherwise a + 64-character string of hexadecimal digits. + + For \code{bytes} the result is a single 64-character string. +} +\source{ + The underlying C code was written by \I{Ulrich Drepper}, extracted from + the public domain version SHA-crypt.txt version 0.6 (2016-8-31). +} +\seealso{ + \code{\link{md5sum}} +} +\examples{ +as.vector(sha256sum(dir(R.home(), pattern = "^COPY", full.names = TRUE))) +sha256sum(bytes=raw()) +sha256sum(bytes=charToRaw("abc")) +} +\keyword{utilities} + diff --git a/src/library/tools/src/Makefile.in b/src/library/tools/src/Makefile.in index 85767e69ced..acecf0f2c65 100644 --- a/src/library/tools/src/Makefile.in +++ b/src/library/tools/src/Makefile.in @@ -14,7 +14,7 @@ R_SHARE_DIR = $(R_HOME)/share R_INCLUDE_DIR = $(R_HOME)/include SOURCES_C = text.c init.c Rmd5.c md5.c signals.c install.c getfmts.c http.c \ - gramLatex.c gramRd.c pdscan.c + gramLatex.c gramRd.c pdscan.c Rsha256.c sha256.c DEPENDS = $(SOURCES_C:.c=.d) OBJECTS = $(SOURCES_C:.c=.o) diff --git a/src/library/tools/src/Makefile.win b/src/library/tools/src/Makefile.win index 1235f434a8c..8fe261c7554 100644 --- a/src/library/tools/src/Makefile.win +++ b/src/library/tools/src/Makefile.win @@ -12,7 +12,7 @@ subdir = src/library/$(pkg)/src R_HOME = $(top_builddir) SOURCES_C = text.c init.c Rmd5.c md5.c signals.c install.c getfmts.c http.c \ - gramLatex.c gramRd.c pdscan.c + gramLatex.c gramRd.c pdscan.c Rsha256.c sha256.c DEPENDS = $(SOURCES_C:.c=.d) OBJECTS = $(SOURCES_C:.c=.o) ../../../gnuwin32/dllversion.o diff --git a/src/library/tools/src/Rmd5.c b/src/library/tools/src/Rmd5.c index d07082c4bef..11512ff19f7 100644 --- a/src/library/tools/src/Rmd5.c +++ b/src/library/tools/src/Rmd5.c @@ -47,7 +47,7 @@ SEXP Rmd5(SEXP files) /* RAW mode: hash of one buffer instead of files */ if (TYPEOF(files) == RAWSXP) { /* there is really no failure possible, but just in case... 
*/ - if (!md5_buffer((char *) RAW(files), XLENGTH(files), resblock)) + if (!md5_buffer((const char *) RAW(files), XLENGTH(files), resblock)) return ScalarString(NA_STRING); for(j = 0; j < 16; j++) snprintf (out+2*j, 33-2*j, "%02x", resblock[j]); diff --git a/src/library/tools/src/Rsha256.c b/src/library/tools/src/Rsha256.c new file mode 100644 index 00000000000..7fd7a35b4c5 --- /dev/null +++ b/src/library/tools/src/Rsha256.c @@ -0,0 +1,101 @@ +/* + * R : A Computer Language for Statistical Data Analysis + * Copyright (C) 2003-2024 The R Core Team. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, a copy is available at + * https://www.R-project.org/Licenses/ + */ + +/* OK since this is intended to treat chars as byte streams */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#undef _ + +#include "tools.h" +#include "sha256.h" + +#define SHA256_HASH_SIZE 32 +#define SHA256_HEX_SIZE 64 + +/* convenience fn for init + process + finish */ +static void *Rsha256_buffer (const void *buffer, size_t len, void *resblock) +{ + struct sha256_ctx ctx; + Rsha256_init_ctx(&ctx); + Rsha256_process_bytes(buffer, len, &ctx); + return Rsha256_finish_ctx(&ctx, resblock); +} + +/* This is essentially identical to Rmd5 */ + +/* .Call so manages R_alloc stack */ +SEXP Rsha256(SEXP files) +{ + SEXP ans; + int i, j, nfiles = length(files), res; +#ifdef _WIN32 + const wchar_t *wpath; +#else + const char *path; +#endif + char out[SHA256_HEX_SIZE + 1]; + FILE *fp; + unsigned char resblock[SHA256_HASH_SIZE]; + + /* RAW mode: hash of one buffer instead of files */ + if (TYPEOF(files) == RAWSXP) { + /* there is really no failure possible, but just in case... 
*/ + if (!Rsha256_buffer((const void *) RAW(files), XLENGTH(files), resblock)) + return ScalarString(NA_STRING); + for(j = 0; j < SHA256_HASH_SIZE; j++) + snprintf (out+2*j, sizeof(out) - 2*j, "%02x", resblock[j]); + return mkString(out); + } + /* otherwise list of files */ + if(!isString(files)) error(_("argument 'files' must be character")); + PROTECT(ans = allocVector(STRSXP, nfiles)); + for(i = 0; i < nfiles; i++) { +#ifdef _WIN32 + wpath = filenameToWchar(STRING_ELT(files, i), FALSE); + fp = _wfopen(wpath, L"rb"); +#else + path = translateChar(STRING_ELT(files, i)); + fp = fopen(path, "r"); +#endif + if(!fp) { + SET_STRING_ELT(ans, i, NA_STRING); + } else { + res = Rsha256_stream(fp, &resblock); + if(res) { +#ifdef _WIN32 + warning(_("sha256 failed on file '%ls'"), wpath); +#else + warning(_("sha256 failed on file '%s'"), path); +#endif + SET_STRING_ELT(ans, i, NA_STRING); + } else { + for(j = 0; j < SHA256_HASH_SIZE; j++) + snprintf (out+2*j, sizeof(out) - 2*j, "%02x", resblock[j]); + SET_STRING_ELT(ans, i, mkChar(out)); + } + fclose(fp); + } + } + UNPROTECT(1); + return ans; +} diff --git a/src/library/tools/src/init.c b/src/library/tools/src/init.c index 074b8d19fbf..cb5eb95d132 100644 --- a/src/library/tools/src/init.c +++ b/src/library/tools/src/init.c @@ -42,6 +42,7 @@ static const R_CallMethodDef CallEntries[] = { CALLDEF(dirchmod, 2), CALLDEF(getfmts, 1), CALLDEF(Rmd5, 1), + CALLDEF(Rsha256, 1), CALLDEF(check_nonASCII, 2), CALLDEF(check_nonASCII2, 1), CALLDEF(doTabExpand, 2), diff --git a/src/library/tools/src/sha256.c b/src/library/tools/src/sha256.c new file mode 100644 index 00000000000..4217eb6cfb6 --- /dev/null +++ b/src/library/tools/src/sha256.c @@ -0,0 +1,319 @@ +/* SHA256 implementation. + * R : A Computer Language for Statistical Data Analysis + * Copyright (C) 2003-2024 The R Core Team. + * Based on code released into the Public Domain by + * Ulrich Drepper . 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, a copy is available at + * https://www.R-project.org/Licenses/ + */ + +#include +#include + +#include +#include + +#include "sha256.h" + +/* re-name our API points to avoid possible conflicts with other implementations */ +#define sha256_init_ctx Rsha256_init_ctx +#define sha256_process_bytes Rsha256_process_bytes +#define sha256_stream Rsha256_stream +#define sha256_finish_ctx Rsha256_finish_ctx + +#ifndef WORDS_BIGENDIAN +# define SWAP(n) \ + (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) +#else +# define SWAP(n) (n) +#endif + + +/* This array contains the bytes used to pad the buffer to the next + 64-byte boundary. (FIPS 180-2:5.1.1) */ +static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; + + +/* Constants for SHA256 from FIPS 180-2:4.2.2. 
*/ +static const uint32_t K[64] = + { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + }; + + +/* Process LEN bytes of BUFFER, accumulating context into CTX. + It is assumed that LEN % 64 == 0. */ +static void sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx) +{ + const uint32_t *words = buffer; + size_t nwords = len / sizeof (uint32_t); + uint32_t a = ctx->H[0]; + uint32_t b = ctx->H[1]; + uint32_t c = ctx->H[2]; + uint32_t d = ctx->H[3]; + uint32_t e = ctx->H[4]; + uint32_t f = ctx->H[5]; + uint32_t g = ctx->H[6]; + uint32_t h = ctx->H[7]; + + /* First increment the byte count. FIPS 180-2 specifies the possible + length of the file up to 2^64 bits. Here we only compute the + number of bytes. Do a double word increment. */ + ctx->total[0] += len; + if (ctx->total[0] < len) + ++ctx->total[1]; + + /* Process all bytes in the buffer with 64 bytes in each round of + the loop. */ + while (nwords > 0) + { + uint32_t W[64]; + uint32_t a_save = a; + uint32_t b_save = b; + uint32_t c_save = c; + uint32_t d_save = d; + uint32_t e_save = e; + uint32_t f_save = f; + uint32_t g_save = g; + uint32_t h_save = h; + + /* Operators defined in FIPS 180-2:4.1.2. 
*/ +#define Ch(x, y, z) ((x & y) ^ (~x & z)) +#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) +#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22)) +#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25)) +#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3)) +#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10)) + + /* It is unfortunate that C does not provide an operator for + cyclic rotation. Hope the C compiler is smart enough. */ +#define CYCLIC(w, s) ((w >> s) | (w << (32 - s))) + + /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */ + for (unsigned int t = 0; t < 16; ++t) + { + W[t] = SWAP (*words); + ++words; + } + for (unsigned int t = 16; t < 64; ++t) + W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16]; + + /* The actual computation according to FIPS 180-2:6.2.2 step 3. */ + for (unsigned int t = 0; t < 64; ++t) + { + uint32_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t]; + uint32_t T2 = S0 (a) + Maj (a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + } + + /* Add the starting values of the context according to FIPS 180-2:6.2.2 + step 4. */ + a += a_save; + b += b_save; + c += c_save; + d += d_save; + e += e_save; + f += f_save; + g += g_save; + h += h_save; + + /* Prepare for the next round. */ + nwords -= 16; + } + + /* Put checksum in context given as argument. */ + ctx->H[0] = a; + ctx->H[1] = b; + ctx->H[2] = c; + ctx->H[3] = d; + ctx->H[4] = e; + ctx->H[5] = f; + ctx->H[6] = g; + ctx->H[7] = h; +} + + +/* Initialize structure containing state of computation. 
+ (FIPS 180-2:5.3.2) */ +void sha256_init_ctx (struct sha256_ctx *ctx) +{ + ctx->H[0] = 0x6a09e667; + ctx->H[1] = 0xbb67ae85; + ctx->H[2] = 0x3c6ef372; + ctx->H[3] = 0xa54ff53a; + ctx->H[4] = 0x510e527f; + ctx->H[5] = 0x9b05688c; + ctx->H[6] = 0x1f83d9ab; + ctx->H[7] = 0x5be0cd19; + + ctx->total[0] = ctx->total[1] = 0; + ctx->buflen = 0; +} + + +/* Process the remaining bytes in the internal buffer and the usual + prolog according to the standard and write the result to RESBUF. + + IMPORTANT: On some systems it is required that RESBUF is correctly + aligned for a 32 bits value. */ +void *sha256_finish_ctx (struct sha256_ctx *ctx, void *resbuf) +{ + /* Take yet unprocessed bytes into account. */ + uint32_t bytes = ctx->buflen; + size_t pad; + + /* Now count remaining bytes. */ + ctx->total[0] += bytes; + if (ctx->total[0] < bytes) + ++ctx->total[1]; + + pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes; + memcpy (&ctx->buffer[bytes], fillbuf, pad); + + /* Put the 64-bit file length in *bits* at the end of the buffer. */ + *(uint32_t *) &ctx->buffer[bytes + pad + 4] = SWAP (ctx->total[0] << 3); + *(uint32_t *) &ctx->buffer[bytes + pad] = SWAP ((ctx->total[1] << 3) | + (ctx->total[0] >> 29)); + + /* Process last bytes. */ + sha256_process_block (ctx->buffer, bytes + pad + 8, ctx); + + /* Put result from CTX in first 32 bytes following RESBUF. */ + for (unsigned int i = 0; i < 8; ++i) + ((uint32_t *) resbuf)[i] = SWAP (ctx->H[i]); + + return resbuf; +} + + +void sha256_process_bytes (const void *buffer, size_t len, struct sha256_ctx *ctx) +{ + /* When we already have some bits in our internal buffer concatenate + both inputs first. */ + if (ctx->buflen != 0) + { + size_t left_over = ctx->buflen; + size_t add = 128 - left_over > len ? 
len : 128 - left_over; + + memcpy (&ctx->buffer[left_over], buffer, add); + ctx->buflen += add; + + if (ctx->buflen > 64) + { + sha256_process_block (ctx->buffer, ctx->buflen & ~63, ctx); + + ctx->buflen &= 63; + /* The regions in the following copy operation cannot overlap. */ + memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63], + ctx->buflen); + } + + buffer = (const char *) buffer + add; + len -= add; + } + + /* Process available complete blocks. */ + if (len >= 64) + { +/* To check alignment gcc has an appropriate operator. Other + compilers don't. */ +#if __GNUC__ >= 2 +# define UNALIGNED_P(p) (((uintptr_t) p) % __alignof__ (uint32_t) != 0) +#else +# define UNALIGNED_P(p) (((uintptr_t) p) % sizeof (uint32_t) != 0) +#endif + if (UNALIGNED_P (buffer)) + while (len > 64) + { + sha256_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); + buffer = (const char *) buffer + 64; + len -= 64; + } + else + { + sha256_process_block (buffer, len & ~63, ctx); + buffer = (const char *) buffer + (len & ~63); + len &= 63; + } + } + + /* Move remaining bytes into internal buffer. */ + if (len > 0) + { + size_t left_over = ctx->buflen; + + memcpy (&ctx->buffer[left_over], buffer, len); + left_over += len; + if (left_over >= 64) + { + sha256_process_block (ctx->buffer, 64, ctx); + left_over -= 64; + memcpy (ctx->buffer, &ctx->buffer[64], left_over); + } + ctx->buflen = left_over; + } +} + +int sha256_stream (FILE *stream, void *resblock) +{ + /* read buffer size (BUF_SIZE) must be a multiple of the block size (64) */ +#define BUF_SIZE 4096 + struct sha256_ctx ctx; + char buffer[BUF_SIZE]; + size_t sum = 0; + + sha256_init_ctx (&ctx); + while (1) { /* while there is any content ... */ + size_t n; + /* read next block */ + while (sum < BUF_SIZE && (n = fread(buffer + sum, 1, BUF_SIZE - sum, stream)) > 0) + sum += n; + if (n == 0) { /* no more data ? 
*/ + if (ferror(stream)) return 1; /* error */ + if (sum < BUF_SIZE) break; /* EOF */ + } + /* full block */ + sha256_process_block (buffer, BUF_SIZE, &ctx); + sum = 0; + } + + /* add any remaining bytes */ + if (sum > 0) sha256_process_bytes (buffer, sum, &ctx); + + sha256_finish_ctx (&ctx, resblock); + return 0; +} diff --git a/src/library/tools/src/sha256.h b/src/library/tools/src/sha256.h new file mode 100644 index 00000000000..bafaedf7af0 --- /dev/null +++ b/src/library/tools/src/sha256.h @@ -0,0 +1,44 @@ +/* SHA256 implementation. + * R : A Computer Language for Statistical Data Analysis + * Copyright (C) 2003-2024 The R Core Team. + * Based on code released into the Public Domain by + * Ulrich Drepper . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, a copy is available at + * https://www.R-project.org/Licenses/ + */ + +#ifndef R_SHA256_H +#define R_SHA256_H 1 + +#include +#include +#include /* for FILE I/O in Rsha256_stream */ + +/* Structure to save state of computation between the single steps. */ +struct sha256_ctx +{ + uint32_t H[8]; + + uint32_t total[2]; + uint32_t buflen; + char buffer[128]; /* NB: always correctly aligned for uint32_t. 
*/ +}; + +extern void Rsha256_init_ctx(struct sha256_ctx *ctx); +extern void Rsha256_process_bytes(const void *buffer, size_t len, struct sha256_ctx *ctx); +extern void* Rsha256_finish_ctx(struct sha256_ctx *ctx, void *resbuf); +extern int Rsha256_stream(FILE *stream, void *resblock); + +#endif diff --git a/src/library/tools/src/tools.h b/src/library/tools/src/tools.h index 518163caac5..c7ce25cae6e 100644 --- a/src/library/tools/src/tools.h +++ b/src/library/tools/src/tools.h @@ -32,6 +32,7 @@ SEXP delim_match(SEXP x, SEXP delims); SEXP dirchmod(SEXP dr, SEXP gwsxp); SEXP Rmd5(SEXP files); +SEXP Rsha256(SEXP files); SEXP check_nonASCII(SEXP text, SEXP ignore_quotes); SEXP check_nonASCII2(SEXP text); SEXP doTabExpand(SEXP strings, SEXP starts); diff --git a/src/library/tools/tests/hashes.R b/src/library/tools/tests/hashes.R new file mode 100644 index 00000000000..ab8d79fa382 --- /dev/null +++ b/src/library/tools/tests/hashes.R @@ -0,0 +1,64 @@ +require("tools") + +## -- MD5 test vectors +## RFC 1321 A.5: +stopifnot(identical(md5sum(bytes=raw()), + "d41d8cd98f00b204e9800998ecf8427e")) +stopifnot(identical(md5sum(bytes=charToRaw("a")), + "0cc175b9c0f1b6a831c399e269772661")) +stopifnot(identical(md5sum(bytes=charToRaw("abc")), + "900150983cd24fb0d6963f7d28e17f72")) +## not official, but the FIPS180-2 vectors +stopifnot(identical(md5sum(bytes=rep(charToRaw("a"),1e6)), + "7707d6ae4e027c70eea2a935c2296f21")) +stopifnot(identical(md5sum(bytes=charToRaw("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq")), + "8215ef0796a20bcaaae116d3876c664a")) + +## -- SHA256 test vectors +stopifnot(identical(sha256sum(bytes=raw()), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")) + +## FIPS180-2 Appendix B.3 test vectors +stopifnot(identical(sha256sum(bytes=charToRaw("abc")), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad")) +stopifnot(identical(sha256sum(bytes=charToRaw("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq")), + 
"248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1")) +stopifnot(identical(sha256sum(bytes=rep(charToRaw("a"),1e6)), + "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0")) + +## let's see if there are hidden HMAC functions - if so, test them +if (!is.null(environment(tools::sha256sum)[["hmac"]])) { + +## [[ we have not expoorted HMAC functions yet ]] +hmac.md5 <- tools:::hmac.md5 +hmac.sha256 <- tools:::hmac.sha256 + +## -- HMAC test vectors +## RFC 2202 HMAC-MD5 +stopifnot(identical(hmac.md5(rep(as.raw(0xb),16), charToRaw("Hi There")), + "9294727a3638bb1c13f48ef8158bfc9d")) +stopifnot(identical(hmac.md5(charToRaw("Jefe"), charToRaw("what do ya want for nothing?")), + "750c783e6ab0b503eaa86e310a5db738")) +stopifnot(identical(hmac.md5(rep(as.raw(0xaa),16), rep(as.raw(0xdd), 50)), + "56be34521d144c88dbb8c733f0e8b3f6")) +## skipping test_case = 4,5 +stopifnot(identical(hmac.md5(rep(as.raw(0xaa),80), charToRaw("Test Using Larger Than Block-Size Key - Hash Key First")), + "6b1ab7fe4bd7bf8f0b62e6ce61b9d0cd")) +stopifnot(identical(hmac.md5(rep(as.raw(0xaa),80), charToRaw("Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data")), + "6f630fad67cda0ee1fb1f562db3aa53e")) + +## RFC 4231 HMAC-SHA256 +stopifnot(identical(hmac.sha256(rep(as.raw(0xb),20), charToRaw("Hi There")), + "b0344c61d8db38535ca8afceaf0bf12b881dc200c9833da726e9376c2e32cff7")) +stopifnot(identical(hmac.sha256(charToRaw("Jefe"), charToRaw("what do ya want for nothing?")), + "5bdcc146bf60754e6a042426089575c75a003f089d2739839dec58b964ec3843")) +stopifnot(identical(hmac.sha256(rep(as.raw(0xaa),20), rep(as.raw(0xdd), 50)), + "773ea91e36800e46854db8ebd09181a72959098b3ef8c122d9635514ced565fe")) +## skipping test_case = 4,5 (we don't truncate - left to the user) +stopifnot(identical(hmac.sha256(rep(as.raw(0xaa),131), charToRaw("Test Using Larger Than Block-Size Key - Hash Key First")), + "60e431591ee0b67f0d8a26aacbf5b77f8e0bc6213728c5140546040f0ee37f54")) 
+stopifnot(identical(hmac.sha256(rep(as.raw(0xaa),131), + charToRaw("This is a test using a larger than block-size key and a larger than block-size data. The key needs to be hashed before being used by the HMAC algorithm.")), + "9b09ffa71b942fcb27635fbcd5b0e944bfdc63644f0713938a7f51535c3a35e2")) + +}