diff --git a/doc/NEWS.Rd b/doc/NEWS.Rd index 75ce6d184f0..3ca8f9bc260 100644 --- a/doc/NEWS.Rd +++ b/doc/NEWS.Rd @@ -87,6 +87,10 @@ \item The set operations now avoid the \code{as.vector()} transformation for same-kind apparently vector-like operands. + + \item \code{md5sum()} can be used to compute an MD5 hash of a raw + vector of bytes by using the \code{bytes=} argument instead + of \code{files=}. The two arguments are mutually exclusive. } } diff --git a/src/library/tools/R/md5.R b/src/library/tools/R/md5.R index 22e27127505..efe4d052984 100644 --- a/src/library/tools/R/md5.R +++ b/src/library/tools/R/md5.R @@ -16,9 +16,16 @@ # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ -md5sum <- function(files) { - files <- path.expand(files) - structure(.Call(C_Rmd5, files), names=files) +md5sum <- function(files, bytes) { + if (!missing(files) && !missing(bytes)) + stop("files and bytes are mutually exclusive") + if (!missing(bytes)) { + if (!is.raw(bytes)) stop("bytes must be a raw vector") + .Call(C_Rmd5, bytes) + } else { + files <- path.expand(files) + structure(.Call(C_Rmd5, files), names=files) + } } .installMD5sums <- function(pkgDir, outDir = pkgDir) diff --git a/src/library/tools/man/md5sum.Rd b/src/library/tools/man/md5sum.Rd index 9136ac30f96..437e45a0f77 100644 --- a/src/library/tools/man/md5sum.Rd +++ b/src/library/tools/man/md5sum.Rd @@ -7,13 +7,15 @@ \alias{md5sum} \title{Compute MD5 Checksums} \description{ - Compute the 32-byte MD5 hashes of one or more files. + Compute the MD5 hashes (32-character hexadecimal strings) of one or more files, or of a raw vector of bytes. } \usage{ -md5sum(files) +md5sum(files, bytes) } \arguments{ \item{files}{character. The paths of file(s) whose contents are to be hashed.} + \item{bytes}{raw. Bytes to be hashed. 
+ NB: \code{bytes} and \code{files} are mutually exclusive.} } \details{ A MD5 \sQuote{hash} or \sQuote{checksum} or \sQuote{message digest} is @@ -34,6 +36,8 @@ md5sum(files) equal to \code{files} (possibly expanded). The elements will be \code{NA} for non-existent or unreadable files, otherwise a 32-character string of hexadecimal digits. + + For \code{bytes} the result is a single 32-character string. } \source{ The underlying C code was written by \I{Ulrich Drepper} and extracted from @@ -44,6 +48,8 @@ md5sum(files) } \examples{ as.vector(md5sum(dir(R.home(), pattern = "^COPY", full.names = TRUE))) +md5sum(bytes=raw()) +md5sum(bytes=charToRaw("abc")) } \keyword{utilities} diff --git a/src/library/tools/src/Rmd5.c b/src/library/tools/src/Rmd5.c index a3cffd45207..fd4afe5996e 100644 --- a/src/library/tools/src/Rmd5.c +++ b/src/library/tools/src/Rmd5.c @@ -44,6 +44,16 @@ SEXP Rmd5(SEXP files) FILE *fp; unsigned char resblock[16]; + /* RAW mode: hash of one buffer instead of files; RAW() yields Rbyte* (unsigned char*) while md5_buffer() takes const char*, hence the explicit cast below */ + if (TYPEOF(files) == RAWSXP) { + /* there is really no failure possible, but just in case... */ + if (!md5_buffer((const char *) RAW(files), XLENGTH(files), resblock)) + return ScalarString(NA_STRING); + for(j = 0; j < 16; j++) + snprintf (out+2*j, 33-2*j, "%02x", resblock[j]); + return mkString(out); + } + /* otherwise list of files */ if(!isString(files)) error(_("argument 'files' must be character")); PROTECT(ans = allocVector(STRSXP, nfiles)); for(i = 0; i < nfiles; i++) { diff --git a/src/library/tools/src/md5.c b/src/library/tools/src/md5.c index f1e0c90f4c9..22a1b0e4efc 100644 --- a/src/library/tools/src/md5.c +++ b/src/library/tools/src/md5.c @@ -163,12 +163,11 @@ md5_stream (FILE *stream, void *resblock) return 0; } -#ifdef UNUSED /* Compute MD5 message digest for LEN bytes beginning at BUFFER. The result is always in little endian byte order, so that a byte-wise output yields to the wanted ASCII representation of the message digest. 
*/ -static void * +void * md5_buffer (const char *buffer, size_t len, void *resblock) { struct md5_ctx ctx; @@ -182,7 +181,6 @@ md5_buffer (const char *buffer, size_t len, void *resblock) /* Put result in desired memory area. */ return md5_finish_ctx (&ctx, resblock); } -#endif static void md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx) diff --git a/src/library/tools/src/md5.h b/src/library/tools/src/md5.h index 82fa4b45ace..3b1becfe486 100644 --- a/src/library/tools/src/md5.h +++ b/src/library/tools/src/md5.h @@ -100,6 +100,8 @@ struct md5_ctx resulting message digest number will be written into the 16 bytes beginning at RESBLOCK. */ extern int md5_stream __P ((FILE *stream, void *resblock)); +/* same as above for a buffer instead of file; returns resblock on success */ +extern void* md5_buffer __P ((const char *buffer, size_t len, void *resblock)); #ifndef ROL_UNUSED /* The following is from gnupg-1.0.2's cipher/bithelp.h. */ diff --git a/tests/Examples/tools-Ex.Rout.save b/tests/Examples/tools-Ex.Rout.save index cf5bb12e653..696a472562d 100644 --- a/tests/Examples/tools-Ex.Rout.save +++ b/tests/Examples/tools-Ex.Rout.save @@ -796,6 +796,10 @@ character(0) > > as.vector(md5sum(dir(R.home(), pattern = "^COPY", full.names = TRUE))) [1] "eb723b61539feef013de476e68b5c50a" +> md5sum(bytes=raw()) +[1] "d41d8cd98f00b204e9800998ecf8427e" +> md5sum(bytes=charToRaw("abc")) +[1] "900150983cd24fb0d6963f7d28e17f72" > > >