Skip to content

Commit

Permalink
add support for md5sum(bytes=) hashing a raw vector
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.r-project.org/R/trunk@87162 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information
urbaneks committed Sep 18, 2024
1 parent b81cbfa commit bd5b42f
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 8 deletions.
4 changes: 4 additions & 0 deletions doc/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@

\item The set operations now avoid the \code{as.vector()}
transformation for same-kind apparently vector-like operands.

\item \code{md5sum()} can be used to compute an MD5 hash of a raw
vector of bytes by using the \code{bytes=} argument instead
of \code{files=}. The two arguments are mutually exclusive.
}
}

Expand Down
13 changes: 10 additions & 3 deletions src/library/tools/R/md5.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/

md5sum <- function(files) {
files <- path.expand(files)
structure(.Call(C_Rmd5, files), names=files)
## Compute MD5 checksums, either of files or of a raw vector of bytes.
##
## files: character vector of file paths; each path is tilde-expanded
##        before hashing and the result is named by the expanded paths.
## bytes: raw vector to hash directly; the native routine returns a
##        single string for it.
##
## Exactly one of 'files' and 'bytes' may be supplied.
md5sum <- function(files, bytes) {
    have_bytes <- !missing(bytes)

    ## The two input modes cannot be combined.
    if (have_bytes && !missing(files))
        stop("files and bytes are mutually exclusive")

    ## File mode: hash every (expanded) path, labelling results by path.
    if (!have_bytes) {
        paths <- path.expand(files)
        return(structure(.Call(C_Rmd5, paths), names = paths))
    }

    ## Raw mode: only genuine raw vectors are passed to the C code.
    if (!is.raw(bytes))
        stop("bytes must be a raw vector")
    .Call(C_Rmd5, bytes)
}

.installMD5sums <- function(pkgDir, outDir = pkgDir)
Expand Down
10 changes: 8 additions & 2 deletions src/library/tools/man/md5sum.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
\alias{md5sum}
\title{Compute MD5 Checksums}
\description{
Compute the 32-byte MD5 hashes of one or more files.
Compute the 32-byte MD5 hashes of one or more files, or a raw vector of bytes.
}
\usage{
md5sum(files)
md5sum(files, bytes)
}
\arguments{
\item{files}{character. The paths of file(s) whose contents are to be hashed.}
\item{bytes}{raw. Bytes to be hashed.
NB: \code{bytes} and \code{files} are mutually exclusive.}
}
\details{
An MD5 \sQuote{hash} or \sQuote{checksum} or \sQuote{message digest} is
Expand All @@ -34,6 +36,8 @@ md5sum(files)
equal to \code{files} (possibly expanded). The elements will be
\code{NA} for non-existent or unreadable files, otherwise a
32-character string of hexadecimal digits.

For \code{bytes} the result is a single 32-character string.
}
\source{
The underlying C code was written by \I{Ulrich Drepper} and extracted from
Expand All @@ -44,6 +48,8 @@ md5sum(files)
}
\examples{
as.vector(md5sum(dir(R.home(), pattern = "^COPY", full.names = TRUE)))
md5sum(bytes=raw())
md5sum(bytes=charToRaw("abc"))
}
\keyword{utilities}

10 changes: 10 additions & 0 deletions src/library/tools/src/Rmd5.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ SEXP Rmd5(SEXP files)
FILE *fp;
unsigned char resblock[16];

/* RAW mode: hash of one buffer instead of files */
if (TYPEOF(files) == RAWSXP) {
/* there is really no failure possible, but just in case... */
if (!md5_buffer(RAW(files), XLENGTH(files), resblock))
return ScalarString(NA_STRING);
for(j = 0; j < 16; j++)
snprintf (out+2*j, 33-2*j, "%02x", resblock[j]);
return mkString(out);
}
/* otherwise list of files */
if(!isString(files)) error(_("argument 'files' must be character"));
PROTECT(ans = allocVector(STRSXP, nfiles));
for(i = 0; i < nfiles; i++) {
Expand Down
4 changes: 1 addition & 3 deletions src/library/tools/src/md5.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,11 @@ md5_stream (FILE *stream, void *resblock)
return 0;
}

#ifdef UNUSED
/* Compute MD5 message digest for LEN bytes beginning at BUFFER. The
result is always in little endian byte order, so that a byte-wise
output yields the wanted ASCII representation of the message
digest. */
static void *
void *
md5_buffer (const char *buffer, size_t len, void *resblock)
{
struct md5_ctx ctx;
Expand All @@ -182,7 +181,6 @@ md5_buffer (const char *buffer, size_t len, void *resblock)
/* Put result in desired memory area. */
return md5_finish_ctx (&ctx, resblock);
}
#endif

static void
md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx)
Expand Down
2 changes: 2 additions & 0 deletions src/library/tools/src/md5.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ struct md5_ctx
resulting message digest number will be written into the 16 bytes
beginning at RESBLOCK. */
extern int md5_stream __P ((FILE *stream, void *resblock));
/* same as above for a buffer instead of file; returns resblock on success */
extern void* md5_buffer __P ((const char *buffer, size_t len, void *resblock));

#ifndef ROL_UNUSED
/* The following is from gnupg-1.0.2's cipher/bithelp.h. */
Expand Down
4 changes: 4 additions & 0 deletions tests/Examples/tools-Ex.Rout.save
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,10 @@ character(0)
>
> as.vector(md5sum(dir(R.home(), pattern = "^COPY", full.names = TRUE)))
[1] "eb723b61539feef013de476e68b5c50a"
> md5sum(bytes=raw())
[1] "d41d8cd98f00b204e9800998ecf8427e"
> md5sum(bytes=charToRaw("abc"))
[1] "900150983cd24fb0d6963f7d28e17f72"
>
>
>
Expand Down

0 comments on commit bd5b42f

Please sign in to comment.