Skip to content

Commit

Permalink
Avoid as.vector() trafo for same-kind apparently vector-like operands.
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.r-project.org/R/trunk@87083 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information
hornik committed Sep 1, 2024
1 parent 0e1629a commit 982288a
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 70 deletions.
154 changes: 94 additions & 60 deletions src/library/base/R/sets.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# File src/library/base/R/sets.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2021 The R Core Team
# Copyright (C) 1995-2024 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -16,76 +16,110 @@
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/

union <- function(x, y) {
u <- as.vector(x)
v <- as.vector(y)
## <FIXME>
## Remove eventually: not safe enough for arbitrary classes.
## if(!is.object(x) || !is.object(y) ||
## !identical(class(x), class(y))) {
## x <- u
## y <- v
## }
## z <- c(x[!duplicated(unclass(u))],
## y[!duplicated(unclass(v)) & (match(v, u, 0L) == 0L)])
## names(z) <- NULL
## z
## </FIXME>
## Could do
## c(u[!duplicated(unclass(u))],
## v[!duplicated(unclass(v)) & (match(v, u, 0L) == 0L)])
## but the following is faster and "basically the same":
unique(c(u, v))
## <NOTE>
## The set ops have always been documented to work for args that are
## "same-kind" (same mode in the unclassed case) and sequences of items,
## i.e., "vector-like".
## In the "same-kind" case we test for vector-like whether subscripting
## no items from x or y retains the class.
## Where needed, we also check whether duplicated() on x or y has the
## same length as x or y: for consistency this could always be done, at
## a possible loss of efficiency where not needed.
## </NOTE>

union <-
function(x, y)
{
if(is.null(x)) return(y)
if(is.null(y)) return(x)
cx <- class(x)
cy <- class(y)
if((isa(x, cy) || isa(y, cx)) &&
identical(class(y0 <- y[integer()]), cy) &&
(length(dx <- duplicated(x)) == length(x)) &&
(length(dy <- duplicated(y)) == length(y))) {
if(!isa(x, cy))
x <- c(y0, x)
} else {
x <- as.vector(x)
y <- as.vector(y)
y0 <- y[integer()]
dx <- duplicated(x)
dy <- duplicated(y)
}
x <- x[!dx]
y <- y[!dy & (match(y, x, 0L) == 0L)]
c(x, y)
}

intersect <- function(x, y)
intersect <-
function(x, y)
{
if(is.null(x) || is.null(y))
return(NULL)
u <- as.vector(x)
v <- as.vector(y)
## <FIXME>
## Remove eventually: not safe enough for arbitrary classes.
## if(!is.object(x) || !is.object(y) ||
## !identical(class(x), class(y))) {
## x <- u
## y <- v
## }
## z <- c(x[!duplicated(unclass(u)) & (match(u, v, 0L) > 0L)],
## y[numeric()])
## ## (Combining with y[numeric()] in the common class case is needed
## ## e.g. for factors to combine levels.)
## names(z) <- NULL
## z
## </FIXME>
c(u[!duplicated(unclass(u)) & (match(u, v, 0L) > 0L)],
v[numeric()])
cx <- class(x)
cy <- class(y)
if((isa(x, cy) || isa(y, cx)) &&
identical(class(y0 <- y[integer()]), cy) &&
(length(dx <- duplicated(x)) == length(x))) {
x <- x[!dx]
} else {
x <- as.vector(x)
x <- x[!duplicated(x)]
y <- as.vector(y)
y0 <- y[integer()]
}
## Combining with y0 in the common class case is needed e.g. for
## factors to combine levels, and otherwise to get a common mode.
c(x[match(x, y, 0L) > 0L], y0)
}

setdiff <- function(x, y)
setdiff <-
function(x, y)
{
u <- as.vector(x)
v <- as.vector(y)
## <FIXME>
## Remove eventually: not safe enough for arbitrary classes.
## z <- x[!duplicated(unclass(u)) & (match(u, v, 0L) == 0L)]
## names(z) <- NULL
## z
## </FIXME>
u[!duplicated(unclass(u)) & (match(u, v, 0L) == 0L)]
if(is.null(x) || is.null(y))
return(x)
cx <- class(x)
cy <- class(y)
if((isa(x, cy) || isa(y, cx)) &&
identical(class(x[integer()]), cx) &&
(length(dx <- duplicated(x)) == length(x))) {
x <- x[!dx]
} else {
x <- as.vector(x)
x <- x[!duplicated(x)]
y <- as.vector(y)
}
x[match(x, y, 0L) == 0L]
}

## speed optimization etc: R-devel, Jan.4-6, 2000; then again 15 yrs later
setequal <- function(x, y)
setequal <-
function(x, y)
{
x <- as.vector(x)
y <- as.vector(y)
cx <- class(x)
cy <- class(y)
if(!((isa(x, cy) || isa(y, cx)) &&
identical(class(x[integer()]), cx))) {
x <- as.vector(x)
y <- as.vector(y)
}
!( anyNA(match(x, y)) || anyNA(match(y, x)) )
}

## same as %in% ( ./match.R ) but different arg names, and use match()
## on as.vector() transformations for consistency with the other set
## functions.
is.element <- function(el, set)
match(as.vector(el), as.vector(set), 0L) > 0L

## same as %in% ( ./match.R ) but different arg names (and possible
## as.vactor() transformation in the case args are not vector-like
## same-kind.
is.element <-
function(el, set)
{
x <- el
y <- set
cx <- class(x)
cy <- class(y)
if(!((isa(x, cy) || isa(y, cx)) &&
identical(class(x[integer()]), cx))) {
x <- as.vector(x)
y <- as.vector(y)
}
match(x, y, 0L) > 0L
}
42 changes: 32 additions & 10 deletions src/library/base/man/sets.Rd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
% File src/library/base/man/sets.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2011 R Core Team
% Copyright 1995-2024 R Core Team
% Distributed under GPL 2 or later

\name{sets}
Expand Down Expand Up @@ -32,20 +32,33 @@ is.element(el, set)
equality and membership on two vectors.
}
\details{
Each of \code{union}, \code{intersect}, \code{setdiff} and
\code{setequal} will discard any duplicated values in the arguments,
and they apply \code{\link{as.vector}} to their arguments (and so
in particular coerce factors to character vectors).
The set operations are intended for \dQuote{same-kind}
\dQuote{vector-like} objects containing sequences of items. However,
being \dQuote{vector-like} cannot easily be ascertained (in particular
as \code{\link{is.vector}()} enforces a very narrow concept of
\dQuote{vector}).

\code{is.element(x, y)} is identical to \code{x \%in\% y}.
Thus, for \R < 4.5.0, the set operands were always transformed via
\code{\link{as.vector}()} (so that in particular, factors were coerced
to character vectors). Starting with \R 4.5.0, operands of the
\dQuote{same kind} (in the sense that \code{\link{isa}(x, class(y))}
or \code{\link{isa}(y, class(x))} which appear to be
\dQuote{vector-like} (in the sense that subscripting \code{x} and/or
\code{y} by \code{integer()} leaves the class unchanged) are no longer
transformed. In particular, union, intersection and set difference
of two factors now give factors (see the examples).

\code{is.element(x, y)} is identical to \code{x \%in\% y} (after
possibly transforming via \code{\link{as.vector}()}).
}
\value{
For \code{union}, a vector of a common mode.
For \code{union}, a vector of a common mode or class.

For \code{intersect}, a vector of a common mode, or \code{NULL} if
\code{x} or \code{y} is \code{NULL}.
For \code{intersect}, a vector of a common mode or class, or
\code{NULL} if \code{x} or \code{y} is \code{NULL}.

For \code{setdiff}, a vector of the same \code{\link{mode}} as \code{x}.
For \code{setdiff}, a vector of the same \code{\link{mode}} or class
as \code{x}.

A logical scalar for \code{setequal} and a logical of the same
length as \code{x} for \code{is.element}.
Expand All @@ -72,5 +85,14 @@ setequal( union(x, y),

is.element(x, y) # length 10
is.element(y, x) # length 8

## Factors:
x <- as.factor(c("A", "B", "A"))
y <- as.factor(c("B", "b"))
union(x, y)
intersect(x, y)
setdiff(x, y)
setdiff(y, x)
## (Note that union() and intersect() merge the levels.)
}
\keyword{misc}

0 comments on commit 982288a

Please sign in to comment.