Merge pull request #3926 from tidyverse/fix-3923/subset-frame

column_subset evaluating [ in the frame
tidyverse · Oct 18, 2018 · bcaff7b · bcaff7b
2 parents 5610775 + b01913f
commit bcaff7b
Show file tree

Hide file tree

Showing 17 changed files with 184 additions and 118 deletions.
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -38,8 +38,8 @@ maybe_shared_columns <- function(df) {
     .Call(`_dplyr_maybe_shared_columns`, df)
 }
 
-arrange_impl <- function(df, quosures) {
-    .Call(`_dplyr_arrange_impl`, df, quosures)
+arrange_impl <- function(df, quosures, frame) {
+    .Call(`_dplyr_arrange_impl`, df, quosures, frame)
 }
 
 #' Do values in a numeric vector fall in specified range?
@@ -76,8 +76,8 @@ combine_all <- function(data) {
     .Call(`_dplyr_combine_all`, data)
 }
 
-distinct_impl <- function(df, vars, keep) {
-    .Call(`_dplyr_distinct_impl`, df, vars, keep)
+distinct_impl <- function(df, vars, keep, frame) {
+    .Call(`_dplyr_distinct_impl`, df, vars, keep, frame)
 }
 
 n_distinct_multi <- function(variables, na_rm = FALSE) {
@@ -120,32 +120,32 @@ hybrids <- function() {
     .Call(`_dplyr_hybrids`)
 }
 
-semi_join_impl <- function(x, y, by_x, by_y, na_match) {
-    .Call(`_dplyr_semi_join_impl`, x, y, by_x, by_y, na_match)
+semi_join_impl <- function(x, y, by_x, by_y, na_match, frame) {
+    .Call(`_dplyr_semi_join_impl`, x, y, by_x, by_y, na_match, frame)
 }
 
-anti_join_impl <- function(x, y, by_x, by_y, na_match) {
-    .Call(`_dplyr_anti_join_impl`, x, y, by_x, by_y, na_match)
+anti_join_impl <- function(x, y, by_x, by_y, na_match, frame) {
+    .Call(`_dplyr_anti_join_impl`, x, y, by_x, by_y, na_match, frame)
 }
 
-inner_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match) {
-    .Call(`_dplyr_inner_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match)
+inner_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match, frame) {
+    .Call(`_dplyr_inner_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match, frame)
 }
 
-nest_join_impl <- function(x, y, by_x, by_y, aux_y, yname) {
-    .Call(`_dplyr_nest_join_impl`, x, y, by_x, by_y, aux_y, yname)
+nest_join_impl <- function(x, y, by_x, by_y, aux_y, yname, frame) {
+    .Call(`_dplyr_nest_join_impl`, x, y, by_x, by_y, aux_y, yname, frame)
 }
 
-left_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match) {
-    .Call(`_dplyr_left_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match)
+left_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match, frame) {
+    .Call(`_dplyr_left_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match, frame)
 }
 
-right_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match) {
-    .Call(`_dplyr_right_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match)
+right_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match, frame) {
+    .Call(`_dplyr_right_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match, frame)
 }
 
-full_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match) {
-    .Call(`_dplyr_full_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match)
+full_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match, frame) {
+    .Call(`_dplyr_full_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match, frame)
 }
 
 mutate_impl <- function(df, dots) {

diff --git a/R/dataframe.R b/R/dataframe.R
@@ -213,7 +213,7 @@ distinct.data.frame <- function(.data, ..., .keep_all = FALSE) {
   dist <- distinct_vars(.data, quos(...), .keep_all = .keep_all)
   vars <- match_vars(dist$vars, dist$data)
   keep <- match_vars(dist$keep, dist$data)
-  distinct_impl(dist$data, vars, keep)
+  distinct_impl(dist$data, vars, keep, environment())
 }
 #' @export
 distinct_.data.frame <- function(.data, ..., .dots = list(), .keep_all = FALSE) {

diff --git a/R/grouped-df.r b/R/grouped-df.r
@@ -355,7 +355,7 @@ distinct.grouped_df <- function(.data, ..., .keep_all = FALSE) {
   )
   vars <- match_vars(dist$vars, dist$data)
   keep <- match_vars(dist$keep, dist$data)
-  out <- distinct_impl(dist$data, vars, keep)
+  out <- distinct_impl(dist$data, vars, keep, environment())
   grouped_df(out, groups(.data))
 }
 #' @export

diff --git a/R/manip.r b/R/manip.r
@@ -510,7 +510,7 @@ arrange.grouped_df <- function(.data, ..., .by_group = FALSE) {
     dots <- quos(...)
   }
 
-  arrange_impl(.data, dots)
+  arrange_impl(.data, dots, environment())
 }
 
 #' Select/rename variables by name

diff --git a/R/tbl-df.r b/R/tbl-df.r
@@ -43,12 +43,12 @@ as.data.frame.tbl_df <- function(x, row.names = NULL, optional = FALSE, ...) {
 #' @export
 arrange.tbl_df <- function(.data, ..., .by_group = FALSE) {
   dots <- quos(...)
-  arrange_impl(.data, dots)
+  arrange_impl(.data, dots, environment())
 }
 #' @export
 arrange_.tbl_df <- function(.data, ..., .dots = list(), .by_group = FALSE) {
   dots <- compat_lazy_dots(.dots, caller_env(), ...)
-  arrange_impl(.data, dots)
+  arrange_impl(.data, dots, environment())
 }
 
 #' @export
@@ -175,7 +175,7 @@ inner_join.tbl_df <- function(x, y, by = NULL, copy = FALSE,
   aux_x <- vars$idx$x$aux
   aux_y <- vars$idx$y$aux
 
-  out <- inner_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches)
+  out <- inner_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches, environment())
   names(out) <- vars$alias
 
   reconstruct_join(out, x, vars)
@@ -199,7 +199,7 @@ nest_join.tbl_df <- function(x, y, by = NULL, copy = FALSE, keep = FALSE, name =
     aux_y <- c(by_y, aux_y)
   }
 
-  out <- nest_join_impl(x, y, by_x, by_y, aux_y, name_var)
+  out <- nest_join_impl(x, y, by_x, by_y, aux_y, name_var, environment())
   out
 }
 
@@ -223,7 +223,7 @@ left_join.tbl_df <- function(x, y, by = NULL, copy = FALSE,
   aux_x <- vars$idx$x$aux
   aux_y <- vars$idx$y$aux
 
-  out <- left_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches)
+  out <- left_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches, environment())
   names(out) <- vars$alias
 
   reconstruct_join(out, x, vars)
@@ -248,7 +248,7 @@ right_join.tbl_df <- function(x, y, by = NULL, copy = FALSE,
   aux_x <- vars$idx$x$aux
   aux_y <- vars$idx$y$aux
 
-  out <- right_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches)
+  out <- right_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches, environment())
   names(out) <- vars$alias
 
   reconstruct_join(out, x, vars)
@@ -273,7 +273,7 @@ full_join.tbl_df <- function(x, y, by = NULL, copy = FALSE,
   aux_x <- vars$idx$x$aux
   aux_y <- vars$idx$y$aux
 
-  out <- full_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches)
+  out <- full_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_matches, environment())
   names(out) <- vars$alias
 
   reconstruct_join(out, x, vars)
@@ -288,7 +288,7 @@ semi_join.tbl_df <- function(x, y, by = NULL, copy = FALSE, ...,
 
   by <- common_by(by, x, y)
   y <- auto_copy(x, y, copy = copy)
-  out <- semi_join_impl(x, y, by$x, by$y, check_na_matches(na_matches))
+  out <- semi_join_impl(x, y, by$x, by$y, check_na_matches(na_matches), environment())
   if (is_grouped_df(x)) {
     out <- grouped_df_impl(out, group_vars(x))
   }
@@ -304,7 +304,7 @@ anti_join.tbl_df <- function(x, y, by = NULL, copy = FALSE, ...,
 
   by <- common_by(by, x, y)
   y <- auto_copy(x, y, copy = copy)
-  out <- anti_join_impl(x, y, by$x, by$y, check_na_matches(na_matches))
+  out <- anti_join_impl(x, y, by$x, by$y, check_na_matches(na_matches), environment())
   if (is_grouped_df(x)) {
     out <- grouped_df_impl(out, group_vars(x))
   }

diff --git a/inst/include/dplyr/data/DataMask.h b/inst/include/dplyr/data/DataMask.h
@@ -136,11 +136,12 @@ struct ColumnBinding {
     const typename SlicedTibble::slicing_index& indices,
     SEXP mask_resolved)
   {
+    SEXP frame = ENCLOS(ENCLOS(mask_resolved));
 
     // materialize
     Shield<SEXP> value(summary ?
-                       column_subset(data, RowwiseSlicingIndex(indices.group())) :
-                       column_subset(data, indices)
+                       column_subset(data, RowwiseSlicingIndex(indices.group()), frame) :
+                       column_subset(data, indices, frame)
                       );
     MARK_NOT_MUTABLE(value);
 

diff --git a/inst/include/dplyr/symbols.h b/inst/include/dplyr/symbols.h
@@ -53,6 +53,7 @@ struct symbols {
   static SEXP inspect;
   static SEXP dot;
   static SEXP dot_x;
+  static SEXP drop;
 };
 
 }

diff --git a/inst/include/dplyr/visitors/subset/DataFrameSubsetVisitors.h b/inst/include/dplyr/visitors/subset/DataFrameSubsetVisitors.h
@@ -10,22 +10,23 @@ namespace dplyr {
 class DataFrameSubsetVisitors {
 private:
   DataFrame data;
+  SEXP frame;
 
 public:
-  DataFrameSubsetVisitors(const DataFrame& data_): data(data_) {}
+  DataFrameSubsetVisitors(const DataFrame& data_, SEXP frame_): data(data_), frame(frame_) {}
 
   inline int size() const {
     return data.size();
   }
 
   template <typename Index>
   DataFrame subset_all(const Index& index) const {
-    return dataframe_subset<Index>(data, index, get_class(data));
+    return dataframe_subset<Index>(data, index, get_class(data), frame);
   }
 
   template <typename Index>
   SEXP subset_one(int i, const Index& index) const {
-    return column_subset(data[i], index);
+    return column_subset(data[i], index, frame);
   }
 
 };

diff --git a/inst/include/dplyr/visitors/subset/column_subset.h b/inst/include/dplyr/visitors/subset/column_subset.h
@@ -6,9 +6,16 @@
 #include <tools/bad.h>
 #include <tools/default_value.h>
 #include <tools/SlicingIndex.h>
+#include <dplyr/symbols.h>
 
 SEXP ns_methods();
 
+namespace base {
+SEXP bracket_one();
+SEXP bracket_two();
+}
+
+
 namespace dplyr {
 namespace traits {
 
@@ -105,41 +112,41 @@ SEXP column_subset_impl(SEXP x, const Index& index) {
 }
 
 template <typename Index>
-DataFrame dataframe_subset(const List& data, const Index& index, CharacterVector classes);
-
-inline SEXP r_subset_env(SEXP x) {
-  return IS_S4_OBJECT(x) ? ns_methods() : R_BaseEnv;
-}
+DataFrame dataframe_subset(const List& data, const Index& index, CharacterVector classes, SEXP frame);
 
 template <typename Index>
-SEXP r_column_subset(SEXP x, const Index& index) {
+SEXP r_column_subset(SEXP x, const Index& index, SEXP frame) {
   Shield<SEXP> one_based_index(index);
   if (Rf_isMatrix(x)) {
-    return Language("[", x, one_based_index, R_MissingArg, _["drop"] = false).eval(r_subset_env(x));
+    Shield<SEXP> call(Rf_lang5(base::bracket_one(), x, one_based_index, R_MissingArg, Rf_ScalarLogical(false)));
+    SET_TAG(CDR(CDR(CDDR(call))), dplyr::symbols::drop);
+    return Rcpp::Rcpp_eval(call, frame);
   } else {
-    return Language("[", x, one_based_index).eval(r_subset_env(x));
+    Shield<SEXP> call(Rf_lang3(base::bracket_one(), x, one_based_index));
+    return Rcpp::Rcpp_eval(call, frame);
   }
 }
 
 template <>
-inline SEXP r_column_subset<RowwiseSlicingIndex>(SEXP x, const RowwiseSlicingIndex& index) {
+inline SEXP r_column_subset<RowwiseSlicingIndex>(SEXP x, const RowwiseSlicingIndex& index, SEXP frame) {
   if (Rf_isMatrix(x)) {
-    // TODO: not sure about this
-    return Language("[", x, index, R_MissingArg).eval(r_subset_env(x));
+    Shield<SEXP> call(Rf_lang4(base::bracket_one(), x, index, R_MissingArg));
+    return Rcpp::Rcpp_eval(call, frame);
   } else {
-    return Language("[[", x, index).eval(r_subset_env(x));
+    Shield<SEXP> call(Rf_lang3(base::bracket_two(), x, index));
+    return Rcpp::Rcpp_eval(call, frame);
   }
 }
 
 template <typename Index>
-SEXP column_subset(SEXP x, const Index& index) {
+SEXP column_subset(SEXP x, const Index& index, SEXP frame) {
   if (Rf_inherits(x, "data.frame")) {
-    return dataframe_subset(x, index, Rf_getAttrib(x, R_ClassSymbol));
+    return dataframe_subset(x, index, Rf_getAttrib(x, R_ClassSymbol), frame);
   }
 
   // this has a class, so just use R `[` or `[[`
   if (OBJECT(x) || !Rf_isNull(Rf_getAttrib(x, R_ClassSymbol))) {
-    return r_column_subset(x, index);
+    return r_column_subset(x, index, frame);
   }
 
   switch (TYPEOF(x)) {
@@ -166,12 +173,12 @@ SEXP column_subset(SEXP x, const Index& index) {
 }
 
 template <typename Index>
-DataFrame dataframe_subset(const List& data, const Index& index, CharacterVector classes) {
+DataFrame dataframe_subset(const List& data, const Index& index, CharacterVector classes, SEXP frame) {
   int nc = data.size();
   List res(nc);
 
   for (int i = 0; i < nc; i++) {
-    res[i] = column_subset(data[i], index);
+    res[i] = column_subset(data[i], index, frame);
   }
 
   copy_most_attributes(res, data);