Skip to content

Commit 7256c37

Browse files
authored
Merge pull request #4 from pachadotdev/issue406
Issue406
2 parents 2c56e1a + 008ab7c commit 7256c37

File tree

7 files changed

+221
-17
lines changed

7 files changed

+221
-17
lines changed

cpp11test/DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ Suggests:
2020
xml2
2121
LazyData: true
2222
Roxygen: list(markdown = TRUE)
23-
RoxygenNote: 7.1.1
23+
RoxygenNote: 7.3.2

cpp11test/R/cpp11.R

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,26 @@ string_push_back_ <- function() {
168168
.Call(`_cpp11test_string_push_back_`)
169169
}
170170

171+
# R-level entry point for the native routine `_cpp11test_grow_strings_cpp11_`;
# `n` and `seed` are forwarded unchanged through .Call().
grow_strings_cpp11_ <- function(n, seed) .Call(`_cpp11test_grow_strings_cpp11_`, n, seed)
174+
175+
# R-level entry point for the native routine `_cpp11test_grow_strings_rcpp_`;
# `n` and `seed` are forwarded unchanged through .Call().
grow_strings_rcpp_ <- function(n, seed) .Call(`_cpp11test_grow_strings_rcpp_`, n, seed)
178+
179+
# R-level entry point for the native routine `_cpp11test_grow_strings_manual_`;
# `n` and `seed` are forwarded unchanged through .Call().
grow_strings_manual_ <- function(n, seed) .Call(`_cpp11test_grow_strings_manual_`, n, seed)
182+
183+
# R-level entry point for the native routine `_cpp11test_assign_cpp11_`;
# `n` and `seed` are forwarded unchanged through .Call().
assign_cpp11_ <- function(n, seed) .Call(`_cpp11test_assign_cpp11_`, n, seed)
186+
187+
# R-level entry point for the native routine `_cpp11test_assign_rcpp_`;
# `n` and `seed` are forwarded unchanged through .Call().
assign_rcpp_ <- function(n, seed) .Call(`_cpp11test_assign_rcpp_`, n, seed)
190+
171191
sum_dbl_for_ <- function(x) {
172192
.Call(`_cpp11test_sum_dbl_for_`, x)
173193
}

cpp11test/bench/strings.R

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Benchmarks for building character vectors (issue #406).
#
# Two families are measured, each first over a grid of lengths and then once
# at a large length with many iterations:
#   * assign_*        - element-wise assignment of random strings
#   * grow_strings_*  - the grow_strings_* routines (cpp11, Rcpp, manual C API)
# Every call passes the same seed (123L) so all implementations generate the
# same sequence of random strings.
pkgload::load_all("cpp11test")

# Assignment, lengths 10^0 .. 10^6.
bench::press(len = as.integer(10^(0:6)), {
  bench::mark(
    assign_cpp11_(n = len, 123L),
    assign_rcpp_(n = len, 123L),
    iterations = 20
  )
})[c("expression", "len", "min", "mem_alloc", "n_itr", "n_gc")]

# Longer benchmark, lots of gc
# (uses the assign_* routines; the previous version timed the unrelated
# *_push_and_truncate_ functions here)
len <- as.integer(10^7)
bench::mark(
  cpp11 = assign_cpp11_(len, 123L),
  rcpp = assign_rcpp_(len, 123L),
  min_iterations = 200
)[c("expression", "min", "mem_alloc", "n_itr", "n_gc")]

# Growing, lengths 10^0 .. 10^6.
bench::press(len = as.integer(10^(0:6)), {
  bench::mark(
    grow_strings_cpp11_(len, 123L),
    grow_strings_rcpp_(len, 123L),
    grow_strings_manual_(len, 123L),
    iterations = 20
  )
})[c("expression", "len", "min", "mem_alloc", "n_itr", "n_gc")]

# Longer benchmark, lots of gc
# (the registered routines are named grow_strings_<impl>_ and take a seed)
len <- as.integer(10^7)
bench::mark(
  cpp11 = grow_strings_cpp11_(len, 123L),
  rcpp = grow_strings_rcpp_(len, 123L),
  manual = grow_strings_manual_(len, 123L),
  min_iterations = 200
)[c("expression", "min", "mem_alloc", "n_itr", "n_gc")]

cpp11test/src/cpp11.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,41 @@ extern "C" SEXP _cpp11test_string_push_back_() {
324324
return cpp11::as_sexp(string_push_back_());
325325
END_CPP11
326326
}
327+
// strings.cpp
328+
cpp11::strings grow_strings_cpp11_(size_t n, int seed);
329+
extern "C" SEXP _cpp11test_grow_strings_cpp11_(SEXP n, SEXP seed) {
330+
BEGIN_CPP11
331+
return cpp11::as_sexp(grow_strings_cpp11_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
332+
END_CPP11
333+
}
334+
// strings.cpp
335+
Rcpp::CharacterVector grow_strings_rcpp_(size_t n, int seed);
336+
extern "C" SEXP _cpp11test_grow_strings_rcpp_(SEXP n, SEXP seed) {
337+
BEGIN_CPP11
338+
return cpp11::as_sexp(grow_strings_rcpp_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
339+
END_CPP11
340+
}
341+
// strings.cpp
342+
SEXP grow_strings_manual_(size_t n, int seed);
343+
extern "C" SEXP _cpp11test_grow_strings_manual_(SEXP n, SEXP seed) {
344+
BEGIN_CPP11
345+
return cpp11::as_sexp(grow_strings_manual_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
346+
END_CPP11
347+
}
348+
// strings.cpp
349+
cpp11::strings assign_cpp11_(size_t n, int seed);
350+
extern "C" SEXP _cpp11test_assign_cpp11_(SEXP n, SEXP seed) {
351+
BEGIN_CPP11
352+
return cpp11::as_sexp(assign_cpp11_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
353+
END_CPP11
354+
}
355+
// strings.cpp
356+
Rcpp::CharacterVector assign_rcpp_(size_t n, int seed);
357+
extern "C" SEXP _cpp11test_assign_rcpp_(SEXP n, SEXP seed) {
358+
BEGIN_CPP11
359+
return cpp11::as_sexp(assign_rcpp_(cpp11::as_cpp<cpp11::decay_t<size_t>>(n), cpp11::as_cpp<cpp11::decay_t<int>>(seed)));
360+
END_CPP11
361+
}
327362
// sum.cpp
328363
double sum_dbl_for_(cpp11::doubles x);
329364
extern "C" SEXP _cpp11test_sum_dbl_for_(SEXP x) {
@@ -472,6 +507,8 @@ extern "C" {
472507
extern SEXP run_testthat_tests(SEXP);
473508

474509
static const R_CallMethodDef CallEntries[] = {
510+
{"_cpp11test_assign_cpp11_", (DL_FUNC) &_cpp11test_assign_cpp11_, 2},
511+
{"_cpp11test_assign_rcpp_", (DL_FUNC) &_cpp11test_assign_rcpp_, 2},
475512
{"_cpp11test_col_sums", (DL_FUNC) &_cpp11test_col_sums, 1},
476513
{"_cpp11test_cpp11_add_vec_for_", (DL_FUNC) &_cpp11test_cpp11_add_vec_for_, 2},
477514
{"_cpp11test_cpp11_insert_", (DL_FUNC) &_cpp11test_cpp11_insert_, 1},
@@ -488,6 +525,9 @@ static const R_CallMethodDef CallEntries[] = {
488525
{"_cpp11test_gibbs_rcpp", (DL_FUNC) &_cpp11test_gibbs_rcpp, 2},
489526
{"_cpp11test_gibbs_rcpp2", (DL_FUNC) &_cpp11test_gibbs_rcpp2, 2},
490527
{"_cpp11test_grow_", (DL_FUNC) &_cpp11test_grow_, 1},
528+
{"_cpp11test_grow_strings_cpp11_", (DL_FUNC) &_cpp11test_grow_strings_cpp11_, 2},
529+
{"_cpp11test_grow_strings_manual_", (DL_FUNC) &_cpp11test_grow_strings_manual_, 2},
530+
{"_cpp11test_grow_strings_rcpp_", (DL_FUNC) &_cpp11test_grow_strings_rcpp_, 2},
491531
{"_cpp11test_my_message", (DL_FUNC) &_cpp11test_my_message, 2},
492532
{"_cpp11test_my_message_n1", (DL_FUNC) &_cpp11test_my_message_n1, 1},
493533
{"_cpp11test_my_message_n1fmt", (DL_FUNC) &_cpp11test_my_message_n1fmt, 1},

cpp11test/src/strings.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
#include "cpp11/strings.hpp"
2+
#include <random>
3+
#include <vector>
4+
5+
#include <Rcpp.h>
26

37
// Test benchmark for string proxy assignment performance.
48
// We don't unwind_protect() before each `SET_STRING_ELT()` call,
@@ -33,3 +37,93 @@
3337

3438
return x;
3539
}
40+
41+
// issue 406
42+
43+
// File-scope random engine shared by the random_* helpers in this file.
// `gen` is initially seeded from `rd`; the registered benchmark functions
// re-seed it with `gen.seed(seed)` before generating any data.
std::random_device rd;
44+
std::mt19937 gen(rd());
45+
46+
double random_double() {
47+
std::uniform_real_distribution<double> dist(0.0, 1.0);
48+
return dist(gen);
49+
}
50+
51+
int random_int(int min, int max) {
52+
std::uniform_int_distribution<int> dist(min, max);
53+
return dist(gen);
54+
}
55+
56+
std::string random_string() {
57+
std::string s(10, '\0');
58+
for (size_t i = 0; i < 10; i++) {
59+
s[i] = random_int(0, 25) + 'a';
60+
}
61+
return s;
62+
}
63+
64+
// Benchmark: starts from an empty cpp11 writable character vector and appends
// `n` random strings one at a time with push_back().
// The shared engine is re-seeded with `seed` first.
[[cpp11::register]] cpp11::strings grow_strings_cpp11_(size_t n, int seed) {
  gen.seed(seed);

  cpp11::writable::strings out;
  for (size_t remaining = n; remaining != 0; --remaining) {
    out.push_back(random_string());
  }
  return out;
}
72+
73+
// Benchmark: fills an Rcpp::CharacterVector constructed with `n` with random
// strings, one element at a time. The shared engine is re-seeded with `seed`
// first.
// NOTE(review): despite the "grow" name this assigns by index rather than
// appending, exactly like assign_rcpp_ — confirm that is the intended baseline.
[[cpp11::register]] Rcpp::CharacterVector grow_strings_rcpp_(size_t n, int seed) {
  gen.seed(seed);

  Rcpp::CharacterVector out(n);
  size_t idx = 0;
  while (idx < n) {
    out[idx] = random_string();
    ++idx;
  }
  return out;
}
81+
82+
// Benchmark baseline written directly against the R C API: appends `n` random
// strings to a STRSXP whose capacity doubles whenever it is full, then copies
// the result into a vector of exactly the used length when there is slack.
// The shared engine is re-seeded with `seed` first.
[[cpp11::register]] SEXP grow_strings_manual_(size_t n, int seed) {
  gen.seed(seed);

  // Copies the first `count` elements of `from` into `to`.
  const auto copy_prefix = [](SEXP to, SEXP from, size_t count) {
    for (size_t k = 0; k < count; ++k) {
      SET_STRING_ELT(to, k, STRING_ELT(from, k));
    }
  };

  SEXP buffer = PROTECT(Rf_allocVector(STRSXP, 0));
  size_t used = 0;
  size_t room = 0;

  for (size_t i = 0; i < n; ++i) {
    if (used == room) {
      room = (room == 0) ? 1 : room * 2;
      SEXP bigger = PROTECT(Rf_allocVector(STRSXP, room));
      copy_prefix(bigger, buffer, used);
      // Pop both the old and the new buffer, then push the new one straight
      // back so exactly one buffer stays on the protect stack.
      UNPROTECT(2);
      buffer = PROTECT(bigger);
    }
    SET_STRING_ELT(buffer, used, Rf_mkChar(random_string().c_str()));
    ++used;
  }

  // No slack: the buffer already has exactly the used length.
  if (used >= room) {
    UNPROTECT(1);
    return buffer;
  }

  // copy back down to size
  SEXP exact = PROTECT(Rf_allocVector(STRSXP, used));
  copy_prefix(exact, buffer, used);
  UNPROTECT(2);
  return exact;
}
112+
113+
// Benchmark: constructs a cpp11 writable character vector with `n` and
// assigns a random string to each element through operator[].
// The shared engine is re-seeded with `seed` first.
[[cpp11::register]] cpp11::strings assign_cpp11_(size_t n, int seed) {
  gen.seed(seed);

  cpp11::writable::strings out(n);
  size_t idx = 0;
  while (idx < n) {
    out[idx] = random_string();
    ++idx;
  }
  return out;
}
121+
122+
// Benchmark: constructs an Rcpp::CharacterVector with `n` and assigns a
// random string to each element through operator[].
// The shared engine is re-seeded with `seed` first.
[[cpp11::register]] Rcpp::CharacterVector assign_rcpp_(size_t n, int seed) {
  gen.seed(seed);

  Rcpp::CharacterVector out(n);
  size_t idx = 0;
  while (idx < n) {
    out[idx] = random_string();
    ++idx;
  }
  return out;
}

inst/include/cpp11/r_vector.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,9 @@ class r_vector : public cpp11::r_vector<T> {
235235
proxy at(const r_string& name) const;
236236

237237
void push_back(T value);
238-
/// Implemented in `strings.hpp`
238+
/// Appends a `std::string` to the vector.
/// The `enable_if` constraint removes this overload unless the element type
/// is `r_string`. The definition lives in `strings.hpp`.
template <typename U = T,
239+
typename std::enable_if<std::is_same<U, r_string>::value>::type* = nullptr>
240+
void push_back(const std::string& value); // Pacha: r_string only (#406)
239241
void push_back(const named_arg& value);
240242
void pop_back();
241243

inst/include/cpp11/strings.hpp

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,24 @@ typedef r_vector<r_string> strings;
6161
namespace writable {
6262

6363
template <>
64-
inline void r_vector<r_string>::set_elt(SEXP x, R_xlen_t i,
65-
typename r_vector::underlying_type value) {
64+
inline void r_vector<r_string>::set_elt(
65+
SEXP x, R_xlen_t i, typename r_vector<r_string>::underlying_type value) {
6666
// NOPROTECT: Likely too costly to unwind protect every set elt
6767
SET_STRING_ELT(x, i, value);
6868
}
6969

70+
// Pacha: Optimized push_back for std::string (borrows from @traversc' push_back_fast)
71+
template <>
72+
template <typename U, typename std::enable_if<std::is_same<U, r_string>::value>::type*>
73+
inline void r_vector<r_string>::push_back(const std::string& value) {
74+
while (this->length_ >= this->capacity_) {
75+
this->reserve(this->capacity_ == 0 ? 1 : this->capacity_ * 2);
76+
}
77+
set_elt(this->data_, this->length_,
78+
Rf_mkCharLenCE(value.c_str(), value.size(), CE_UTF8));
79+
++this->length_;
80+
}
81+
7082
inline bool operator==(const r_vector<r_string>::proxy& lhs, r_string rhs) {
7183
return static_cast<r_string>(lhs).operator==(static_cast<std::string>(rhs).c_str());
7284
}
@@ -95,17 +107,17 @@ inline SEXP alloc_if_charsxp(const SEXP data) {
95107

96108
template <>
97109
inline r_vector<r_string>::r_vector(const SEXP& data)
98-
: cpp11::r_vector<r_string>(alloc_or_copy(data)), capacity_(length_) {
110+
: cpp11::r_vector<r_string>(alloc_or_copy(data)), capacity_(this->length_) {
99111
if (detail::r_typeof(data) == CHARSXP) {
100-
SET_STRING_ELT(data_, 0, data);
112+
SET_STRING_ELT(this->data_, 0, data);
101113
}
102114
}
103115

104116
template <>
105117
inline r_vector<r_string>::r_vector(SEXP&& data)
106-
: cpp11::r_vector<r_string>(alloc_if_charsxp(data)), capacity_(length_) {
118+
: cpp11::r_vector<r_string>(alloc_if_charsxp(data)), capacity_(this->length_) {
107119
if (detail::r_typeof(data) == CHARSXP) {
108-
SET_STRING_ELT(data_, 0, data);
120+
SET_STRING_ELT(this->data_, 0, data);
109121
}
110122
}
111123

@@ -117,14 +129,15 @@ inline r_vector<r_string>::r_vector(std::initializer_list<r_string> il)
117129
unwind_protect([&] {
118130
auto it = il.begin();
119131

120-
for (R_xlen_t i = 0; i < capacity_; ++i, ++it) {
132+
for (R_xlen_t i = 0; i < this->capacity_; ++i, ++it) {
121133
// i.e. to `SEXP`
122-
underlying_type elt = static_cast<underlying_type>(*it);
134+
typename r_vector<r_string>::underlying_type elt =
135+
static_cast<typename r_vector<r_string>::underlying_type>(*it);
123136

124137
if (elt == NA_STRING) {
125-
set_elt(data_, i, elt);
138+
set_elt(this->data_, i, elt);
126139
} else {
127-
set_elt(data_, i, Rf_mkCharCE(Rf_translateCharUTF8(elt), CE_UTF8));
140+
set_elt(this->data_, i, Rf_mkCharCE(Rf_translateCharUTF8(elt), CE_UTF8));
128141
}
129142
}
130143
});
@@ -135,12 +148,12 @@ typedef r_vector<r_string> strings;
135148
template <typename T>
136149
inline void r_vector<T>::push_back(const named_arg& value) {
137150
push_back(value.value());
138-
if (Rf_xlength(names()) == 0) {
139-
cpp11::writable::strings new_nms(size());
140-
names() = new_nms;
151+
if (Rf_xlength(this->names()) == 0) {
152+
cpp11::writable::strings new_nms(this->size());
153+
this->names() = new_nms;
141154
}
142-
cpp11::writable::strings nms(names());
143-
nms[size() - 1] = value.name();
155+
cpp11::writable::strings nms(this->names());
156+
nms[this->size() - 1] = value.name();
144157
}
145158

146159
} // namespace writable

0 commit comments

Comments
 (0)