Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: data frame with lazy relation AltrepDataFrameRelation #960

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
4 changes: 4 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ rapi_rel_filter <- function(rel, exprs) {
.Call(`_duckdb_rapi_rel_filter`, rel, exprs)
}

rapi_rel_project2 <- function(df, con, exprs) {
.Call(`_duckdb_rapi_rel_project2`, df, con, exprs)
}

rapi_rel_project <- function(rel, exprs) {
.Call(`_duckdb_rapi_rel_project`, rel, exprs)
}
Expand Down
13 changes: 13 additions & 0 deletions R/relational.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,19 @@ rel_project <- function(rel, exprs) {
rethrow_rapi_rel_project(rel, exprs)
}

#' Lazily project a DuckDB relation object
#' @param rel the DuckDB relation object
#' @param exprs a list of DuckDB expressions to project
#' @return the now projected `duckdb_relation` object
#' @noRd
#' @examples
#' con <- DBI::dbConnect(duckdb())
#' rel <- rel_from_df(con, mtcars)
#' rel2 <- rel_project(rel, list(expr_reference("cyl"), expr_reference("disp")))
rel_project2 <- function(df, con, exprs) {
rethrow_rapi_rel_project2(as.data.frame(df), con@conn_ref, exprs)
krlmlr marked this conversation as resolved.
Show resolved Hide resolved
}

#' Lazily filter a DuckDB relation object
#' @param rel the DuckDB relation object
#' @param exprs a list of DuckDB expressions to filter by
Expand Down
10 changes: 10 additions & 0 deletions R/rethrow-gen.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ rethrow_rapi_rel_project <- function(rel, exprs, call = parent.frame(2)) {
)
}

rethrow_rapi_rel_project2 <- function(df, con, exprs, call = parent.frame(2)) {
rlang::try_fetch(
rapi_rel_project2(df, con, exprs),
error = function(e) {
rethrow_error_from_rapi(e, call)
}
)
}

rethrow_rapi_rel_aggregate <- function(rel, groups, aggregates, call = parent.frame(2)) {
rlang::try_fetch(
rapi_rel_aggregate(rel, groups, aggregates),
Expand Down Expand Up @@ -580,6 +589,7 @@ rethrow_restore <- function() {
rethrow_rapi_rel_from_df <<- rapi_rel_from_df
rethrow_rapi_rel_filter <<- rapi_rel_filter
rethrow_rapi_rel_project <<- rapi_rel_project
rethrow_rapi_rel_project2 <<- rapi_rel_project2
rethrow_rapi_rel_aggregate <<- rapi_rel_aggregate
rethrow_rapi_rel_order <<- rapi_rel_order
rethrow_rapi_expr_window <<- rapi_expr_window
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_library(
register.cpp
relational.cpp
reltoaltrep.cpp
altrepdataframe_relation.cpp
scan.cpp
statement.cpp
transform.cpp
Expand Down
2 changes: 1 addition & 1 deletion src/Makevars
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ include Makevars.duckdb

CXX_STD = CXX17
PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -DBROTLI_ENCODER_CLEANUP_ON_OOM -Iduckdb/src/include -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/fast_float -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/httplib -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/third_party/libpg_query -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/lz4 -Iduckdb/third_party/brotli/include -Iduckdb/third_party/brotli/common -Iduckdb/third_party/brotli/dec -Iduckdb/third_party/brotli/enc -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/miniz -Iduckdb/third_party/pcg -Iduckdb/third_party/re2 -Iduckdb/third_party/skiplist -Iduckdb/third_party/tdigest -Iduckdb/third_party/utf8proc -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/yyjson/include -Iduckdb/third_party/zstd/include -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/thrift -Iduckdb/third_party/lz4 -Iduckdb/third_party/brotli/include -Iduckdb/third_party/brotli/common -Iduckdb/third_party/brotli/dec -Iduckdb/third_party/brotli/enc -Iduckdb/third_party/snappy -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/zstd/include -Iduckdb/extension/core_functions/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY -DDUCKDB_EXTENSION_CORE_FUNCTIONS_LINKED -DDUCKDB_BUILD_LIBRARY
OBJECTS=rfuns.o database.o connection.o statement.o register.o relational.o scan.o signal.o transform.o utils.o reltoaltrep.o types.o cpp11.o $(SOURCES)
OBJECTS=rfuns.o database.o connection.o statement.o register.o relational.o scan.o signal.o transform.o utils.o reltoaltrep.o altrepdataframe_relation.o types.o cpp11.o $(SOURCES)
22 changes: 22 additions & 0 deletions src/altrepdataframe_relation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include "altrepdataframe_relation.hpp"

namespace duckdb {

AltrepDataFrameRelation::AltrepDataFrameRelation(shared_ptr<Relation> parent)
// TODO: which RelationType should be used?
: Relation(parent->context, RelationType::AGGREGATE_RELATION), parent(std::move(parent)) {
}

const vector<ColumnDefinition> &AltrepDataFrameRelation::Columns() {
return parent->Columns();
}

string AltrepDataFrameRelation::ToString(idx_t depth) {
return parent->ToString(depth);
}

bool AltrepDataFrameRelation::IsReadOnly() {
return parent->IsReadOnly();
}

}
8 changes: 8 additions & 0 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,13 @@ extern "C" SEXP _duckdb_rapi_rel_filter(SEXP rel, SEXP exprs) {
END_CPP11
}
// relational.cpp
SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs);
extern "C" SEXP _duckdb_rapi_rel_project2(SEXP df, SEXP con, SEXP exprs) {
BEGIN_CPP11
return cpp11::as_sexp(rapi_rel_project2(cpp11::as_cpp<cpp11::decay_t<data_frame>>(df), cpp11::as_cpp<cpp11::decay_t<duckdb::conn_eptr_t>>(con), cpp11::as_cpp<cpp11::decay_t<list>>(exprs)));
END_CPP11
}
// relational.cpp
SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs);
extern "C" SEXP _duckdb_rapi_rel_project(SEXP rel, SEXP exprs) {
BEGIN_CPP11
Expand Down Expand Up @@ -499,6 +506,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_duckdb_rapi_rel_names", (DL_FUNC) &_duckdb_rapi_rel_names, 1},
{"_duckdb_rapi_rel_order", (DL_FUNC) &_duckdb_rapi_rel_order, 3},
{"_duckdb_rapi_rel_project", (DL_FUNC) &_duckdb_rapi_rel_project, 2},
{"_duckdb_rapi_rel_project2", (DL_FUNC) &_duckdb_rapi_rel_project2, 3},
{"_duckdb_rapi_rel_set_alias", (DL_FUNC) &_duckdb_rapi_rel_set_alias, 2},
{"_duckdb_rapi_rel_set_diff", (DL_FUNC) &_duckdb_rapi_rel_set_diff, 2},
{"_duckdb_rapi_rel_set_intersect", (DL_FUNC) &_duckdb_rapi_rel_set_intersect, 2},
Expand Down
17 changes: 17 additions & 0 deletions src/include/altrepdataframe_relation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "duckdb/main/relation.hpp"

namespace duckdb {

class AltrepDataFrameRelation final : public Relation {
public:
AltrepDataFrameRelation(shared_ptr<Relation> parent);

shared_ptr<Relation> parent;
public:
const vector<ColumnDefinition> &Columns() override;
string ToString(idx_t depth) override;
bool IsReadOnly() override;
};

}

23 changes: 23 additions & 0 deletions src/relational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,29 @@ using namespace cpp11;
return make_external_prot<RelationWrapper>("duckdb_relation", prot, res);
}

[[cpp11::register]] SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs) {
if (exprs.size() == 0) {
warning("rel_project without projection expressions has no effect");
return df;
Antonov548 marked this conversation as resolved.
Show resolved Hide resolved
}
vector<duckdb::unique_ptr<ParsedExpression>> projections;
vector<string> aliases;

for (expr_extptr_t expr : exprs) {
auto dexpr = expr->Copy();
aliases.push_back(dexpr->GetName());
projections.push_back(std::move(dexpr));
}

duckdb::rel_extptr_t rel = cpp11::as_cpp<cpp11::decay_t<duckdb::rel_extptr_t>>(rapi_rel_from_df(con, df, false));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rapi_() functions are external, this coupling is a bit too tight. Perhaps extract an internal function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just temporarily


auto res = make_shared_ptr<ProjectionRelation>(rel->rel, std::move(projections), std::move(aliases));
krlmlr marked this conversation as resolved.
Show resolved Hide resolved

cpp11::writable::list prot = {rel};

return make_external_prot<RelationWrapper>("duckdb_relation", prot, res);
}

[[cpp11::register]] SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs) {
if (exprs.size() == 0) {
warning("rel_project without projection expressions has no effect");
Expand Down
Loading