Skip to content

Commit

Permalink
Merge pull request #335 from cynkra/f-3-compound
Browse files Browse the repository at this point in the history
- `dm_add_pk()` and `dm_add_fk()` support compound keys via the `c()` notation, e.g. `dm_add_pk(dm, table, c(col1, col2))`. `dm_nycflights13()` returns a data model with compound keys by default. Use `compound = FALSE` to return the data model from dm 0.1.13 or earlier (#3).
- `dm_get_all_fks()` includes `parent_pk_cols` column that describes the primary key columns of the parent table (#335).
- `dm_examine_constraints()` and other check functions count the number of rows that violate constraints for primary and foreign keys (#335).
- `copy_dm_to(set_key_constraints = FALSE)` downgrades unique indexess to regular indexes (#335).
- `rows_truncate()` implemented for data frames (#335).
- `dm_enum_fk_candidates()` enumerates column in the order they apper in the table (#335).
  • Loading branch information
krlmlr authored Apr 28, 2021
2 parents 3ab1b67 + 1184568 commit c2d4b5f
Show file tree
Hide file tree
Showing 59 changed files with 1,215 additions and 578 deletions.
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ S3method(distinct,zoomed_dm)
S3method(filter,dm)
S3method(filter,zoomed_dm)
S3method(format,dm)
S3method(format,dm_keys)
S3method(format,zoomed_df)
S3method(format,zoomed_dm)
S3method(full_join,dm)
Expand Down Expand Up @@ -73,6 +74,7 @@ S3method(rename,zoomed_dm)
S3method(right_join,dm)
S3method(right_join,zoomed_dm)
S3method(rows_insert,tbl_dbi)
S3method(rows_truncate,data.frame)
S3method(rows_truncate,tbl_sql)
S3method(rows_update,tbl_dbi)
S3method(select,dm)
Expand Down Expand Up @@ -126,6 +128,9 @@ S3method(ungroup,dm)
S3method(ungroup,zoomed_dm)
S3method(unite,dm)
S3method(unite,zoomed_dm)
S3method(vec_cast,dm_keys.dm_keys)
S3method(vec_proxy_compare,dm_keys)
S3method(vec_ptype2,dm_keys.dm_keys)
S3method(vec_ptype_abbr,dm_keys)
export("%>%")
export(anti_join)
Expand Down Expand Up @@ -310,5 +315,9 @@ importFrom(utils,head)
importFrom(utils,packageVersion)
importFrom(utils,str)
importFrom(utils,tail)
importFrom(vctrs,vec_cast)
importFrom(vctrs,vec_data)
importFrom(vctrs,vec_proxy_compare)
importFrom(vctrs,vec_ptype2)
importFrom(vctrs,vec_ptype_abbr)
importFrom(vctrs,vec_slice)
35 changes: 27 additions & 8 deletions R/db-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ get_pid <- function() {
}

# Internal copy helper functions
build_copy_data <- function(dm, dest, table_names) {
build_copy_data <- function(dm, dest, table_names, set_key_constraints) {
source <-
dm %>%
dm_apply_filters() %>%
Expand All @@ -42,6 +42,13 @@ build_copy_data <- function(dm, dest, table_names) {
dm_get_all_pks_impl(dm) %>%
transmute(source_name = table, column = pk_col, pk = TRUE)

# FIXME: COMPOUND: Need support for multiple primary keys, https://github.com/r-dbi/DBI/pull/351
# Discard primary keys of length > 1 for now
pks_flat <-
pks %>%
filter(lengths(column) == 1) %>%
mutate(column = as.character(column))

fks <-
dm_get_all_fks_impl(dm) %>%
transmute(source_name = child_table, column = child_fk_cols, fk = TRUE)
Expand All @@ -55,27 +62,39 @@ build_copy_data <- function(dm, dest, table_names) {
mutate(type = map(df, ~ map_chr(., ~ DBI::dbDataType(dest_con, .)))) %>%
select(-df) %>%
unnest(c(column, type)) %>%
left_join(pks, by = c("source_name", "column")) %>%
left_join(pks_flat, by = c("source_name", "column")) %>%
mutate(full_type = paste0(type, if_else(pk, " NOT NULL PRIMARY KEY", "", ""))) %>%
group_by(source_name) %>%
summarize(types = list(deframe(tibble(column, full_type))))

copy_data_unique_indexes <-
pks %>%
transmute(source_name, unique_indexes = map(as.list(column), list))
group_by(source_name) %>%
summarize(unique_indexes = list(column)) %>%
ungroup()

copy_data_indexes <-
copy_data_non_unique_indexes <-
fks %>%
select(source_name, column) %>%
group_by(source_name) %>%
summarize(indexes = map(list(column), as.list))
summarize(indexes = list(column))

copy_data_indexes <-
full_join(copy_data_unique_indexes, copy_data_non_unique_indexes, by = "source_name") %>%
mutate(indexes = map2(indexes, unique_indexes, setdiff))

# Downgrade unique indexes to non-unique indexes
if (!set_key_constraints) {
copy_data_indexes <-
copy_data_indexes %>%
mutate(indexes = map2(indexes, unique_indexes, c)) %>%
mutate(unique_indexes = list(new_keys()))
}

copy_data <-
copy_data_base %>%
inner_join(copy_data_types, by = "source_name") %>%
left_join(copy_data_unique_indexes, by = "source_name") %>%
left_join(copy_data_indexes, by = "source_name") %>%
mutate(indexes = map2(indexes, unique_indexes, setdiff))
left_join(copy_data_indexes, by = "source_name")
} else {
copy_data <-
copy_data_base
Expand Down
8 changes: 5 additions & 3 deletions R/db-interface.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
#' @param dest An object of class `"src"` or `"DBIConnection"`.
#' @param dm A `dm` object.
#' @param overwrite,types,indexes,unique_indexes Must remain `NULL`.
#' @param set_key_constraints Boolean variable, if `TRUE` will mirror `dm` key constraints on a database.
#' @param set_key_constraints If `TRUE` will mirror `dm` primary and foreign key constraints on a database
#' and create unique indexes.
#' Set to `FALSE` if your data model currently does not satisfy primary or foreign key constraints.
#' @param unique_table_names Deprecated.
#' @param temporary Boolean variable, if `TRUE`, only temporary tables will be created.
#' @param temporary If `TRUE`, only temporary tables will be created.
#' These tables will vanish when disconnecting from the database.
#' @param schema Name of schema to copy the `dm` to.
#' If `schema` is provided, an error will be thrown if `temporary = FALSE` or
Expand Down Expand Up @@ -181,7 +183,7 @@ copy_dm_to <- function(dest, dm, ...,
return(dm)
}

copy_data <- build_copy_data(dm, dest, table_names_out)
copy_data <- build_copy_data(dm, dest, table_names_out, set_key_constraints)

new_tables <- copy_list_of_tables_to(
dest,
Expand Down
15 changes: 11 additions & 4 deletions R/disambiguate.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,21 @@ get_table_colnames <- function(dm, tables = NULL) {
select(table, column) %>%
unnest(column)

pks <-
dm_get_all_pks_impl(dm) %>%
rename(column = pk_col)
pks <- dm_get_all_pks_def_impl(def)

if (nrow(pks) == 0) {
return(table_colnames)
}

keep_colnames <-
pks %>%
rename(column = pk_col) %>%
unnest(column)

table_colnames %>%
# in case of flattening, the primary key columns will never be responsible for the name
# of the resulting column in the end, so they do not need to be disambiguated
anti_join(pks, by = c("table", "column"))
anti_join(keep_colnames, by = c("table", "column"))
}

compute_disambiguate_cols_recipe <- function(table_colnames, sep) {
Expand Down
36 changes: 0 additions & 36 deletions R/dm.R
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,6 @@ dm_get_def <- function(x) {
unclass(x)$def
}

dm_get_data_model_pks <- function(x) {
# FIXME: Obliterate

dm_get_def(x) %>%
select(table, pks) %>%
unnest_pks()
}

unnest_pks <- function(def) {
# Optimized
pk_df <- tibble(
Expand All @@ -320,39 +312,11 @@ unnest_pks <- function(def) {
# FIXME: Should work better with dplyr 0.9.0
if (!("column" %in% names(pk_df))) {
pk_df$column <- character()
} else {
# This is expected to break with compound keys
pk_df$column <- flatten_chr(pk_df$column)
}

pk_df
}

dm_get_data_model_fks <- function(x) {
# FIXME: Obliterate

fk_df <-
dm_get_def(x) %>%
select(ref = table, fks, pks) %>%
filter(map_lgl(fks, has_length)) %>%
unnest(pks)

if (nrow(fk_df) == 0) {
return(tibble(
table = character(), column = character(),
ref = character(), ref_col = character()
))
}

fk_df %>%
# This is expected to break with compound keys
mutate(ref_col = flatten_chr(column)) %>%
select(-column) %>%
unnest(fks) %>%
mutate(column = flatten_chr(column)) %>%
select(ref, column, table, ref_col)
}

#' Get filter expressions
#'
#' `dm_get_filters()` returns the filter expressions that have been applied to a `dm` object.
Expand Down
16 changes: 15 additions & 1 deletion R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ slice.zoomed_dm <- function(.data, ..., .keep_pk = NULL) {
orig_pk <- dm_get_pk_impl(.data, orig_name_zoomed(.data))
tracked_cols <- get_tracked_cols(.data)
if (is_null(.keep_pk)) {
if (has_length(orig_pk) && orig_pk %in% tracked_cols) {
if (has_length(orig_pk) && any(unlist(orig_pk) %in% tracked_cols)) {
message(
paste(
"Keeping PK column, but `slice.zoomed_dm()` can potentially damage the uniqueness of PK columns (duplicated indices).",
Expand Down Expand Up @@ -566,3 +566,17 @@ safe_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = gro
}
ungroup(out)
}

new_tracked_cols <- function(dm, selected) {
tracked_cols <- get_tracked_cols(dm)
old_tracked_names <- names(tracked_cols)
# the new tracked keys need to be the remaining original column names
# and their name needs to be the newest one (tidyselect-syntax)
# `intersect(selected, old_tracked_names)` is empty, return `NULL`

selected_match <- selected[selected %in% old_tracked_names]
set_names(
tracked_cols[selected_match],
names(selected_match)
)
}
12 changes: 8 additions & 4 deletions R/draw-dm.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,18 @@ dm_get_data_model <- function(x, column_types) {
stringsAsFactors = FALSE
)

references_for_columns <- dm_get_data_model_fks(x)
references_for_columns <-
dm_get_all_fks_impl(x) %>%
transmute(table = child_table, column = format(child_fk_cols), ref = parent_table, ref_col = format(parent_pk_cols))

references <-
references_for_columns %>%
mutate(ref_id = row_number(), ref_col_num = 1L)

keys <-
dm_get_data_model_pks(x) %>%
dm_get_all_pks_impl(x) %>%
mutate(column = format(pk_col)) %>%
select(-pk_col) %>%
mutate(key = 1L)

if (column_types) {
Expand All @@ -118,9 +122,9 @@ dm_get_data_model <- function(x, column_types) {

columns <-
types %>%
left_join(keys, by = c("table", "column")) %>%
full_join(keys, by = c("table", "column")) %>%
full_join(references_for_columns, by = c("table", "column")) %>%
mutate(key = coalesce(key, 0L)) %>%
left_join(references_for_columns, by = c("table", "column")) %>%
# for compatibility with print method from {datamodelr}
as.data.frame()

Expand Down
31 changes: 17 additions & 14 deletions R/examine-constraints.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,23 @@ check_pk_constraints <- function(dm) {
pks <- dm_get_all_pks_impl(dm)
if (nrow(pks) == 0) {
return(tibble(
table = character(0),
kind = character(0),
column = character(0),
ref_table = NA_character_,
is_key = logical(0),
problem = character(0)
table = character(),
kind = character(),
column = new_keys(),
ref_table = character(),
is_key = logical(),
problem = character()
))
}
table_names <- pull(pks, table)

tbls <- map(set_names(table_names), ~ tbl(dm, .))

tbl_is_pk <- map2_dfr(tbls, pks$pk_col, enum_pk_candidates_impl) %>%
mutate(table = table_names) %>%
tbl_is_pk <-
tibble(table = table_names, tbls, column = pks$pk_col) %>%
mutate(candidate = map2(tbls, column, ~ enum_pk_candidates_impl(.x, list(.y)))) %>%
select(-column, -tbls) %>%
unnest(candidate) %>%
rename(is_key = candidate, problem = why)

tibble(
Expand All @@ -121,11 +124,11 @@ check_fk_constraints <- function(dm) {
cts <- pull(fks, child_table) %>% map(tbl, src = dm)
fks_tibble <- mutate(fks, t1 = cts, t2 = pts) %>%
select(t1, t1_name = child_table, colname = child_fk_cols, t2, t2_name = parent_table, pk = pk_col)
mutate(
fks_tibble,
problem = pmap_chr(fks_tibble, check_fk),
is_key = if_else(problem == "", TRUE, FALSE),
kind = "FK"
) %>%
fks_tibble %>%
mutate(
problem = pmap_chr(fks_tibble, check_fk),
is_key = if_else(problem == "", TRUE, FALSE),
kind = "FK"
) %>%
select(table = t1_name, kind, column = colname, ref_table = t2_name, is_key, problem)
}
4 changes: 2 additions & 2 deletions R/filter-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ get_by <- function(dm, lhs_name, rhs_name) {
if (length(lhs_col) > 1 || length(rhs_col) > 1) abort_no_cycles(create_graph_from_dm(dm))
# Construct a `by` argument of the form `c("lhs_col[1]" = "rhs_col[1]", ...)`
# as required by `*_join()`
by <- rhs_col
names(by) <- lhs_col
by <- get_key_cols(rhs_col)
names(by) <- get_key_cols(lhs_col)
by
}

Expand Down
Loading

0 comments on commit c2d4b5f

Please sign in to comment.