From 0b688c71d089329be4ca58718eb78914d28f0c6c Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Mon, 13 Feb 2023 13:07:33 +0000 Subject: [PATCH 01/13] Add name repair functionality --- R/db-helpers.R | 37 ++++++++++++++++++++--- tests/testthat/test-db-helpers.R | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/R/db-helpers.R b/R/db-helpers.R index 3d5d7788fd..a8e4025d22 100644 --- a/R/db-helpers.R +++ b/R/db-helpers.R @@ -118,14 +118,43 @@ repair_table_names_for_db <- function(table_names, temporary, con, schema = NULL quote_ids(names, con_from_src_or_con(con), schema) } -make_local_names <- function(schema_names, table_names) { +find_name_clashes <- function(old, new) { - combined_names <- glue::glue("{schema_names}.{table_names}") + # Any entries in `new` with more than one corresponding entry in `old` + purrr::keep(split(old, new), ~ length(unique(.x)) > 1) +} + +make_local_names <- function(schema_names, table_names, repair = "minimal") { +# +# # Construct new table names - if we have multiple schemas, prepend schema +# repaired_schema_names <- vctrs::vec_as_names(schema_names, repair = repair) +# repaired_table_names <- vctrs::vec_as_names(table_names, repair = repair) - if (length(unique(schema_names)) == 1) + # if (length(unique(schema_names)) > 1) { + # table_names <- glue::glue("{schema_names}.{table_names}") + # repaired_table_names <- glue::glue("{repaired_schema_names}.{repaired_table_names}") + # } + + local_names <- if (length(unique(schema_names)) == 1) table_names else - combined_names + glue::glue("{schema_names}.{table_names}") + + repaired_local_names <- vctrs::vec_as_names(local_names, repair = repair) + + # Ensure new table names don't clash + clashes <- find_name_clashes(local_names, repaired_local_names) + + if (length(clashes) > 0) + cli::cli_abort(c( + "x" = "Repairing names caused name clashes; try again with a different `.name_repair` option.", + purrr::imap_chr( + clashes, + ~ glue::glue("* [{paste0(dQuote(.x, q = FALSE), collapse = ', ')}] => {dQuote(.y, q = FALSE)}") + ) + )) + + repaired_local_names } get_src_tbl_names <- function(src, schema = NULL, dbname = NULL) { diff --git a/tests/testthat/test-db-helpers.R b/tests/testthat/test-db-helpers.R index be2aab1499..984f67dd43 100644 --- a/tests/testthat/test-db-helpers.R +++ b/tests/testthat/test-db-helpers.R @@ -184,4 +184,55 @@ test_that("make local names", { c("sch1.tbl1", "sch2.tbl2", "sch3.tbl3") ) + + + # If we have name clashes, there should be an error about non-unique names + expect_equal( + make_local_names( + rep("sch", 2), + c("table name", "Table Name"), + repair = "check_unique" # default from dm_from_con() + ), + c("table name", "Table Name") # i.e. these names are unique & therefore fine + ) + + expect_snapshot_error( + make_local_names( + rep("sch", 2), + c("table name", "Table Name"), + repair = tolower # passing in a custom "repair" function which leads to a clash + ) + ) + + expect_equal( + make_local_names( + c("sch1", "sch2"), # this time, tables are in different schemas - so no clash of "schema.table" local names + c("table name", "Table Name"), + repair = tolower + ), + c("sch1.table name", "sch2.table name") + ) + +}) + +test_that("find name clashes", { + + # If all old names change to different new names... + res <- find_name_clashes( + c("one", "two", "three"), + c("uno", "dos", "tres") + ) + # ... we shouldn't get anything + expect_length(res, 0) + + + # If multiple old names change to the same new name... + res <- find_name_clashes( + c("one", "two", "three"), + c("uno", "uno", "tres") + ) + # We should get a list, with one element per "clashing" new name + expect_named(res, "uno") + expect_equal(res[["uno"]], c("one", "two")) + }) From d7392ad67e5816e6ad02bb651111dd53c2ee7ae6 Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Mon, 13 Feb 2023 13:11:14 +0000 Subject: [PATCH 02/13] Add .name_repair to dm_from_con() --- R/db-helpers.R | 4 ++-- R/dm_from_con.R | 8 ++++++-- man/dm_from_con.Rd | 13 ++++++++++++- man/dm_from_src.Rd | 2 ++ 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/R/db-helpers.R b/R/db-helpers.R index a8e4025d22..a56dcf5a95 100644 --- a/R/db-helpers.R +++ b/R/db-helpers.R @@ -157,7 +157,7 @@ make_local_names <- function(schema_names, table_names, repair = "minimal") { repaired_local_names } -get_src_tbl_names <- function(src, schema = NULL, dbname = NULL) { +get_src_tbl_names <- function(src, schema = NULL, dbname = NULL, .name_repair = "check_unique") { if (!is_mssql(src) && !is_postgres(src) && !is_mariadb(src)) { warn_if_arg_not(schema, only_on = c("MSSQL", "Postgres", "MariaDB")) warn_if_arg_not(dbname, only_on = "MSSQL") @@ -193,7 +193,7 @@ get_src_tbl_names <- function(src, schema = NULL, dbname = NULL) { collect() %>% # create remote names for the tables in the given schema (name is table_name; cannot be duplicated within a single schema) transmute( - local_name = make_local_names(schema_name, table_name), + local_name = make_local_names(schema_name, table_name, repair = .name_repair), remote_name = schema_if(schema_name, table_name, con, dbname) ) %>% deframe() diff --git a/R/dm_from_con.R b/R/dm_from_con.R index fbb9ba851e..35a1412147 100644 --- a/R/dm_from_con.R +++ b/R/dm_from_con.R @@ -16,12 +16,16 @@ #' foreign keys from the database. #' Currently works only for Postgres and SQL Server databases. #' The default attempts to query and issues an informative message. +#' @param .name_repair Treatment of problematic table names. This argument is +#' passed on as `repair` to [vctrs::vec_as_names()]. #' @param ... `r lifecycle::badge("experimental")` #' #' Additional parameters for the schema learning query. #' #' - `schema`: supported for MSSQL (default: `"dbo"`), Postgres (default: `"public"`), and MariaDB/MySQL #' (default: current database). Learn the tables in a specific schema (or database for MariaDB/MySQL). +#' If multiple schema names are given as a vector, the returned `dm` object will have names of the form +#' `"schema.table"`. #' - `dbname`: supported for MSSQL. Access different databases on the connected MSSQL-server; #' default: active database. #' - `table_type`: supported for Postgres (default: `"BASE TABLE"`). Specify the table type. Options are: @@ -40,7 +44,7 @@ #' #' # Avoid DBI::dbDisconnect() here, because we don't own the connection dm_from_con <- function(con = NULL, table_names = NULL, learn_keys = NULL, - ...) { + .name_repair = "check_unique", ...) { stopifnot(is(con, "DBIConnection") || inherits(con, "Pool")) if (inherits(con, "Pool")) { @@ -84,7 +88,7 @@ dm_from_con <- function(con = NULL, table_names = NULL, learn_keys = NULL, } if (is_null(table_names)) { - src_tbl_names <- get_src_tbl_names(src, ...) + src_tbl_names <- get_src_tbl_names(src, ..., .name_repair = .name_repair) } else { src_tbl_names <- table_names } diff --git a/man/dm_from_con.Rd b/man/dm_from_con.Rd index 4bd9a7100d..6b0c915a21 100644 --- a/man/dm_from_con.Rd +++ b/man/dm_from_con.Rd @@ -4,7 +4,13 @@ \alias{dm_from_con} \title{Load a dm from a remote data source} \usage{ -dm_from_con(con = NULL, table_names = NULL, learn_keys = NULL, ...) +dm_from_con( + con = NULL, + table_names = NULL, + learn_keys = NULL, + .name_repair = "check_unique", + ... +) } \arguments{ \item{con}{A \code{\link[DBI:DBIConnection-class]{DBI::DBIConnection}} or a \code{Pool} object.} @@ -18,12 +24,17 @@ foreign keys from the database. Currently works only for Postgres and SQL Server databases. The default attempts to query and issues an informative message.} +\item{.name_repair}{Treatment of problematic table names. This argument is +passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.} + \item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Additional parameters for the schema learning query. \itemize{ \item \code{schema}: supported for MSSQL (default: \code{"dbo"}), Postgres (default: \code{"public"}), and MariaDB/MySQL (default: current database). Learn the tables in a specific schema (or database for MariaDB/MySQL). +If multiple schema names are given as a vector, the returned \code{dm} object will have names of the form +\code{"schema.table"}. \item \code{dbname}: supported for MSSQL. Access different databases on the connected MSSQL-server; default: active database. \item \code{table_type}: supported for Postgres (default: \code{"BASE TABLE"}). Specify the table type. Options are: diff --git a/man/dm_from_src.Rd b/man/dm_from_src.Rd index 7ac897c79c..cdc53ba9e1 100644 --- a/man/dm_from_src.Rd +++ b/man/dm_from_src.Rd @@ -24,6 +24,8 @@ Additional parameters for the schema learning query. \itemize{ \item \code{schema}: supported for MSSQL (default: \code{"dbo"}), Postgres (default: \code{"public"}), and MariaDB/MySQL (default: current database). Learn the tables in a specific schema (or database for MariaDB/MySQL). +If multiple schema names are given as a vector, the returned \code{dm} object will have names of the form +\code{"schema.table"}. \item \code{dbname}: supported for MSSQL. Access different databases on the connected MSSQL-server; default: active database. \item \code{table_type}: supported for Postgres (default: \code{"BASE TABLE"}). Specify the table type. Options are: From 47f468c658b663b804d4f286d4701a29f458b4d8 Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Mon, 13 Feb 2023 13:23:23 +0000 Subject: [PATCH 03/13] Remove redundant approach --- R/db-helpers.R | 9 --------- 1 file changed, 9 deletions(-) diff --git a/R/db-helpers.R b/R/db-helpers.R index a56dcf5a95..e12ba8d7be 100644 --- a/R/db-helpers.R +++ b/R/db-helpers.R @@ -125,15 +125,6 @@ find_name_clashes <- function(old, new) { } make_local_names <- function(schema_names, table_names, repair = "minimal") { -# -# # Construct new table names - if we have multiple schemas, prepend schema -# repaired_schema_names <- vctrs::vec_as_names(schema_names, repair = repair) -# repaired_table_names <- vctrs::vec_as_names(table_names, repair = repair) - - # if (length(unique(schema_names)) > 1) { - # table_names <- glue::glue("{schema_names}.{table_names}") - # repaired_table_names <- glue::glue("{repaired_schema_names}.{repaired_table_names}") - # } local_names <- if (length(unique(schema_names)) == 1) table_names From 1037d90e65e5dd817cfcac48f32024fa0ad137e1 Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Wed, 22 Mar 2023 10:24:32 +0000 Subject: [PATCH 04/13] Replace .name_repair with glue-based naming patterns --- R/db-helpers.R | 45 +++++++++++--------------------- R/dm_from_con.R | 14 +++++----- man/dm_from_con.Rd | 12 +++++---- man/dm_from_src.Rd | 2 -- tests/testthat/test-db-helpers.R | 24 ++++++++++++++--- 5 files changed, 50 insertions(+), 47 deletions(-) diff --git a/R/db-helpers.R b/R/db-helpers.R index b78e4a4ae2..0fa9a8f6d0 100644 --- a/R/db-helpers.R +++ b/R/db-helpers.R @@ -124,31 +124,7 @@ find_name_clashes <- function(old, new) { purrr::keep(split(old, new), ~ length(unique(.x)) > 1) } -make_local_names <- function(schema_names, table_names, repair = "minimal") { - - local_names <- if (length(unique(schema_names)) == 1) - table_names - else - glue::glue("{schema_names}.{table_names}") - - repaired_local_names <- vctrs::vec_as_names(local_names, repair = repair) - - # Ensure new table names don't clash - clashes <- find_name_clashes(local_names, repaired_local_names) - - if (length(clashes) > 0) - cli::cli_abort(c( - "x" = "Repairing names caused name clashes; try again with a different `.name_repair` option.", - purrr::imap_chr( - clashes, - ~ glue::glue("* [{paste0(dQuote(.x, q = FALSE), collapse = ', ')}] => {dQuote(.y, q = FALSE)}") - ) - )) - - repaired_local_names -} - -get_src_tbl_names <- function(src, schema = NULL, dbname = NULL, .name_repair = "check_unique") { +get_src_tbl_names <- function(src, schema = NULL, dbname = NULL, names = NULL) { if (!is_mssql(src) && !is_postgres(src) && !is_mariadb(src)) { warn_if_arg_not(schema, only_on = c("MSSQL", "Postgres", "MariaDB")) warn_if_arg_not(dbname, only_on = "MSSQL") @@ -179,19 +155,28 @@ get_src_tbl_names <- function(src, schema = NULL, dbname = NULL, .name_repair = names_table <- get_names_table_mariadb(con) } + # Use smart default for `.names`, if it wasn't provided + names_pattern <- if (length(schema) == 1) { + names %||% "{.table}" + } else { + names %||% "{.schema}.{.table}" + } + names_table <- names_table %>% filter(schema_name %in% !!(if (inherits(schema, "sql")) glue_sql_collapse(schema) else schema)) %>% collect() %>% # create remote names for the tables in the given schema (name is table_name; cannot be duplicated within a single schema) - mutate(remote_name = schema_if(schema_name, table_name, con, dbname)) - + mutate( + local_name = glue(names_pattern, .table = table_name, .schema = schema_name), + remote_name = schema_if(schema_name, table_name, con, dbname) + ) # SQL table names are only guaranteed to be unique in a single schema, so if # we have multiple schemas, we might end up with the same local_name pointing # to more than one remote_name # In such a case, raise a warning, and keep only the first relevant schema if (length(schema) > 1) { - clashes <- with(names_table, find_name_clashes(table_name, remote_name)) + clashes <- with(names_table, find_name_clashes(local_name, remote_name)) if (length(clashes) > 0) cli::cli_warn(c( @@ -209,11 +194,11 @@ get_src_tbl_names <- function(src, schema = NULL, dbname = NULL, .name_repair = # Keep only first schema (positionally) for each local_name names_table <- names_table %>% mutate(schema = factor(schema, labels = !!schema)) %>% - slice_min(schema, by = table_name) + slice_min(schema, by = local_name) } names_table %>% - select(table_name, remote_name) %>% + select(local_name, remote_name) %>% deframe() } diff --git a/R/dm_from_con.R b/R/dm_from_con.R index 35a1412147..3fa66c893b 100644 --- a/R/dm_from_con.R +++ b/R/dm_from_con.R @@ -16,16 +16,18 @@ #' foreign keys from the database. #' Currently works only for Postgres and SQL Server databases. #' The default attempts to query and issues an informative message. -#' @param .name_repair Treatment of problematic table names. This argument is -#' passed on as `repair` to [vctrs::vec_as_names()]. +#' @param .names A glue specification that describes how to name the tables +#' within the output. This can use `{.table}` to stand for the table name, and +#' `{.schema}` to stand for the name of the schema which the table lives +#' within. The default (`NULL`) is equivalent to `"{.table}"` when a single +#' schema is specified in `schema`, and `"{.schema}.{.table}"` for the case +#' where multiple schemas are given. #' @param ... `r lifecycle::badge("experimental")` #' #' Additional parameters for the schema learning query. #' #' - `schema`: supported for MSSQL (default: `"dbo"`), Postgres (default: `"public"`), and MariaDB/MySQL #' (default: current database). Learn the tables in a specific schema (or database for MariaDB/MySQL). -#' If multiple schema names are given as a vector, the returned `dm` object will have names of the form -#' `"schema.table"`. #' - `dbname`: supported for MSSQL. Access different databases on the connected MSSQL-server; #' default: active database. #' - `table_type`: supported for Postgres (default: `"BASE TABLE"`). Specify the table type. Options are: @@ -44,7 +46,7 @@ #' #' # Avoid DBI::dbDisconnect() here, because we don't own the connection dm_from_con <- function(con = NULL, table_names = NULL, learn_keys = NULL, - .name_repair = "check_unique", ...) { + .names = NULL, ...) { stopifnot(is(con, "DBIConnection") || inherits(con, "Pool")) if (inherits(con, "Pool")) { @@ -88,7 +90,7 @@ dm_from_con <- function(con = NULL, table_names = NULL, learn_keys = NULL, } if (is_null(table_names)) { - src_tbl_names <- get_src_tbl_names(src, ..., .name_repair = .name_repair) + src_tbl_names <- get_src_tbl_names(src, ..., names = .names) } else { src_tbl_names <- table_names } diff --git a/man/dm_from_con.Rd b/man/dm_from_con.Rd index 6b0c915a21..1cd4cd30bb 100644 --- a/man/dm_from_con.Rd +++ b/man/dm_from_con.Rd @@ -8,7 +8,7 @@ dm_from_con( con = NULL, table_names = NULL, learn_keys = NULL, - .name_repair = "check_unique", + .names = NULL, ... ) } @@ -24,8 +24,12 @@ foreign keys from the database. Currently works only for Postgres and SQL Server databases. The default attempts to query and issues an informative message.} -\item{.name_repair}{Treatment of problematic table names. This argument is -passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.} +\item{.names}{A glue specification that describes how to name the tables +within the output. This can use \code{{.table}} to stand for the table name, and +\code{{.schema}} to stand for the name of the schema which the table lives +within. The default (\code{NULL}) is equivalent to \code{"{.table}"} when a single +schema is specified in \code{schema}, and \code{"{.schema}.{.table}"} for the case +where multiple schemas are given.} \item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} @@ -33,8 +37,6 @@ Additional parameters for the schema learning query. \itemize{ \item \code{schema}: supported for MSSQL (default: \code{"dbo"}), Postgres (default: \code{"public"}), and MariaDB/MySQL (default: current database). Learn the tables in a specific schema (or database for MariaDB/MySQL). -If multiple schema names are given as a vector, the returned \code{dm} object will have names of the form -\code{"schema.table"}. \item \code{dbname}: supported for MSSQL. Access different databases on the connected MSSQL-server; default: active database. \item \code{table_type}: supported for Postgres (default: \code{"BASE TABLE"}). Specify the table type. Options are: diff --git a/man/dm_from_src.Rd b/man/dm_from_src.Rd index cdc53ba9e1..7ac897c79c 100644 --- a/man/dm_from_src.Rd +++ b/man/dm_from_src.Rd @@ -24,8 +24,6 @@ Additional parameters for the schema learning query. \itemize{ \item \code{schema}: supported for MSSQL (default: \code{"dbo"}), Postgres (default: \code{"public"}), and MariaDB/MySQL (default: current database). Learn the tables in a specific schema (or database for MariaDB/MySQL). -If multiple schema names are given as a vector, the returned \code{dm} object will have names of the form -\code{"schema.table"}. \item \code{dbname}: supported for MSSQL. Access different databases on the connected MSSQL-server; default: active database. \item \code{table_type}: supported for Postgres (default: \code{"BASE TABLE"}). Specify the table type. Options are: diff --git a/tests/testthat/test-db-helpers.R b/tests/testthat/test-db-helpers.R index 3aec624185..a0853db902 100644 --- a/tests/testthat/test-db-helpers.R +++ b/tests/testthat/test-db-helpers.R @@ -69,9 +69,17 @@ test_that("DB helpers work for MSSQL", { get_src_tbl_names(my_test_src(), dbname = "db_helpers_db", schema = "schema_db_helpers_2")["test_db_helpers_4"], DBI::SQL("\"db_helpers_db\".\"schema_db_helpers_2\".\"test_db_helpers_4\"") ) + expect_identical( + get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))["dbo.test_db_helpers_2"], + DBI::SQL("\"dbo\".\"test_db_helpers_2\"") + ) + expect_identical( + get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))["schema_db_helpers.test_db_helpers_2"], + DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") + ) expect_warning( expect_identical( - get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))["test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"), .names = "{.table}")["test_db_helpers_2"], DBI::SQL("\"dbo\".\"test_db_helpers_2\"") ), 'Local name test_db_helpers_2 will refer to <"dbo"."test_db_helpers_2">, rather than to <"schema_db_helpers"."test_db_helpers_2">', @@ -79,7 +87,7 @@ test_that("DB helpers work for MSSQL", { ) expect_warning( expect_identical( - get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "dbo"))["test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "dbo"), .names = "{.table}")["test_db_helpers_2"], DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") ), @@ -129,9 +137,17 @@ test_that("DB helpers work for Postgres", { get_src_tbl_names(my_test_src(), schema = "schema_db_helpers")["test_db_helpers_2"], DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") ) + expect_identical( + get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["public.test_db_helpers_2"], + DBI::SQL("\"public\".\"test_db_helpers_2\"") + ) + expect_identical( + get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["schema_db_helpers.test_db_helpers_2"], + DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") + ) expect_warning( expect_identical( - get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"), .names = "{.table}")["test_db_helpers_2"], DBI::SQL("\"public\".\"test_db_helpers_2\"") ), 'Local name test_db_helpers_2 will refer to <"public"."test_db_helpers_2">, rather than to <"schema_db_helpers"."test_db_helpers_2">', @@ -139,7 +155,7 @@ test_that("DB helpers work for Postgres", { ) expect_warning( expect_identical( - get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "public"))["test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "public"), .names = "{.table}")["test_db_helpers_2"], DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") ), From 06b156e792e0fd996501f50dabf56edb909998eb Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Wed, 22 Mar 2023 10:36:38 +0000 Subject: [PATCH 05/13] Add "local_name" to global variables --- R/global.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/global.R b/R/global.R index 9134d1ea7f..b9d3255266 100644 --- a/R/global.R +++ b/R/global.R @@ -48,6 +48,7 @@ utils::globalVariables(c( "key_fk", "col_tracker_zoom", "kind", + "local_name", "mismatch_or_null", "n_c", "n_p", From 23cceb122a7ff8b4456aeceace0067c1618c91dd Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Wed, 22 Mar 2023 11:01:28 +0000 Subject: [PATCH 06/13] Fix typo in column name --- R/db-helpers.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/db-helpers.R b/R/db-helpers.R index e853e95129..4bffdbdb24 100644 --- a/R/db-helpers.R +++ b/R/db-helpers.R @@ -184,8 +184,8 @@ get_src_tbl_names <- function(src, schema = NULL, dbname = NULL) { # Keep only first schema (positionally) for each local_name names_table <- names_table %>% - mutate(schema = factor(schema, labels = !!schema)) %>% - slice_min(schema, by = table_name) + mutate(schema_name = factor(schema_name, labels = schema)) %>% + slice_min(schema_name, by = local_name) } names_table %>% From 98356a6fed87df89440f1d8e7e5fc1d92fc58dbc Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Wed, 22 Mar 2023 18:45:43 +0000 Subject: [PATCH 07/13] Fix parentheses positions in test --- tests/testthat/test-db-helpers.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-db-helpers.R b/tests/testthat/test-db-helpers.R index 8a2151d1e0..8719661550 100644 --- a/tests/testthat/test-db-helpers.R +++ b/tests/testthat/test-db-helpers.R @@ -78,13 +78,13 @@ test_that("DB helpers work for MSSQL", { DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") ) expect_warning( - out <- get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers", .names = "{.table}"))["test_db_helpers_2"], + out <- get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"), .names = "{.table}")["test_db_helpers_2"], 'Local name test_db_helpers_2 will refer to <"dbo"."test_db_helpers_2">, rather than to <"schema_db_helpers"."test_db_helpers_2">', fixed = TRUE ) expect_identical(out, DBI::SQL("\"dbo\".\"test_db_helpers_2\"")) expect_warning( - out <- get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "dbo", .names = "{.table}"))["test_db_helpers_2"], + out <- get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "dbo"), .names = "{.table}")["test_db_helpers_2"], 'Local name test_db_helpers_2 will refer to <"schema_db_helpers"."test_db_helpers_2">, rather than to <"dbo"."test_db_helpers_2">', fixed = TRUE ) From eeb562c6c466103eb9cc4a330a500ac6be58b257 Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Thu, 22 Jun 2023 20:30:50 +0100 Subject: [PATCH 08/13] Regenerate docs with roxyglobals --- R/globals.R | 1 + man/utils_table_manipulation.Rd | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/globals.R b/R/globals.R index 9246abf5cf..2c510bf6db 100644 --- a/R/globals.R +++ b/R/globals.R @@ -19,6 +19,7 @@ utils::globalVariables(c( "schema_name", # "table_name", # "remote_name", # + "local_name", # "child_uuid", # "parent_uuid", # "child_fk_cols", # diff --git a/man/utils_table_manipulation.Rd b/man/utils_table_manipulation.Rd index daf6e3d639..8975f5951b 100644 --- a/man/utils_table_manipulation.Rd +++ b/man/utils_table_manipulation.Rd @@ -13,8 +13,7 @@ \item{x}{object of class \code{dm_zoomed}} \item{n}{an integer vector of length up to \code{dim(x)} (or 1, - for non-dimensioned objects). A \code{logical} is silently coerced to - integer. Values specify the indices to be + for non-dimensioned objects). Values specify the indices to be selected in the corresponding dimension (or along the length) of the object. A positive value of \code{n[i]} includes the first/last \code{n[i]} indices in that dimension, while a negative value From 220672c5353ef0f5f484b901791ee7ff809e1fe1 Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Thu, 22 Jun 2023 21:15:32 +0100 Subject: [PATCH 09/13] Use DBI::Id() in place of DBI::SQL() --- tests/testthat/test-db-helpers.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-db-helpers.R b/tests/testthat/test-db-helpers.R index 3334b5f908..2b019496a4 100644 --- a/tests/testthat/test-db-helpers.R +++ b/tests/testthat/test-db-helpers.R @@ -71,14 +71,14 @@ test_that("DB helpers work for MSSQL", { ) expect_identical( get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))["dbo.test_db_helpers_2"], - DBI::SQL("\"dbo\".\"test_db_helpers_2\"") + DBI::Id(schema = "dbo", table = "test_db_helpers_2") ) expect_identical( get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))["schema_db_helpers.test_db_helpers_2"], - DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") + DBI::Id(schema = "schema_db_helpers", table = "test_db_helpers_2") ) expect_warning( - out <- get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"), .names = "{.table}")["test_db_helpers_2"], + out <- get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"), names = "{.table}")["test_db_helpers_2"], 'Local name test_db_helpers_2 will refer to <"dbo"."test_db_helpers_2">, rather than to <"schema_db_helpers"."test_db_helpers_2">', fixed = TRUE ) @@ -90,7 +90,7 @@ test_that("DB helpers work for MSSQL", { )) ) expect_warning( - out <- get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "dbo"), .names = "{.table}")["test_db_helpers_2"], + out <- get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "dbo"), names = "{.table}")["test_db_helpers_2"], 'Local name test_db_helpers_2 will refer to <"schema_db_helpers"."test_db_helpers_2">, rather than to <"dbo"."test_db_helpers_2">', fixed = TRUE ) @@ -146,14 +146,14 @@ test_that("DB helpers work for Postgres", { ) expect_identical( get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["public.test_db_helpers_2"], - DBI::SQL("\"public\".\"test_db_helpers_2\"") + DBI::Id(schema = "public", table = "test_db_helpers_2") ) expect_identical( get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["schema_db_helpers.test_db_helpers_2"], - DBI::SQL("\"schema_db_helpers\".\"test_db_helpers_2\"") + DBI::Id(schema = "schema_db_helpers", table = "test_db_helpers_2") ) expect_warning( - out <- get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"), .names = "{.table}")["test_db_helpers_2"], + out <- get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"), names = "{.table}")["test_db_helpers_2"], 'Local name test_db_helpers_2 will refer to <"public"."test_db_helpers_2">, rather than to <"schema_db_helpers"."test_db_helpers_2">', fixed = TRUE ) @@ -165,7 +165,7 @@ test_that("DB helpers work for Postgres", { )) ) expect_warning( - out <- get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "public"), .names = "{.table}")["test_db_helpers_2"], + out <- get_src_tbl_names(my_test_src(), schema = c("schema_db_helpers", "public"), names = "{.table}")["test_db_helpers_2"], 'Local name test_db_helpers_2 will refer to <"schema_db_helpers"."test_db_helpers_2">, rather than to <"public"."test_db_helpers_2">', fixed = TRUE ) From 87d56ac1351335e1f465ac4c7631624ec7b2da5e Mon Sep 17 00:00:00 2001 From: owenjonesuob <21007837+owenjonesuob@users.noreply.github.com> Date: Thu, 22 Jun 2023 21:42:27 +0100 Subject: [PATCH 10/13] Fix list extraction in tests --- tests/testthat/test-db-helpers.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-db-helpers.R b/tests/testthat/test-db-helpers.R index 2b019496a4..cd632adc1a 100644 --- a/tests/testthat/test-db-helpers.R +++ b/tests/testthat/test-db-helpers.R @@ -70,11 +70,11 @@ test_that("DB helpers work for MSSQL", { DBI::Id(catalog = "db_helpers_db", schema = "schema_db_helpers_2", table = "test_db_helpers_4") ) expect_identical( - get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))["dbo.test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))[["dbo.test_db_helpers_2"]], DBI::Id(schema = "dbo", table = "test_db_helpers_2") ) expect_identical( - get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))["schema_db_helpers.test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("dbo", "schema_db_helpers"))[["schema_db_helpers.test_db_helpers_2"]], DBI::Id(schema = "schema_db_helpers", table = "test_db_helpers_2") ) expect_warning( @@ -145,11 +145,11 @@ test_that("DB helpers work for Postgres", { DBI::Id(schema = "schema_db_helpers", table = "test_db_helpers_2") ) expect_identical( - get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["public.test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["public.test_db_helpers_2"][[1]], DBI::Id(schema = "public", table = "test_db_helpers_2") ) expect_identical( - get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["schema_db_helpers.test_db_helpers_2"], + get_src_tbl_names(my_test_src(), schema = c("public", "schema_db_helpers"))["schema_db_helpers.test_db_helpers_2"][[1]], DBI::Id(schema = "schema_db_helpers", table = "test_db_helpers_2") ) expect_warning( From ba4580cdb0ff5c36f4e3b39085de13ccdf3e6fdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Tue, 11 Jul 2023 22:36:08 +0200 Subject: [PATCH 11/13] Explicit assignment, verbosity --- R/db-helpers.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/db-helpers.R b/R/db-helpers.R index a6da6d8857..8350897ed7 100644 --- a/R/db-helpers.R +++ b/R/db-helpers.R @@ -158,10 +158,14 @@ get_src_tbl_names <- function(src, schema = NULL, dbname = NULL, names = NULL) { } # Use smart default for `.names`, if it wasn't provided - names_pattern <- if (length(schema) == 1) { - names %||% "{.table}" + if (!is.null(names)) { + names_pattern <- names + } else if (length(schema) == 1) { + names_pattern <- "{.table}" + cli::cli_inform('Using {.code .names = "{names_pattern}"}') } else { - names %||% "{.schema}.{.table}" + names_pattern <- "{.schema}.{.table}" + cli::cli_inform('Using {.code .names = "{names_pattern}"}') } names_table <- names_table %>% From 9a51f9c7c805d9382af19a9c4645fdb95f208efc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Tue, 11 Jul 2023 22:36:42 +0200 Subject: [PATCH 12/13] Tweak docs --- R/dm_from_con.R | 10 +++++++--- man/dm_from_con.Rd | 9 ++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/R/dm_from_con.R b/R/dm_from_con.R index dea5655a8f..d2269efa4d 100644 --- a/R/dm_from_con.R +++ b/R/dm_from_con.R @@ -16,12 +16,16 @@ #' foreign keys from the database. #' Currently works only for Postgres and SQL Server databases. #' The default attempts to query and issues an informative message. -#' @param .names A glue specification that describes how to name the tables -#' within the output. This can use `{.table}` to stand for the table name, and +#' @param .names +#' `r lifecycle::badge("experimental")` +#' +#' A glue specification that describes how to name the tables +#' within the output, currently only for MSSQL, Postgres and MySQL/MariaDB. +#' This can use `{.table}` to stand for the table name, and #' `{.schema}` to stand for the name of the schema which the table lives #' within. The default (`NULL`) is equivalent to `"{.table}"` when a single #' schema is specified in `schema`, and `"{.schema}.{.table}"` for the case -#' where multiple schemas are given. +#' where multiple schemas are given, and may change in future versions. #' @param ... `r lifecycle::badge("experimental")` #' #' Additional parameters for the schema learning query. diff --git a/man/dm_from_con.Rd b/man/dm_from_con.Rd index 1cd4cd30bb..7b2d79b40a 100644 --- a/man/dm_from_con.Rd +++ b/man/dm_from_con.Rd @@ -24,12 +24,15 @@ foreign keys from the database. Currently works only for Postgres and SQL Server databases. The default attempts to query and issues an informative message.} -\item{.names}{A glue specification that describes how to name the tables -within the output. This can use \code{{.table}} to stand for the table name, and +\item{.names}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +A glue specification that describes how to name the tables +within the output, currently only for MSSQL, Postgres and MySQL/MariaDB. +This can use \code{{.table}} to stand for the table name, and \code{{.schema}} to stand for the name of the schema which the table lives within. The default (\code{NULL}) is equivalent to \code{"{.table}"} when a single schema is specified in \code{schema}, and \code{"{.schema}.{.table}"} for the case -where multiple schemas are given.} +where multiple schemas are given, and may change in future versions.} \item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} From d6cb0c8663953609657ddfdc6e2debdd745288fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 12 Jul 2023 00:59:20 +0200 Subject: [PATCH 13/13] Don't inform in base case --- R/db-helpers.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/db-helpers.R b/R/db-helpers.R index 8350897ed7..eb9a4be1e3 100644 --- a/R/db-helpers.R +++ b/R/db-helpers.R @@ -162,7 +162,6 @@ get_src_tbl_names <- function(src, schema = NULL, dbname = NULL, names = NULL) { names_pattern <- names } else if (length(schema) == 1) { names_pattern <- "{.table}" - cli::cli_inform('Using {.code .names = "{names_pattern}"}') } else { names_pattern <- "{.schema}.{.table}" cli::cli_inform('Using {.code .names = "{names_pattern}"}')