diff --git a/.Rbuildignore b/.Rbuildignore index c97d6d7..1b7bc33 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -29,8 +29,8 @@ ^CRAN-RELEASE$ ^cran-comments\.md$ ^data/abstracts_topicmodels\.rda$ -^data/abstracts_seededlda\.rda$ ^data/abstracts_unseededlda\.rda$ +^data/abstracts_keyatm\.rda$ ^data/abstracts_warplda\.rda$ ^data/abstracts_stm\.rda$ ^tests/testthat/apps/ diff --git a/DESCRIPTION b/DESCRIPTION index 66b813d..80a1588 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: oolong Title: Create Validation Tests for Automated Content Analysis -Version: 0.5.1 +Version: 0.6.0 Authors@R: c(person(given = "Chung-hong", family = "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), person(given = "Marius", family = "Sältzer", role = c("aut"), email = "msaeltze@mail.uni-mannheim.de", comment = c(ORCID = "0000-0002-8604-4666"))) @@ -10,9 +10,9 @@ Encoding: UTF-8 URL: https://gesistsa.github.io/oolong, https://github.com/gesistsa/oolong LazyData: true Depends: - R (>= 4.0) -Imports: - keyATM (>= 0.2.2), + R (>= 3.5.0) +Imports: + seededlda, purrr, tibble, shiny, @@ -26,14 +26,14 @@ Imports: stats, utils RoxygenNote: 7.3.1 -Suggests: +Suggests: + keyATM (>= 0.2.2), testthat (>= 3.0.2), text2vec (>= 0.6), BTM, dplyr, topicmodels, stm, - seededlda, covr, stringr, knitr, diff --git a/R/oolong.R b/R/oolong.R index 699f66d..ecc2b32 100644 --- a/R/oolong.R +++ b/R/oolong.R @@ -71,21 +71,23 @@ Oolong_test <- R6::R6Class( #' @return an oolong test object. #' @examples #' ## Creation of oolong test with only word intrusion test -#' data(abstracts_keyatm) +#' data(abstracts_seededlda) #' data(abstracts) -#' oolong_test <- wi(input_model = abstracts_keyatm, userid = "Hadley") +#' oolong_test <- wi(input_model = abstracts_seededlda, userid = "Hadley") #' ## Creation of oolong test with both word intrusion test and topic intrusion test -#' oolong_test <- witi(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Julia") +#' oolong_test <- witi(input_model = abstracts_seededlda, +#' input_corpus = abstracts$text, userid = "Julia") #' ## Creation of oolong test with topic intrusion test -#' oolong_test <- ti(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Jenny") +#' oolong_test <- ti(input_model = abstracts_seededlda, +#' input_corpus = abstracts$text, userid = "Jenny") #' ## Creation of oolong test with word set intrusion test -#' oolong_test <- wsi(input_model = abstracts_keyatm, userid = "Garrett") +#' oolong_test <- wsi(input_model = abstracts_seededlda, userid = "Garrett") #' ## Creation of gold standard #' oolong_test <- gs(input_corpus = trump2k, userid = "Yihui") #' ## Using create_oolong(); not recommended -#' oolong_test <- create_oolong(input_model = abstracts_keyatm, +#' oolong_test <- create_oolong(input_model = abstracts_seededlda, #' input_corpus = abstracts$text, userid = "JJ") -#' oolong_test <- create_oolong(input_model = abstracts_keyatm, +#' oolong_test <- create_oolong(input_model = abstracts_seededlda, #' input_corpus = abstracts$text, userid = "Mara", type = "ti") #' oolong_test <- create_oolong(input_corpus = abstracts$text, userid = "Winston", type = "gs") #' @author Chung-hong Chan, Marius Sältzer diff --git a/R/oolong_btm.R b/R/oolong_btm.R index bb7aa35..6d28027 100644 --- a/R/oolong_btm.R +++ b/R/oolong_btm.R @@ -4,7 +4,7 @@ K <- input_model$K better_theta <- dirty_theta[match(quanteda::docid(input_corpus), row.names(dirty_theta)),] ## replace NA value with ambiguous theta, i.e. 1/K - better_theta[is.na(better_theta)] <- 1/K + better_theta[is.na(better_theta)] <- 1 / K rownames(better_theta) <- quanteda::docid(input_corpus) return(better_theta) } @@ -13,7 +13,7 @@ input_model <- input_model_s3$model K <- input_model$K V <- input_model$W - terms <- t(apply(input_model$phi, MARGIN = 2, FUN = function(x){ + terms <- t(apply(input_model$phi, MARGIN = 2, FUN = function(x) { x <- data.frame(token = names(x), probability = x) x <- x[order(x$probability, decreasing = TRUE), ] x <- x$token @@ -21,11 +21,11 @@ })) all_terms <- unique(as.vector(terms[,seq_len(n_top_terms)])) if (need_topic) { - if (is.null(input_corpus) | is.null(btm_dataframe) | !"corpus" %in% class(input_corpus)) { + if (is.null(input_corpus) || is.null(btm_dataframe) || !"corpus" %in% class(input_corpus)) { .cstop(TRUE, "You need to provide input_corpus (in quanteda format) and btm_dataframe for generating topic intrusion tests.") } model_terms <- terms[, seq_len(n_topiclabel_words)] - theta <- .generate_btm_theta(input_model, btm_dataframe, input_corpus) + theta <- .generate_btm_theta(input_model, btm_dataframe, input_corpus) } else { model_terms <- NULL theta <- NULL diff --git a/R/oolong_data_misc.R b/R/oolong_data_misc.R index dfe34a3..7800192 100644 --- a/R/oolong_data_misc.R +++ b/R/oolong_data_misc.R @@ -15,9 +15,9 @@ #' Topic models trained with the abstracts dataset. #' #' These are topic models trained with different topic model packages. -"abstracts_keyatm" +"abstracts_seededlda" -#' @rdname abstracts_keyatm +#' @rdname abstracts_seededlda "abstracts_btm" #' AFINN dictionary @@ -44,7 +44,7 @@ #' @importFrom quanteda print corpus NULL -utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'avg_answer', 'abstracts_keyatm', 'abstracts')) +utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'avg_answer', 'abstracts_seededlda', 'abstracts')) ### print the ... if boolean_test is true .cp <- function(boolean_test, ...) { @@ -89,4 +89,3 @@ utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'a } return(digest::digest(x, algo = "sha1")) } - diff --git a/R/oolong_update.R b/R/oolong_update.R index 8354ed1..60c924f 100644 --- a/R/oolong_update.R +++ b/R/oolong_update.R @@ -42,7 +42,7 @@ update_oolong <- function(oolong, verbose = TRUE) { .cstop(!.check_oolong(oolong$.__enclos_env__$private), "This oolong object does not need to be updated.") if ("oolong_test_tm" %in% class(oolong)) { ## generate a dummy oolong object - new_oolong <- create_oolong(abstracts_keyatm) + new_oolong <- create_oolong(abstracts_seededlda) new_oolong$.__enclos_env__$private$finalized <- oolong$.__enclos_env__$private$finalized new_oolong$.__enclos_env__$private$test_content <- oolong$.__enclos_env__$private$test_content ## renaming test_content @@ -65,7 +65,7 @@ update_oolong <- function(oolong, verbose = TRUE) { new_oolong$.__enclos_env__$private$construct <- oolong$.__enclos_env__$private$construct new_oolong$.__enclos_env__$private$hash <- .safe_hash(new_oolong$.__enclos_env__$private$test_content) new_oolong$.__enclos_env__$private$hash_input_corpus <- oolong$.__enclos_env__$private$hash_input_corpus - new_oolong$.__enclos_env__$private$meta <- .generate_meta() + new_oolong$.__enclos_env__$private$meta <- .generate_meta() } if (is.null(new_oolong$userid)) { new_oolong$userid <- NA diff --git a/man/abstracts_keyatm.Rd b/man/abstracts_seededlda.Rd similarity index 68% rename from man/abstracts_keyatm.Rd rename to man/abstracts_seededlda.Rd index b46021f..9ee2c46 100644 --- a/man/abstracts_keyatm.Rd +++ b/man/abstracts_seededlda.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/oolong_data_misc.R \docType{data} -\name{abstracts_keyatm} -\alias{abstracts_keyatm} +\name{abstracts_seededlda} +\alias{abstracts_seededlda} \alias{abstracts_btm} \title{Topic models trained with the abstracts dataset.} \format{ -An object of class \code{keyATM_output} (inherits from \code{base}, \code{list}) of length 18. +An object of class \code{textmodel_lda} (inherits from \code{textmodel}, \code{list}) of length 10. An object of class \code{BTM} of length 9. } \usage{ -abstracts_keyatm +abstracts_seededlda abstracts_btm } diff --git a/man/create_oolong.Rd b/man/create_oolong.Rd index fd6fd49..4020988 100644 --- a/man/create_oolong.Rd +++ b/man/create_oolong.Rd @@ -171,21 +171,23 @@ For more details, please see the overview vignette: \code{vignette("overview", p \examples{ ## Creation of oolong test with only word intrusion test -data(abstracts_keyatm) +data(abstracts_seededlda) data(abstracts) -oolong_test <- wi(input_model = abstracts_keyatm, userid = "Hadley") +oolong_test <- wi(input_model = abstracts_seededlda, userid = "Hadley") ## Creation of oolong test with both word intrusion test and topic intrusion test -oolong_test <- witi(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Julia") +oolong_test <- witi(input_model = abstracts_seededlda, +input_corpus = abstracts$text, userid = "Julia") ## Creation of oolong test with topic intrusion test -oolong_test <- ti(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Jenny") +oolong_test <- ti(input_model = abstracts_seededlda, +input_corpus = abstracts$text, userid = "Jenny") ## Creation of oolong test with word set intrusion test -oolong_test <- wsi(input_model = abstracts_keyatm, userid = "Garrett") +oolong_test <- wsi(input_model = abstracts_seededlda, userid = "Garrett") ## Creation of gold standard oolong_test <- gs(input_corpus = trump2k, userid = "Yihui") ## Using create_oolong(); not recommended -oolong_test <- create_oolong(input_model = abstracts_keyatm, +oolong_test <- create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = "JJ") -oolong_test <- create_oolong(input_model = abstracts_keyatm, +oolong_test <- create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = "Mara", type = "ti") oolong_test <- create_oolong(input_corpus = abstracts$text, userid = "Winston", type = "gs") } diff --git a/tests/testthat.R b/tests/testthat.R index 8edddea..1448f6d 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library(testthat) library(oolong) -test_check("oolong", report = "minimal") +test_check("oolong") diff --git a/tests/testthat/_snaps/printing.md b/tests/testthat/_snaps/printing.md index 32c1f6b..c9d7967 100644 --- a/tests/testthat/_snaps/printing.md +++ b/tests/testthat/_snaps/printing.md @@ -72,7 +72,7 @@ # ti only Code - create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text, + create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, type = "ti") Message @@ -88,7 +88,7 @@ # wsi only Code - create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text, + create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, type = "wsi", wsi_n_top_terms = 100) Message diff --git a/tests/testthat/apps/ti/app.R b/tests/testthat/apps/ti/app.R index 99a8991..daf3449 100644 --- a/tests/testthat/apps/ti/app.R +++ b/tests/testthat/apps/ti/app.R @@ -1,5 +1,5 @@ library(oolong) -x <- ti(abstracts_keyatm, abstracts$text, exact_n = 10) +x <- ti(abstracts_seededlda, abstracts$text, exact_n = 10) x$do_topic_intrusion_test() diff --git a/tests/testthat/apps/wi/app.R b/tests/testthat/apps/wi/app.R index bd4ea60..44e7822 100644 --- a/tests/testthat/apps/wi/app.R +++ b/tests/testthat/apps/wi/app.R @@ -1,5 +1,5 @@ library(oolong) -x <- wi(abstracts_keyatm) +x <- wi(abstracts_seededlda) x$do_word_intrusion_test() diff --git a/tests/testthat/apps/wsi/app.R b/tests/testthat/apps/wsi/app.R index d651236..c29f09e 100644 --- a/tests/testthat/apps/wsi/app.R +++ b/tests/testthat/apps/wsi/app.R @@ -1,5 +1,5 @@ require(oolong) -x <- wsi(abstracts_keyatm) +x <- wsi(abstracts_seededlda) x$do_word_set_intrusion_test() diff --git a/tests/testthat/test-defensive_programming.R b/tests/testthat/test-defensive_programming.R index bccae14..cddc462 100644 --- a/tests/testthat/test-defensive_programming.R +++ b/tests/testthat/test-defensive_programming.R @@ -19,10 +19,10 @@ test_that("precondiction", { test_that("locking", { ## premature locking - x <- create_oolong(abstracts_keyatm) + x <- create_oolong(abstracts_seededlda) expect_error(x$lock()) expect_error(x$lock(force = TRUE), NA) - x <- create_oolong(abstracts_keyatm, abstracts$text) + x <- create_oolong(abstracts_seededlda, abstracts$text) expect_error(x$lock()) ## error when only word intrusion test is done. x <- genius_word(x) @@ -32,16 +32,16 @@ test_that("locking", { }) test_that("cloning", { - x <- create_oolong(abstracts_keyatm) + x <- create_oolong(abstracts_seededlda) expect_error(clone_oolong(x), NA) x <- genius_word(x) ## Cannot clone a partially coded object. expect_error(clone_oolong(x)) - x <- create_oolong(abstracts_keyatm) + x <- create_oolong(abstracts_seededlda) x$lock(force = TRUE) expect_error(clone_oolong(x)) ## Cloned object is not sharing the same private space. - x <- create_oolong(abstracts_keyatm) + x <- create_oolong(abstracts_seededlda) y <- clone_oolong(x) x$lock(force = TRUE) expect_true(x$.__enclos_env__$private$finalized) @@ -50,31 +50,31 @@ test_that("cloning", { test_that("cloning all types", { ## pure wi - x <- wi(abstracts_keyatm) + x <- wi(abstracts_seededlda) expect_error(clone_oolong(x), NA) x$.__enclos_env__$private$test_content$wi$answer[1] <- "x" expect_error(clone_oolong(x)) ## pure ti - x <- ti(abstracts_keyatm, abstracts$text) + x <- ti(abstracts_seededlda, abstracts$text) expect_error(clone_oolong(x), NA) x$.__enclos_env__$private$test_content$ti$answer[1] <- "x" expect_error(clone_oolong(x)) ## witi - x <- witi(abstracts_keyatm, abstracts$text) + x <- witi(abstracts_seededlda, abstracts$text) expect_error(clone_oolong(x), NA) x$.__enclos_env__$private$test_content$ti$answer[1] <- "x" expect_error(clone_oolong(x)) - x <- witi(abstracts_keyatm, abstracts$text) + x <- witi(abstracts_seededlda, abstracts$text) expect_error(clone_oolong(x), NA) x$.__enclos_env__$private$test_content$wi$answer[1] <- "x" expect_error(clone_oolong(x)) - x <- witi(abstracts_keyatm, abstracts$text) + x <- witi(abstracts_seededlda, abstracts$text) expect_error(clone_oolong(x), NA) x$.__enclos_env__$private$test_content$wi$answer[1] <- "x" x$.__enclos_env__$private$test_content$ti$answer[1] <- "x" expect_error(clone_oolong(x)) ## wsi - x <- wsi(abstracts_keyatm) + x <- wsi(abstracts_seededlda) expect_error(clone_oolong(x), NA) x$.__enclos_env__$private$test_content$wsi$answer[1] <- "x" expect_error(clone_oolong(x)) @@ -82,39 +82,39 @@ test_that("cloning all types", { x <- gs(abstracts$text) expect_error(clone_oolong(x), NA) x$.__enclos_env__$private$test_content$gs$answer[1] <- 1 - expect_error(clone_oolong(x)) + expect_error(clone_oolong(x)) }) test_that("Can't launch $do_topic_intrusion_test() when no test content", { - x <- create_oolong(abstracts_keyatm) + x <- create_oolong(abstracts_seededlda) expect_error(x$do_topic_intrusion_test()) }) test_that("Can't launch $do_word_set_intrusion_test() when no test content", { - x <- create_oolong(abstracts_keyatm) + x <- create_oolong(abstracts_seededlda) expect_error(x$do_word_set_intrusion_test()) }) test_that("Can't launch $do_word_intrusion_test() when no test content", { - x <- wsi(abstracts_keyatm) + x <- wsi(abstracts_seededlda) expect_error(x$do_word_intrusion_test()) }) test_that("hash function", { expect_true(is.null(.safe_hash(NULL))) - expect_type(.safe_hash(abstracts_keyatm), "character") + expect_type(.safe_hash(abstracts_seededlda), "character") }) test_that("hash_input_model tm", { ## TI - x <- create_oolong(abstracts_keyatm) + x <- create_oolong(abstracts_seededlda) expect_false(is.null(x$.__enclos_env__$private$hash_input_model)) - expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_keyatm)) + expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_seededlda)) ## WITI - x <- create_oolong(abstracts_keyatm, abstracts$text) + x <- create_oolong(abstracts_seededlda, abstracts$text) expect_false(is.null(x$.__enclos_env__$private$hash_input_model)) expect_false(is.null(x$.__enclos_env__$private$hash_input_corpus)) - expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_keyatm)) + expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_seededlda)) expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text)) }) @@ -122,16 +122,16 @@ test_that("hash_input_corpus gs", { x <- create_oolong(input_corpus = abstracts$text) expect_true(is.null(x$.__enclos_env__$private$hash_input_model)) expect_false(is.null(x$.__enclos_env__$private$hash_input_corpus)) - expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text)) + expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text)) }) test_that("invalid type", { - expect_error(create_oolong(abstracts_keyatm, type = "1111")) - expect_error(create_oolong(abstracts_keyatm, type = NA)) + expect_error(create_oolong(abstracts_seededlda, type = "1111")) + expect_error(create_oolong(abstracts_seededlda, type = NA)) }) test_that("userid", { - expect_error(wi(abstracts_keyatm, userid = c("a", "b"))) - expect_error(wi(abstracts_keyatm, userid = "a"), NA) + expect_error(wi(abstracts_seededlda, userid = c("a", "b"))) + expect_error(wi(abstracts_seededlda, userid = "a"), NA) expect_error(wsi(abtracts_stm, abstracts$text)) }) diff --git a/tests/testthat/test-deploy.R b/tests/testthat/test-deploy.R index 4cafcfc..e00c6c0 100644 --- a/tests/testthat/test-deploy.R +++ b/tests/testthat/test-deploy.R @@ -16,24 +16,24 @@ genius_wsi <- function(obj1) { test_that("preconditions witi", { - expect_error(deploy_oolong(witi(abstracts_keyatm, abstracts$text))) - expect_error(export_oolong(witi(abstracts_keyatm, abstracts$text))) + expect_error(deploy_oolong(witi(abstracts_seededlda, abstracts$text))) + expect_error(export_oolong(witi(abstracts_seededlda, abstracts$text))) }) test_that("preconditions coded", { - x <- wi(abstracts_keyatm) + x <- wi(abstracts_seededlda) expect_error(deploy_oolong(x), NA) expect_error(export_oolong(x, verbose = FALSE), NA) x <- genius_word(x) expect_error(deploy_oolong(x)) expect_error(export_oolong(x)) - x <- ti(abstracts_keyatm, abstracts$text) + x <- ti(abstracts_seededlda, abstracts$text) expect_error(deploy_oolong(x), NA) expect_error(export_oolong(x, verbose = FALSE), NA) x <- genius_topic(x) expect_error(deploy_oolong(x)) expect_error(export_oolong(x, verbose = FALSE)) - x <- wsi(abstracts_keyatm) + x <- wsi(abstracts_seededlda) expect_error(deploy_oolong(x), NA) expect_error(export_oolong(x, verbose = FALSE), NA) x <- genius_wsi(x) @@ -48,15 +48,15 @@ test_that("preconditions coded", { }) test_that("preconditions locked", { - x <- wi(abstracts_keyatm) + x <- wi(abstracts_seededlda) x$lock(force = TRUE) expect_error(deploy_oolong(x)) expect_error(export_oolong(x)) - x <- ti(abstracts_keyatm, abstracts$text) + x <- ti(abstracts_seededlda, abstracts$text) x$lock(force = TRUE) expect_error(deploy_oolong(x)) expect_error(export_oolong(x)) - x <- wsi(abstracts_keyatm) + x <- wsi(abstracts_seededlda) x$lock(force = TRUE) expect_error(deploy_oolong(x)) expect_error(export_oolong(x)) @@ -69,9 +69,9 @@ test_that("preconditions locked", { test_that("export_app dir", { ## new dir newdir <- paste0(tempdir(), "/test") - expect_error(export_oolong(wsi(abstracts_keyatm), dir = newdir, verbose = FALSE), NA) + expect_error(export_oolong(wsi(abstracts_seededlda), dir = newdir, verbose = FALSE), NA) ## existing dir olddir <- newdir - expect_error(export_oolong(wsi(abstracts_keyatm), dir = olddir, verbose = FALSE), NA) + expect_error(export_oolong(wsi(abstracts_seededlda), dir = olddir, verbose = FALSE), NA) unlink(olddir, recursive = TRUE) }) diff --git a/tests/testthat/test-generate_test_content.R b/tests/testthat/test-generate_test_content.R index a1c4238..7b20b94 100644 --- a/tests/testthat/test-generate_test_content.R +++ b/tests/testthat/test-generate_test_content.R @@ -1,48 +1,48 @@ ## context("check helper functions") test_that("generate_test_content", { - x <- oolong:::.generate_test_content(abstracts_keyatm) + x <- oolong:::.generate_test_content(abstracts_seededlda) expect_null(x$ti) - x <- oolong:::.generate_test_content(abstracts_keyatm, quanteda::corpus(abstracts$text)) + x <- oolong:::.generate_test_content(abstracts_seededlda, quanteda::corpus(abstracts$text)) expect_false(is.null(x$ti)) }) test_that("check_complete", { - x <- oolong:::.generate_test_content(abstracts_keyatm) + x <- oolong:::.generate_test_content(abstracts_seededlda) expect_false(oolong:::.check_test_content_complete(x)) x$wi$answer <- 1 expect_true(oolong:::.check_test_content_complete(x)) - y <- oolong:::.generate_test_content(abstracts_keyatm, abstracts$text) + y <- oolong:::.generate_test_content(abstracts_seededlda, abstracts$text) expect_false(oolong:::.check_test_content_complete(y)) y$ti$answer <- 1 expect_false(oolong:::.check_test_content_complete(y)) y$wi$answer <- 1 expect_true(oolong:::.check_test_content_complete(y)) - z <- create_oolong(abstracts_keyatm) + z <- create_oolong(abstracts_seededlda) expect_error(z$lock()) }) test_that("K is too small", { - expect_error(oolong:::.generate_test_content(abstracts_keyatm, abstracts$text, n_top_topics = 20)) + expect_error(oolong:::.generate_test_content(abstracts_seededlda, abstracts$text, n_top_topics = 20)) }) test_that("n_top_terms is considered #29", { - z <- oolong:::.generate_test_content(abstracts_keyatm, abstracts$text, n_top_terms = 10) + z <- oolong:::.generate_test_content(abstracts_seededlda, abstracts$text, n_top_terms = 10) expect_equal(length(z$wi$candidates[[1]]), 11) }) test_that("generate_test_content type", { - x <- oolong:::.generate_test_content(abstracts_keyatm, input_corpus = abstracts$text, type = 'wi') + x <- oolong:::.generate_test_content(abstracts_seededlda, input_corpus = abstracts$text, type = 'wi') expect_null(x$ti) - x <- oolong:::.generate_test_content(abstracts_keyatm, input_corpus = abstracts$text, type = 'ti') + x <- oolong:::.generate_test_content(abstracts_seededlda, input_corpus = abstracts$text, type = 'ti') expect_null(x$wi) }) test_that("generate_test_content wsi", { - x <- oolong:::.generate_test_content(abstracts_keyatm, input_corpus = abstracts$text, n_topiclabel_words = 4, type = 'wsi') + x <- oolong:::.generate_test_content(abstracts_seededlda, input_corpus = abstracts$text, n_topiclabel_words = 4, type = 'wsi') expect_null(x$ti) expect_null(x$wi) expect_false(is.null(x$wsi)) - expect_error(oolong:::.generate_test_content(abstracts_keyatm, n_correct_ws = 10, n_topiclabel_words = 8, type = 'wsi')) - expect_error(oolong:::.generate_test_content(abstracts_keyatm, n_correct_ws = 10, n_topiclabel_words = 8, wsi_n_top_terms = 100, type = 'wsi'), NA) + expect_error(oolong:::.generate_test_content(abstracts_seededlda, n_correct_ws = 10, n_topiclabel_words = 8, type = 'wsi')) + expect_error(oolong:::.generate_test_content(abstracts_seededlda, n_correct_ws = 10, n_topiclabel_words = 8, wsi_n_top_terms = 100, type = 'wsi'), NA) }) diff --git a/tests/testthat/test-gold-standard.R b/tests/testthat/test-gold-standard.R index b96273e..8048cf5 100644 --- a/tests/testthat/test-gold-standard.R +++ b/tests/testthat/test-gold-standard.R @@ -31,6 +31,6 @@ test_that("locking", { }) test_that("type override input_model", { - x <- create_oolong(abstracts_keyatm, abstracts$text, type = "gs") + x <- create_oolong(abstracts_seededlda, abstracts$text, type = "gs") expect_true("oolong_test_gs" %in% class(x)) }) diff --git a/tests/testthat/test-keyatm.R b/tests/testthat/test-keyatm.R index a3f3b09..aa3e6f3 100644 --- a/tests/testthat/test-keyatm.R +++ b/tests/testthat/test-keyatm.R @@ -12,12 +12,14 @@ genius_topic <- function(obj1) { test_that("generate_test_content", { skip_on_cran() + skip_if_not(exists("abstracts_keyatm")) x <- oolong:::.generate_test_content(abstracts_keyatm) expect_null(x$ti) }) test_that("check_complete", { skip_on_cran() + skip_if_not(exists("abstracts_keyatm")) x <- oolong:::.generate_test_content(abstracts_keyatm) expect_false(oolong:::.check_test_content_complete(x)) x$wi$answer <- 1 diff --git a/tests/testthat/test-multiple_objs.R b/tests/testthat/test-multiple_objs.R index 237418b..df6e209 100644 --- a/tests/testthat/test-multiple_objs.R +++ b/tests/testthat/test-multiple_objs.R @@ -11,7 +11,7 @@ genius_topic <- function(obj1) { } test_that("defensive programming", { - obj1 <- create_oolong(abstracts_keyatm) + obj1 <- create_oolong(abstracts_seededlda) obj2 <- clone_oolong(obj1) ### Not lock expect_error(summarize_oolong(obj1)) @@ -21,10 +21,10 @@ test_that("defensive programming", { expect_error(summarize_oolong(obj1, obj2)) ### Testing checking hash. set.seed(1212112) - obj1 <- create_oolong(abstracts_keyatm) + obj1 <- create_oolong(abstracts_seededlda) obj2 <- clone_oolong(obj1) set.seed(12121999) - obj3 <- create_oolong(abstracts_keyatm) + obj3 <- create_oolong(abstracts_seededlda) obj1 <- genius_word(obj1) obj2 <- genius_word(obj2) obj3 <- genius_word(obj3) @@ -36,8 +36,7 @@ test_that("defensive programming", { ## obj2 is a clone of obj1 expect_error(summarise_oolong(obj1, obj2), NA) ## Warning about premature locking - obj1 <- create_oolong(abstracts_keyatm) + obj1 <- create_oolong(abstracts_seededlda) obj1$lock(force = TRUE) expect_warning(summarize_oolong(obj1)) }) - diff --git a/tests/testthat/test-printing.R b/tests/testthat/test-printing.R index 3af7c26..62e8dd6 100644 --- a/tests/testthat/test-printing.R +++ b/tests/testthat/test-printing.R @@ -28,7 +28,7 @@ test_that("gs_turngold", { }) test_that("check_calculation_topic_intrusion_multiobject (Printing)", { - obj1 <- create_oolong(abstracts_keyatm, abstracts$text, exact_n = 10) + obj1 <- create_oolong(abstracts_seededlda, abstracts$text, exact_n = 10) obj2 <- clone_oolong(obj1) obj1 <- genius_word(obj1) obj1 <- genius_topic(obj1) @@ -41,15 +41,15 @@ test_that("check_calculation_topic_intrusion_multiobject (Printing)", { }) test_that("ti only", { - expect_snapshot(create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text, type = "ti")) + expect_snapshot(create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, type = "ti")) }) test_that("wsi only", { - expect_snapshot(create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text, type = "wsi", wsi_n_top_terms = 100)) + expect_snapshot(create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, type = "wsi", wsi_n_top_terms = 100)) }) test_that("check_calculation_wsi_multiobject (printing)", { - obj1 <- wsi(abstracts_keyatm) + obj1 <- wsi(abstracts_seededlda) obj2 <- clone_oolong(obj1) obj3 <- clone_oolong(obj1) ## Mocking coding @@ -70,7 +70,7 @@ test_that("check_calculation_wsi_multiobject (printing)", { test_that("export printing", { skip_on_cran() - obj1 <- wsi(abstracts_keyatm) + obj1 <- wsi(abstracts_seededlda) newdir <- "~/oolong_testing" expect_snapshot(export_oolong(obj1, dir = newdir, verbose = TRUE, use_full_path = FALSE)) expect_snapshot(export_oolong(obj1, dir = newdir, verbose = FALSE, use_full_path = FALSE)) diff --git a/tests/testthat/test-seededlda.R b/tests/testthat/test-seededlda.R index 47fe1e6..647df88 100644 --- a/tests/testthat/test-seededlda.R +++ b/tests/testthat/test-seededlda.R @@ -13,8 +13,6 @@ genius_topic <- function(obj1) { ## seeded lda test_that("seeded lda: generate_test_content", { - skip_on_cran() - skip_if_not(exists("abstracts_seededlda")) x <- oolong:::.generate_test_content(abstracts_seededlda) expect_null(x$ti) x <- oolong:::.generate_test_content(abstracts_seededlda, quanteda::corpus(abstracts$text)) @@ -22,8 +20,6 @@ test_that("seeded lda: generate_test_content", { }) test_that("seeded lda: check_complete", { - skip_on_cran() - skip_if_not(exists("abstracts_seededlda")) x <- oolong:::.generate_test_content(abstracts_seededlda) expect_false(oolong:::.check_test_content_complete(x)) x$wi$answer <- 1 @@ -40,7 +36,7 @@ test_that("seeded lda: check_complete", { test_that("unseeded lda: generate_test_content", { skip_on_cran() - skip_if_not(exists("abstracts_seededlda")) + skip_if_not(exists("abstracts_unseededlda")) x <- oolong:::.generate_test_content(abstracts_unseededlda) expect_null(x$ti) x <- oolong:::.generate_test_content(abstracts_unseededlda, quanteda::corpus(abstracts$text)) @@ -49,7 +45,7 @@ test_that("unseeded lda: generate_test_content", { test_that("unseeded lda: check_complete", { skip_on_cran() - skip_if_not(exists("abstracts_seededlda")) + skip_if_not(exists("abstracts_unseededlda")) x <- oolong:::.generate_test_content(abstracts_unseededlda) expect_false(oolong:::.check_test_content_complete(x)) x$wi$answer <- 1 @@ -66,7 +62,7 @@ test_that("unseeded lda: check_complete", { ## ui test_that("UI", { - skip_if_not(exists("abstracts_seededlda")) + skip_if_not(exists("abstracts_unseededlda")) expect_error(witi(abstracts_seededlda, abstracts$text), NA) expect_error(witi(abstracts_unseededlda, abstracts$text), NA) expect_error(ti(abstracts_seededlda, abstracts$text), NA) diff --git a/tests/testthat/test-shiny.R b/tests/testthat/test-shiny.R index 054a8fd..d2bd4b0 100644 --- a/tests/testthat/test-shiny.R +++ b/tests/testthat/test-shiny.R @@ -18,7 +18,7 @@ SLEEPTIME <- 0.5 test_that("Click of death bug #51", { skip_on_cran() dir <- tempdir() - x <- wi(abstracts_keyatm) + x <- wi(abstracts_seededlda) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) test$click("confirm") @@ -36,13 +36,13 @@ test_that("Click of death bug #51", { test_that("launchable", { skip_on_cran() dir <- tempdir() - x <- wi(abstracts_keyatm) + x <- wi(abstracts_seededlda) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) Sys.sleep(SLEEPTIME) expect_error(test$get_values(), NA) test$stop() - x <- wsi(abstracts_keyatm) + x <- wsi(abstracts_seededlda) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) Sys.sleep(SLEEPTIME) @@ -54,7 +54,7 @@ test_that("launchable", { Sys.sleep(SLEEPTIME) expect_error(test$get_values(), NA) test$stop() - x <- ti(abstracts_keyatm, abstracts$text) + x <- ti(abstracts_seededlda, abstracts$text) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) Sys.sleep(SLEEPTIME) @@ -65,7 +65,7 @@ test_that("launchable", { test_that("Downloading", { skip_on_cran() dir <- tempdir() - x <- wi(abstracts_keyatm) + x <- wi(abstracts_seededlda) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) for (i in 1:10) { @@ -113,7 +113,7 @@ nextq <- function(test, k = 10) { test_that("wi next q & ff (exported)", { skip_on_cran() dir <- tempdir() - x <- wi(abstracts_keyatm) + x <- wi(abstracts_seededlda) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) nextq(test) @@ -135,7 +135,7 @@ test_that("wi next q & ff (native)", { test_that("wsi next q & ff (exported)", { skip_on_cran() dir <- tempdir() - x <- wsi(abstracts_keyatm) + x <- wsi(abstracts_seededlda) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) nextq(test) @@ -172,7 +172,7 @@ test_ti <- function(test, k = 10) { test_that("ti (exported)", { skip_on_cran() dir <- tempdir() - x <- ti(abstracts_keyatm, abstracts$text, exact_n = 10) + x <- ti(abstracts_seededlda, abstracts$text, exact_n = 10) export_oolong(x, dir = dir, verbose = FALSE) test <- AppDriver$new(dir) test_ti(test) @@ -232,4 +232,3 @@ test_that("gs (native)", { expect_false(is.null(test$get_value(input = "done"))) test$stop() }) - diff --git a/tests/testthat/test-summarize_oolong.R b/tests/testthat/test-summarize_oolong.R index 48c1c81..ef64c46 100644 --- a/tests/testthat/test-summarize_oolong.R +++ b/tests/testthat/test-summarize_oolong.R @@ -16,7 +16,7 @@ genius_wsi <- function(obj1) { } test_that("Correct UI", { - obj1 <- create_oolong(abstracts_keyatm) + obj1 <- create_oolong(abstracts_seededlda) obj2 <- create_oolong(input_corpus = trump2k, exact_n = 20) obj1 <- genius_word(obj1) obj1$lock() @@ -29,7 +29,7 @@ test_that("Correct UI", { }) test_that("check_calculation_word_intrusion_multiobject", { - obj1 <- create_oolong(abstracts_keyatm) + obj1 <- create_oolong(abstracts_seededlda) obj2 <- clone_oolong(obj1) obj3 <- clone_oolong(obj1) ## Mocking coding @@ -51,14 +51,14 @@ test_that("check_calculation_word_intrusion_multiobject", { test_that("check_calculation_word_intrusion_single_object", { - obj1 <- create_oolong(abstracts_keyatm) + obj1 <- create_oolong(abstracts_seededlda) obj1 <- genius_word(obj1) obj1$lock() expect_error(summarize_oolong(obj1), NA) }) test_that("check_calculation_witi_single_object", { - obj1 <- create_oolong(abstracts_keyatm, abstracts$text) + obj1 <- create_oolong(abstracts_seededlda, abstracts$text) obj1 <- genius_word(obj1) obj1 <- genius_topic(obj1) obj1$lock() @@ -66,7 +66,7 @@ test_that("check_calculation_witi_single_object", { }) test_that("check_calculation_witi_multiobject", { - obj1 <- create_oolong(abstracts_keyatm, abstracts$text, exact_n = 10) + obj1 <- create_oolong(abstracts_seededlda, abstracts$text, exact_n = 10) obj2 <- clone_oolong(obj1) obj1 <- genius_word(obj1) obj1 <- genius_topic(obj1) @@ -81,14 +81,14 @@ test_that("check_calculation_witi_multiobject", { }) test_that("check_calculation_ti_single_object", { - obj1 <- create_oolong(abstracts_keyatm, abstracts$text, type = "ti") + obj1 <- create_oolong(abstracts_seededlda, abstracts$text, type = "ti") obj1 <- genius_topic(obj1) obj1$lock() expect_error(summarize_oolong(obj1, n_iter = 100), NA) }) test_that("check_calculation_ti_multiobject", { - obj1 <- create_oolong(abstracts_keyatm, abstracts$text, exact_n = 10, type = "ti") + obj1 <- create_oolong(abstracts_seededlda, abstracts$text, exact_n = 10, type = "ti") obj2 <- clone_oolong(obj1) obj1 <- genius_topic(obj1) obj1$lock() @@ -102,7 +102,7 @@ test_that("check_calculation_ti_multiobject", { test_that("Forcibly locking", { - ex1 <- create_oolong(abstracts_keyatm, abstracts$text, exact_n = 100) + ex1 <- create_oolong(abstracts_seededlda, abstracts$text, exact_n = 100) ex2 <- clone_oolong(ex1) ex1 <- genius_word(ex1) ex1 <- genius_topic(ex1) @@ -116,7 +116,7 @@ test_that("Forcibly locking", { }) test_that("Monkeying problem #14", { - obj1 <- create_oolong(abstracts_keyatm, abstracts$text) + obj1 <- create_oolong(abstracts_seededlda, abstracts$text) obj1 <- genius_topic(obj1) obj1 <- genius_word(obj1) previous_answer <- obj1$.__enclos_env__$private$test_content$ti$answer @@ -128,7 +128,7 @@ test_that("Monkeying problem #14", { test_that("check_calculation_wsi_multiobject", { - obj1 <- wsi(abstracts_keyatm) + obj1 <- wsi(abstracts_seededlda) obj2 <- clone_oolong(obj1) obj3 <- clone_oolong(obj1) ## Mocking coding diff --git a/tests/testthat/test-update.R b/tests/testthat/test-update.R index 5804c62..102ecab 100644 --- a/tests/testthat/test-update.R +++ b/tests/testthat/test-update.R @@ -6,17 +6,17 @@ test_that("check_oolong", { expect_error(check_oolong(readRDS("../testdata/oolong_gs_prev4_locked.RDS"))) expect_error(check_oolong(readRDS("../testdata/massive_oolong_old.RDS"))) expect_error(check_oolong(readRDS("../testdata/massive_oolong.RDS")), NA) - expect_error(check_oolong(wi(abstracts_keyatm)), NA) - expect_error(check_oolong(witi(abstracts_keyatm, abstracts$text)), NA) - expect_error(check_oolong(wsi(abstracts_keyatm)), NA) + expect_error(check_oolong(wi(abstracts_seededlda)), NA) + expect_error(check_oolong(witi(abstracts_seededlda, abstracts$text)), NA) + expect_error(check_oolong(wsi(abstracts_seededlda)), NA) expect_error(check_oolong(gs(abstracts$text)), NA) }) test_that("update_oolong; needless update", { skip_on_cran() - expect_error(update_oolong(wi(abstracts_keyatm))) - expect_error(update_oolong(witi(abstracts_keyatm, abstracts$text))) - expect_error(update_oolong(wsi(abstracts_keyatm))) + expect_error(update_oolong(wi(abstracts_seededlda))) + expect_error(update_oolong(witi(abstracts_seededlda, abstracts$text))) + expect_error(update_oolong(wsi(abstracts_seededlda))) expect_error(update_oolong(gs(abstracts$text))) expect_error(update_oolong(readRDS("../testdata/massive_oolong.RDS"))) }) @@ -70,5 +70,5 @@ test_that("update_oolong; without meta", { expect_true(x$.__enclos_env__$private$finalized == y$.__enclos_env__$private$finalized) expect_false(is.null(x$.__enclos_env__$private$meta)) expect_true(is.na(x$userid)) - expect_equal(class(x), class(y)) + expect_equal(class(x), class(y)) }) diff --git a/vignettes/deploy.Rmd b/vignettes/deploy.Rmd index e6580f1..410cc5f 100644 --- a/vignettes/deploy.Rmd +++ b/vignettes/deploy.Rmd @@ -28,7 +28,7 @@ In this guide, we assume you want to deploy a word set intrusion test online. ```{r} library(oolong) -wsi_test <- wsi(abstracts_keyatm) +wsi_test <- wsi(abstracts_seededlda) wsi_test ``` diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd index e936a16..6ea9a92 100644 --- a/vignettes/overview.Rmd +++ b/vignettes/overview.Rmd @@ -47,7 +47,7 @@ install.packages("oolong") #### Word intrusion test -`abstracts_keyatm` is an example topic model trained with the data `abstracts` using the `keyATM` package. Currently, this package supports structural topic models / correlated topic models from `stm`, Warp LDA models from `text2vec` , LDA/CTM models from `topicmodels`, Biterm Topic Models from `BTM`, Keyword Assisted Topic Models from `keyATM`, and seeded LDA models from `seededlda`. Although not strictly a topic model, Naive Bayes models from `quanteda.textmodels` are also supported. See the section on [Naive Bayes](#about-naive-bayes) for more information. +`abstracts_seededlda` is an example topic model trained with the data `abstracts` using the `seededlda` package. Currently, this package supports structural topic models / correlated topic models from `stm`, Warp LDA models from `text2vec` , LDA/CTM models from `topicmodels`, Biterm Topic Models from `BTM`, Keyword Assisted Topic Models from `keyATM`, and seeded LDA models from `seededlda`. Although not strictly a topic model, Naive Bayes models from `quanteda.textmodels` are also supported. See the section on [Naive Bayes](#about-naive-bayes) for more information. ```{r} library(oolong) @@ -57,13 +57,13 @@ library(dplyr) ``` ```{r example} -abstracts_keyatm +abstracts_seededlda ``` To create an oolong test with word intrusion test, use the function `wi`. It is recommended to provide a user id of coder who are going to be doing the test. ```{r createtest} -oolong_test <- wi(abstracts_keyatm, userid = "Hadley") +oolong_test <- wi(abstracts_seededlda, userid = "Hadley") oolong_test ``` @@ -93,7 +93,7 @@ oolong_test Word set intrusion test is a variant of word intrusion test (Ying et al., 2021), in which multiple word sets generated from top terms of one topic are juxtaposed with one intruder word set generated similarly from another topic. In Ying et al., this test is called "R4WSI" because 4 word sets are displayed. By default, oolong generates also R4WSI. However, it is also possible to generate R(N)WSI by setting the parameter `n_correct_ws` to N - 1. ```{r wsi1} -oolong_test <- wsi(abstracts_keyatm, userid = "Garrett") +oolong_test <- wsi(abstracts_seededlda, userid = "Garrett") oolong_test ``` @@ -116,7 +116,7 @@ oolong_test #### Topic intrusion test -For example, `abstracts_keyatm` was generated with the corpus `abstracts$text` +For example, `abstracts_seededlda` was generated with the corpus `abstracts$text` ```{r newgroup5} library(tibble) @@ -126,7 +126,7 @@ abstracts Creating the oolong test object with the corpus used for training the topic model will generate topic intrusion test cases. ```{r createtest2} -oolong_test <- ti(abstracts_keyatm, abstracts$text, userid = "Julia") +oolong_test <- ti(abstracts_seededlda, abstracts$text, userid = "Julia") oolong_test ``` @@ -168,14 +168,14 @@ dfm(abstracts$text, tolower = TRUE, stem = TRUE, remove = stopwords('english'), Train a topic model. ```{r step0, eval = FALSE} -require(keyATM) -abstracts_keyatm <- keyATM(keyATM_read(abstracts_dfm), no_keyword_topics = 0, keywords = abstracts_dictionary, model = "base", options = list(seed = 46709394)) +require(seededlda) +abstracts_seededlda <- textmodel_seededlda(x = abstracts_dfm, dictionary = dictionary(abstracts_dictionary), seeds = 46709394, verbose = TRUE) ``` Create a new oolong object. ```{r step1} -oolong_test_rater1 <- witi(abstracts_keyatm, abstracts$text, userid = "Yihui") +oolong_test_rater1 <- witi(abstracts_seededlda, abstracts$text, userid = "Yihui") ``` Clone the oolong object to be used by other raters.