From 11ffcbb59c5ce77019306cd28e1f403d63461d6e Mon Sep 17 00:00:00 2001 From: chainsawriot Date: Tue, 28 Jun 2022 14:13:13 +0200 Subject: [PATCH] Fix #67 Exclude 'abstracts_warplda' from `data` because it is R6. It creates problem when 'text2vec' (a suggested package) is not installed together with the main package, e.g. `install.packages("oolong")`. --- .Rbuildignore | 1 + R/oolong_data_misc.R | 3 - btm_gh.md | 13 +-- deploy_gh.md | 4 +- man/abstracts_keyatm.Rd | 5 -- overview_gh.Rmd | 31 ------- overview_gh.md | 145 +++++------------------------- tests/testthat/_snaps/printing.md | 32 +++++++ vig_body.Rmd | 31 ------- vignettes/overview.Rmd | 31 ------- 10 files changed, 63 insertions(+), 233 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 1325b1b..9dca73c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -31,6 +31,7 @@ ^data/abstracts_topicmodels\.rda$ ^data/abstracts_seededlda\.rda$ ^data/abstracts_unseededlda\.rda$ +^data/abstracts_warplda\.rda$ ^data/abstracts_stm\.rda$ ^tests/testthat/apps/ ^tests/testdata/downloaded$ diff --git a/R/oolong_data_misc.R b/R/oolong_data_misc.R index acec35c..dfe34a3 100644 --- a/R/oolong_data_misc.R +++ b/R/oolong_data_misc.R @@ -17,9 +17,6 @@ #' These are topic models trained with different topic model packages. "abstracts_keyatm" -#' @rdname abstracts_keyatm -"abstracts_warplda" - #' @rdname abstracts_keyatm "abstracts_btm" diff --git a/btm_gh.md b/btm_gh.md index 4df4642..fe43fbf 100644 --- a/btm_gh.md +++ b/btm_gh.md @@ -20,7 +20,7 @@ require(BTM) #> Loading required package: BTM require(quanteda) #> Loading required package: quanteda -#> Package version: 3.2.0 +#> Package version: 3.2.1 #> Unicode version: 13.0 #> ICU version: 66.1 #> Parallel computing: 8 of 8 threads used. @@ -63,9 +63,10 @@ trump_btm <- BTM(trump_dat, k = 8, iter = 500, trace = 10) ## Pecularities of BTM -This is how you should generate \(\theta_{t}\) . However, there are many -NaN and there are only 1994 rows (`trump2k` has 2000 tweets) due to -empty documents. +This is how you should generate +![\\theta\_{t}](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;%5Ctheta_%7Bt%7D +"\\theta_{t}") . However, there are many NaN and there are only 1994 +rows (`trump2k` has 2000 tweets) due to empty documents. ``` r theta <- predict(trump_btm, newdata = trump_dat) @@ -117,7 +118,7 @@ with other topic models. oolong <- create_oolong(trump_btm) oolong #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> @@ -135,7 +136,7 @@ frame you used for training (in this case `trump_dat`). Your oolong <- create_oolong(trump_btm, trump_corpus, btm_dataframe = trump_dat) oolong #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✔ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> ℹ TI: n = 20, 0 coded. 
diff --git a/deploy_gh.md b/deploy_gh.md index 8a4d228..a36c401 100644 --- a/deploy_gh.md +++ b/deploy_gh.md @@ -27,7 +27,7 @@ library(oolong) wsi_test <- wsi(abstracts_keyatm) wsi_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 10, 0 coded. #> @@ -116,7 +116,7 @@ revert_oolong(wsi_test, "oolong_2021-05-22 20 51 26 Hadley Wickham.RDS") ``` #> - #> ── oolong (topic model) ──────────────────────────────────────────────────────── + #> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Hadley Wickham #> ℹ WSI: n = 10, 10 coded. diff --git a/man/abstracts_keyatm.Rd b/man/abstracts_keyatm.Rd index 56259c3..b46021f 100644 --- a/man/abstracts_keyatm.Rd +++ b/man/abstracts_keyatm.Rd @@ -3,21 +3,16 @@ \docType{data} \name{abstracts_keyatm} \alias{abstracts_keyatm} -\alias{abstracts_warplda} \alias{abstracts_btm} \title{Topic models trained with the abstracts dataset.} \format{ An object of class \code{keyATM_output} (inherits from \code{base}, \code{list}) of length 18. -An object of class \code{WarpLDA} (inherits from \code{LDA}, \code{TopicModel}, \code{mlapiDecomposition}, \code{mlapiTransformation}, \code{mlapiBase}, \code{R6}) of length 9. - An object of class \code{BTM} of length 9. } \usage{ abstracts_keyatm -abstracts_warplda - abstracts_btm } \description{ diff --git a/overview_gh.Rmd b/overview_gh.Rmd index 2d48548..5de9e47 100644 --- a/overview_gh.Rmd +++ b/overview_gh.Rmd @@ -226,37 +226,6 @@ H1: Median TLO is better than random guess. One must notice that the two statistical tests are testing the bear minimum. A significant test only indicates the topic model can make the rater(s) perform better than random guess. It is not an indication of good topic interpretability. Also, one should use a very conservative significant level, e.g. $\alpha < 0.001$. -### About Warp LDA - -There is a subtle difference between the support for `stm` and for `text2vec`. - -`abstracts_warplda` is a Warp LDA object trained with the same dataset as the `abstracts_stm` - -```{r warplda} -abstracts_warplda -``` - -All the API endpoints are the same, except the one for the creation of topic intrusion test cases. You must supply also the `input_dfm`. - -```{r warplda2} -### Just word intrusion test. -oolong_test <- wi(abstracts_warplda, userid = "Lionel") -oolong_test -``` - - -```{r warplda3} -abstracts_dfm -``` - -```{r warplda4, , message = FALSE, results = 'hide', warning = FALSE} -oolong_test <- witi(abstracts_warplda, abstracts$text, input_dfm = abstracts_dfm, userid = "Mara") -``` - -```{r warplda5} -oolong_test -``` - ## About Biterm Topic Model Please refer to the vignette about BTM. diff --git a/overview_gh.md b/overview_gh.md index e39615a..93f68ae 100644 --- a/overview_gh.md +++ b/overview_gh.md @@ -55,7 +55,7 @@ library(keyATM) #> Papers, examples, resources, and other materials are at #> https://keyatm.github.io/keyATM/ library(quanteda) -#> Package version: 3.2.0 +#> Package version: 3.2.1 #> Unicode version: 13.0 #> ICU version: 66.1 #> Parallel computing: 8 of 8 threads used. @@ -84,7 +84,7 @@ be doing the test. 
oolong_test <- wi(abstracts_keyatm, userid = "Hadley") oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 0 coded. @@ -109,7 +109,7 @@ the model precision by printing the oolong test. oolong_test$lock() oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 10 coded. @@ -133,7 +133,7 @@ parameter `n_correct_ws` to N - 1. oolong_test <- wsi(abstracts_keyatm, userid = "Garrett") oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 0 coded. @@ -154,7 +154,7 @@ oolong_test$do_word_set_intrusion_test() oolong_test$lock() oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 10 coded. @@ -195,7 +195,7 @@ topic model will generate topic intrusion test cases. oolong_test <- ti(abstracts_keyatm, abstracts$text, userid = "Julia") oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 0 coded. @@ -218,7 +218,7 @@ oolong_test$lock() ``` r oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 25 coded. @@ -278,7 +278,7 @@ Get a summary of the two objects. ``` r summarize_oolong(oolong_test_rater1, oolong_test_rater2) #> -#> ── Summary (topic model): ────────────────────────────────────────────────────── +#> ── Summary (topic model): ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> #> ── Word intrusion test ── #> @@ -320,113 +320,9 @@ One must notice that the two statistical tests are testing the bear minimum. A significant test only indicates the topic model can make the rater(s) perform better than random guess. It is not an indication of good topic interpretability. Also, one should use a very conservative -significant level, e.g. \(\alpha < 0.001\). - -### About Warp LDA - -There is a subtle difference between the support for `stm` and for -`text2vec`. 
- -`abstracts_warplda` is a Warp LDA object trained with the same dataset -as the `abstracts_stm` - -``` r -abstracts_warplda -#> -#> Inherits from: -#> Public: -#> clone: function (deep = FALSE) -#> components: 0 1 0 46 0 95 0 20 42 8 31 36 50 23 0 0 0 58 0 43 0 0 0 ... -#> fit_transform: function (x, n_iter = 1000, convergence_tol = 0.001, n_check_convergence = 10, -#> get_top_words: function (n = 10, topic_number = 1L:private$n_topics, lambda = 1) -#> initialize: function (n_topics = 10L, doc_topic_prior = 50/n_topics, topic_word_prior = 1/n_topics, -#> plot: function (lambda.step = 0.1, reorder.topics = FALSE, doc_len = private$doc_len, -#> topic_word_distribution: 0 9.41796948577887e-05 0 0.00446992517733942 0 0.0086837 ... -#> transform: function (x, n_iter = 1000, convergence_tol = 0.001, n_check_convergence = 10, -#> Private: -#> calc_pseudo_loglikelihood: function (ptr = private$ptr) -#> check_convert_input: function (x) -#> components_: 0 1 0 46 0 95 0 20 42 8 31 36 50 23 0 0 0 58 0 43 0 0 0 ... -#> doc_len: 80 68 85 88 69 118 99 50 57 88 70 67 53 62 66 92 89 79 1 ... -#> doc_topic_distribution: function () -#> doc_topic_distribution_with_prior: function () -#> doc_topic_matrix: 0 0 0 0 0 3 111 0 0 0 0 0 90 134 0 174 0 321 0 0 109 38 ... -#> doc_topic_prior: 0.1 -#> fit_transform_internal: function (model_ptr, n_iter, convergence_tol, n_check_convergence, -#> get_c_all: function () -#> get_c_all_local: function () -#> get_doc_topic_matrix: function (prt, nr) -#> get_topic_word_count: function () -#> init_model_dtm: function (x, ptr = private$ptr) -#> internal_matrix_formats: list -#> is_initialized: FALSE -#> n_iter_inference: 10 -#> n_topics: 20 -#> ptr: externalptr -#> reset_c_local: function () -#> run_iter_doc: function (update_topics = TRUE, ptr = private$ptr) -#> run_iter_word: function (update_topics = TRUE, ptr = private$ptr) -#> seeds: 135203513.874082 471172603.061186 -#> set_c_all: function (x) -#> set_internal_matrix_formats: function (sparse = NULL, dense = NULL) -#> topic_word_distribution_with_prior: function () -#> topic_word_prior: 0.01 -#> transform_internal: function (x, n_iter = 1000, convergence_tol = 0.001, n_check_convergence = 10, -#> vocabulary: explor benefit risk featur medic broker websit well type ... -``` - -All the API endpoints are the same, except the one for the creation of -topic intrusion test cases. You must supply also the `input_dfm`. - -``` r -### Just word intrusion test. -oolong_test <- wi(abstracts_warplda, userid = "Lionel") -oolong_test -#> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── -#> ✔ WI ✖ TI ✖ WSI -#> ☺ Lionel -#> ℹ WI: k = 20, 0 coded. -#> -#> ── Methods ── -#> -#> • <$do_word_intrusion_test()>: do word intrusion test -#> • <$lock()>: finalize and see the results -``` - -``` r -abstracts_dfm -#> Document-feature matrix of: 2,500 documents, 3,998 features (98.61% sparse) and 0 docvars. -#> features -#> docs explor benefit risk featur medic broker websit well type persuas -#> text1 1 2 2 2 6 3 6 1 3 1 -#> text2 0 0 1 0 0 0 0 0 1 0 -#> text3 0 1 0 0 0 0 0 0 0 0 -#> text4 1 0 0 0 0 0 0 0 0 0 -#> text5 1 0 0 0 0 0 0 0 0 0 -#> text6 0 1 1 0 0 0 0 0 0 0 -#> [ reached max_ndoc ... 2,494 more documents, reached max_nfeat ... 
3,988 more features ] -``` - -``` r -oolong_test <- witi(abstracts_warplda, abstracts$text, input_dfm = abstracts_dfm, userid = "Mara") -``` - -``` r -oolong_test -#> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── -#> ✔ WI ✔ TI ✖ WSI -#> ☺ Mara -#> ℹ WI: k = 20, 0 coded. -#> ℹ TI: n = 25, 0 coded. -#> -#> ── Methods ── -#> -#> • <$do_word_intrusion_test()>: do word intrusion test -#> • <$do_topic_intrusion_test()>: do topic intrusion test -#> • <$lock()>: finalize and see the results -``` +significant level, e.g. ![\\alpha +\< 0.001](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;%5Calpha%20%3C%200.001 +"\\alpha \< 0.001"). ## About Biterm Topic Model @@ -451,7 +347,7 @@ You can still generate word intrusion and word set intrusion tests. ``` r wi(newsgroup_nb) #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 20, 0 coded. #> @@ -464,7 +360,7 @@ wi(newsgroup_nb) ``` r wsi(newsgroup_nb) #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────── +#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 20, 0 coded. #> @@ -509,7 +405,7 @@ should be an adjective, e.g. positive, liberal, populistic, etc. oolong_test <- gs(input_corpus = trump2k, construct = "positive", userid = "Joe") oolong_test #> -#> ── oolong (gold standard generation) ─────────────────────────────────────────── +#> ── oolong (gold standard generation) ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 0 coded. #> ℹ Construct: positive. @@ -534,7 +430,7 @@ After the coding, you need to first lock the test and then the oolong_test$lock() oolong_test #> -#> ── oolong (gold standard generation) ─────────────────────────────────────────── +#> ── oolong (gold standard generation) ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 20 coded. #> ℹ Construct: positive. @@ -603,13 +499,14 @@ study the correlation between the gold standard and AFINN. ``` r summarize_oolong(oolong_test, target_value = all_afinn_score) #> New names: -#> * NA -> ...1 #> `geom_smooth()` using formula 'y ~ x' #> `geom_smooth()` using formula 'y ~ x' #> -#> ── Summary (gold standard generation): ───────────────────────────────────────── +#> ── Summary (gold standard generation): +#> ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ℹ Correlation: 0.718 (p = 4e-04) #> ℹ Effect of content length: -0.323 (p = 0.1643) +#> • `` -> `...1` ``` ### Suggested workflow @@ -650,10 +547,10 @@ Summarize all oolong objects with the target value. ``` r res <- summarize_oolong(trump, trump2, target_value = target_value) #> New names: -#> * NA -> ...1 -#> * NA -> ...2 #> `geom_smooth()` using formula 'y ~ x' #> `geom_smooth()` using formula 'y ~ x' +#> • `` -> `...1` +#> • `` -> `...2` ``` Read the results. The diagnostic plot consists of 4 subplots. It is a @@ -686,7 +583,7 @@ acceptable cut-off. 
``` r res #> -#> ── Summary (gold standard generation): ───────────────────────────────────────── +#> ── Summary (gold standard generation): ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── #> ℹ Krippendorff's Alpha: 0.931 #> ℹ Correlation: 0.744 (p = 2e-04) #> ℹ Effect of content length: -0.323 (p = 0.1643) diff --git a/tests/testthat/_snaps/printing.md b/tests/testthat/_snaps/printing.md index 645bd51..1edf7ad 100644 --- a/tests/testthat/_snaps/printing.md +++ b/tests/testthat/_snaps/printing.md @@ -3,10 +3,13 @@ Code create_oolong(input_corpus = abstracts$text) Message + -- oolong (gold standard generation) ------------------------------------------- i GS: n = 25, 0 coded. i Construct: positive. + -- Methods -- + * <$do_gold_standard_test()>: generate gold standard * <$lock()>: finalize this object and see the results @@ -45,8 +48,11 @@ Code res Message + -- Summary (topic model): ------------------------------------------------------ + -- Word intrusion test -- + i Mean model precision: 1 i Quantiles of model precision: 1, 1, 1, 1, 1 i P-value of the model precision @@ -54,7 +60,9 @@ i Krippendorff's alpha: 1 i K Precision: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + -- Topic intrusion test -- + i Mean TLO: 0 i Median TLO: 0 i Quantiles of TLO: 0, 0, 0, 0, 0 @@ -67,10 +75,13 @@ create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text, type = "ti") Message + -- oolong (topic model) -------------------------------------------------------- x WI v TI x WSI i TI: n = 25, 0 coded. + -- Methods -- + * <$do_topic_intrusion_test()>: do topic intrusion test * <$lock()>: finalize and see the results @@ -80,10 +91,13 @@ create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text, type = "wsi", wsi_n_top_terms = 100) Message + -- oolong (topic model) -------------------------------------------------------- x WI x TI v WSI i WSI: n = 10, 0 coded. + -- Methods -- + * <$do_word_set_intrusion_test()>: do word set intrusion test * <$lock()>: finalize and see the results @@ -92,8 +106,11 @@ Code res Message + -- Summary (topic model): ------------------------------------------------------ + -- Word set intrusion test -- + i Mean model precision: 0.833333333333333 i K Precision: 0.3, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1 @@ -104,8 +121,11 @@ Code res Message + -- Summary (topic model): ------------------------------------------------------ + -- Word set intrusion test -- + i Mean model precision: 1 i K Precision: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 @@ -131,11 +151,14 @@ Please consider setting the userid by assigning the userid to the slot $userid, e.g. oolong$userid <- "myname" The oolong object is too old. Some security features might not be available in the updated oolong object. Message + -- oolong (topic model) -------------------------------------------------------- v WI v TI x WSI i WI: k = 10, 0 coded. i TI: n = 10, 0 coded. + -- Methods -- + * <$do_word_intrusion_test()>: do word intrusion test * <$do_topic_intrusion_test()>: do topic intrusion test * <$lock()>: finalize and see the results @@ -148,10 +171,13 @@ Please consider setting the userid by assigning the userid to the slot $userid, e.g. oolong$userid <- "myname" The oolong object is too old. Some security features might not be available in the updated oolong object. Message + -- oolong (gold standard generation) ------------------------------------------- i GS: n = 25, 0 coded. i Construct: positive. 
+ -- Methods -- + * <$do_gold_standard_test()>: generate gold standard * <$lock()>: finalize this object and see the results @@ -163,10 +189,13 @@ Please consider setting the userid by assigning the userid to the slot $userid, e.g. oolong$userid <- "myname" The oolong object is too old. Some security features might not be available in the updated oolong object. Message + -- oolong (gold standard generation) ------------------------------------------- i GS: n = 25, 0 coded. i Construct: positive. + -- Methods -- + * <$turn_gold()>: convert the test results into a quanteda corpus --- @@ -178,10 +207,13 @@ Code update_oolong(y) Message + -- oolong (topic model) -------------------------------------------------------- v WI x TI x WSI i WI: k = 20, 0 coded. + -- Methods -- + * <$do_word_intrusion_test()>: do word intrusion test * <$lock()>: finalize and see the results diff --git a/vig_body.Rmd b/vig_body.Rmd index 8e54fb7..b98d892 100644 --- a/vig_body.Rmd +++ b/vig_body.Rmd @@ -210,37 +210,6 @@ H1: Median TLO is better than random guess. One must notice that the two statistical tests are testing the bear minimum. A significant test only indicates the topic model can make the rater(s) perform better than random guess. It is not an indication of good topic interpretability. Also, one should use a very conservative significant level, e.g. $\alpha < 0.001$. -### About Warp LDA - -There is a subtle difference between the support for `stm` and for `text2vec`. - -`abstracts_warplda` is a Warp LDA object trained with the same dataset as the `abstracts_stm` - -```{r warplda} -abstracts_warplda -``` - -All the API endpoints are the same, except the one for the creation of topic intrusion test cases. You must supply also the `input_dfm`. - -```{r warplda2} -### Just word intrusion test. -oolong_test <- wi(abstracts_warplda, userid = "Lionel") -oolong_test -``` - - -```{r warplda3} -abstracts_dfm -``` - -```{r warplda4, , message = FALSE, results = 'hide', warning = FALSE} -oolong_test <- witi(abstracts_warplda, abstracts$text, input_dfm = abstracts_dfm, userid = "Mara") -``` - -```{r warplda5} -oolong_test -``` - ## About Biterm Topic Model Please refer to the vignette about BTM. diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd index bba8e20..9195668 100644 --- a/vignettes/overview.Rmd +++ b/vignettes/overview.Rmd @@ -229,37 +229,6 @@ H1: Median TLO is better than random guess. One must notice that the two statistical tests are testing the bear minimum. A significant test only indicates the topic model can make the rater(s) perform better than random guess. It is not an indication of good topic interpretability. Also, one should use a very conservative significant level, e.g. $\alpha < 0.001$. -### About Warp LDA - -There is a subtle difference between the support for `stm` and for `text2vec`. - -`abstracts_warplda` is a Warp LDA object trained with the same dataset as the `abstracts_stm` - -```{r warplda} -abstracts_warplda -``` - -All the API endpoints are the same, except the one for the creation of topic intrusion test cases. You must supply also the `input_dfm`. - -```{r warplda2} -### Just word intrusion test. 
-oolong_test <- wi(abstracts_warplda, userid = "Lionel") -oolong_test -``` - - -```{r warplda3} -abstracts_dfm -``` - -```{r warplda4, , message = FALSE, results = 'hide', warning = FALSE} -oolong_test <- witi(abstracts_warplda, abstracts$text, input_dfm = abstracts_dfm, userid = "Mara") -``` - -```{r warplda5} -oolong_test -``` - ## About Biterm Topic Model Please refer to the vignette about BTM.
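
For readers who still want the Warp LDA walkthrough that this patch drops from the vignettes, the example can be rebuilt locally once `text2vec` is installed: the patch only stops shipping the pre-trained `abstracts_warplda` R6 object, while oolong's support for `WarpLDA` models is unchanged. The sketch below is a reconstruction under stated assumptions, not part of the patch itself: it assumes `abstracts` and `abstracts_dfm` remain exported by oolong, reuses the k and priors visible in the removed R6 print-out (k = 20, `doc_topic_prior = 0.1`, `topic_word_prior = 0.01`), and picks an arbitrary seed and iteration count rather than the settings used to train the original object.

``` r
library(oolong)     # assumed to still export the `abstracts` and `abstracts_dfm` datasets
library(text2vec)   # suggested package; provides the R6 `LDA` (Warp LDA) class
library(Matrix)     # for the sparse-matrix coercion below

set.seed(46709394)  # arbitrary seed, only so the illustration is reproducible
# k and the priors mirror the removed R6 print-out; n_iter is a guess, not the
# value used to train the original abstracts_warplda
warplda <- LDA$new(n_topics = 20, doc_topic_prior = 0.1, topic_word_prior = 0.01)
# fit_transform() expects a sparse document-term matrix; a quanteda dfm already
# inherits from dgCMatrix, so this coercion is defensive rather than required
doc_topic <- warplda$fit_transform(as(abstracts_dfm, "CsparseMatrix"), n_iter = 500)

# The oolong endpoints shown in the removed vignette section work the same way:
wi(warplda, userid = "Lionel")                # word intrusion test only
witi(warplda, abstracts$text,                 # word + topic intrusion; Warp LDA
     input_dfm = abstracts_dfm,               # additionally needs the input_dfm
     userid = "Mara")
```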