From b6f8aee8047a799feeed276ef7d2359b7cfe3c43 Mon Sep 17 00:00:00 2001 From: chainsawriot Date: Tue, 3 Oct 2023 18:09:02 +0200 Subject: [PATCH] Fix #76 (#77) --- btm_gh.md | 4 +- deploy_gh.md | 4 +- overview_gh.Rmd | 150 ++++++++++++++++++++--------------------- overview_gh.md | 30 ++++----- vig_body.Rmd | 150 ++++++++++++++++++++--------------------- vignettes/overview.Rmd | 150 ++++++++++++++++++++--------------------- 6 files changed, 244 insertions(+), 244 deletions(-) diff --git a/btm_gh.md b/btm_gh.md index 31374fe..a64ab28 100644 --- a/btm_gh.md +++ b/btm_gh.md @@ -118,7 +118,7 @@ with other topic models. oolong <- create_oolong(trump_btm) oolong #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> @@ -136,7 +136,7 @@ frame you used for training (in this case `trump_dat`). Your oolong <- create_oolong(trump_btm, trump_corpus, btm_dataframe = trump_dat) oolong #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✔ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> ℹ TI: n = 20, 0 coded. diff --git a/deploy_gh.md b/deploy_gh.md index 2769eab..390ed7c 100644 --- a/deploy_gh.md +++ b/deploy_gh.md @@ -27,7 +27,7 @@ library(oolong) wsi_test <- wsi(abstracts_keyatm) wsi_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 10, 0 coded. #> @@ -116,7 +116,7 @@ revert_oolong(wsi_test, "oolong_2021-05-22 20 51 26 Hadley Wickham.RDS") ``` #> - #> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + #> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Hadley Wickham #> ℹ WSI: n = 10, 10 coded. diff --git a/overview_gh.Rmd b/overview_gh.Rmd index 7b16818..904359a 100644 --- a/overview_gh.Rmd +++ b/overview_gh.Rmd @@ -65,7 +65,7 @@ oolong_test <- wi(abstracts_keyatm, userid = "Hadley") oolong_test ``` -As instructed, use the method `$do_word_intrusion_test()` to start coding. +As instructed, use the method `$do_word_intrusion_test()` to start coding. ```{r, eval = FALSE} oolong_test$do_word_intrusion_test() @@ -95,7 +95,7 @@ oolong_test <- wsi(abstracts_keyatm, userid = "Garrett") oolong_test ``` -Use the method `$do_word_set_intrusion_test()` to start coding. +Use the method `$do_word_set_intrusion_test()` to start coding. ```{r wsi2, eval = FALSE} oolong_test$do_word_set_intrusion_test() @@ -121,7 +121,7 @@ library(tibble) abstracts ``` -Creating the oolong test object with the corpus used for training the topic model will generate topic intrusion test cases. +Creating the oolong test object with the corpus used for training the topic model will generate topic intrusion test cases. ```{r createtest2} oolong_test <- ti(abstracts_keyatm, abstracts$text, userid = "Julia") @@ -215,9 +215,9 @@ summarize_oolong(oolong_test_rater1, oolong_test_rater2) The test for model precision (MP) is based on an one-tailed, one-sample binomial test for each rater. In a multiple-rater situation, the p-values from all raters are combined using the Fisher's method (a.k.a. Fisher's omnibus test). -H0: MP is not better than 1/ n\_top\_terms +H0: MP is not better than 1/ (n\_top\_terms + 1) -H1: MP is better than 1/ n\_top\_terms +H1: MP is better than 1/ (n\_top\_terms + 1) The test for the median of TLO is based on a permutation test. @@ -280,30 +280,30 @@ oolong_test$do_gold_standard_test() After the coding, you need to first lock the test and then the `$turn_gold()` method is available. ```{r, include = FALSE} -oolong_test$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +oolong_test$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) ``` @@ -358,57 +358,57 @@ trump2$lock() ``` ```{r, include = FALSE} -trump$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +trump$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) -trump2$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +trump2$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(5L, 3L, 2L, 5L, 3L, 1L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(5L, 3L, 2L, 5L, 3L, 1L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) trump$lock() @@ -437,7 +437,7 @@ Read the results. The diagnostic plot consists of 4 subplots. It is a good idea * Subplot (top left): Raw correlation between human judgement and target value. One should want to have a good correlation between the two. * Subplot (top right): Bland-Altman plot. One should want to have no correlation. Also, the dots should be randomly scattering around the mean value. If it is so, the two measurements (human judgement and target value) are in good agreement. * Subplot (bottom left): Raw correlation between target value and content length. One should want to have no correlation, as an indication of good reliability against the influence of content length. (See Chan et al. 2020) -* Subplot (bottom right): Cook's distance of all data point. One should want to have no dot (or at least very few dots) above the threshold. It is an indication of how the raw correlation between human judgement and target value can or cannot be influenced by extreme values in your data. +* Subplot (bottom right): Cook's distance of all data point. One should want to have no dot (or at least very few dots) above the threshold. It is an indication of how the raw correlation between human judgement and target value can or cannot be influenced by extreme values in your data. The textual output contains the Krippendorff's alpha of the codings by your raters. In order to claim validity of your target value, you must first establish the reliability of your gold standard. Song et al. (2020) suggest Krippendorff's Alpha > 0.7 as an acceptable cut-off. diff --git a/overview_gh.md b/overview_gh.md index 7768343..30b275c 100644 --- a/overview_gh.md +++ b/overview_gh.md @@ -84,7 +84,7 @@ be doing the test. oolong_test <- wi(abstracts_keyatm, userid = "Hadley") oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 0 coded. @@ -115,7 +115,7 @@ can look at the model precision by printing the oolong test. oolong_test$lock() oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 10 coded. @@ -139,7 +139,7 @@ parameter `n_correct_ws` to N - 1. oolong_test <- wsi(abstracts_keyatm, userid = "Garrett") oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 0 coded. @@ -160,7 +160,7 @@ oolong_test$do_word_set_intrusion_test() oolong_test$lock() oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 10 coded. @@ -201,7 +201,7 @@ topic model will generate topic intrusion test cases. oolong_test <- ti(abstracts_keyatm, abstracts$text, userid = "Julia") oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 0 coded. @@ -224,7 +224,7 @@ oolong_test$lock() ``` r oolong_test #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 25 coded. @@ -284,7 +284,7 @@ Get a summary of the two objects. ``` r summarize_oolong(oolong_test_rater1, oolong_test_rater2) #> -#> ── Summary (topic model): ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── Summary (topic model): ──────────────────────────────────────────────────────────────────────────────────────────── #> #> ── Word intrusion test ── #> @@ -312,9 +312,9 @@ binomial test for each rater. In a multiple-rater situation, the p-values from all raters are combined using the Fisher’s method (a.k.a. Fisher’s omnibus test). -H0: MP is not better than 1/ n\_top\_terms +H0: MP is not better than 1/ (n\_top\_terms + 1) -H1: MP is better than 1/ n\_top\_terms +H1: MP is better than 1/ (n\_top\_terms + 1) The test for the median of TLO is based on a permutation test. @@ -353,7 +353,7 @@ You can still generate word intrusion and word set intrusion tests. ``` r wi(newsgroup_nb) #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 20, 0 coded. #> @@ -366,7 +366,7 @@ wi(newsgroup_nb) ``` r wsi(newsgroup_nb) #> -#> ── oolong (topic model) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (topic model) ────────────────────────────────────────────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 20, 0 coded. #> @@ -411,7 +411,7 @@ should be an adjective, e.g. positive, liberal, populistic, etc. oolong_test <- gs(input_corpus = trump2k, construct = "positive", userid = "Joe") oolong_test #> -#> ── oolong (gold standard generation) ─────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (gold standard generation) ───────────────────────────────────────────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 0 coded. #> ℹ Construct: positive. @@ -436,7 +436,7 @@ After the coding, you need to first lock the test and then the oolong_test$lock() oolong_test #> -#> ── oolong (gold standard generation) ─────────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── oolong (gold standard generation) ───────────────────────────────────────────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 20 coded. #> ℹ Construct: positive. @@ -509,7 +509,7 @@ summarize_oolong(oolong_test, target_value = all_afinn_score) #> `geom_smooth()` using formula = 'y ~ x' #> #> ── Summary (gold standard generation): -#> ───────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ─────────────────────────────────────────────────────────────────────────────── #> ℹ Correlation: 0.718 (p = 4e-04) #> ℹ Effect of content length: -0.319 (p = 0.1709) #> • `` -> `...1` @@ -589,7 +589,7 @@ cut-off. ``` r res #> -#> ── Summary (gold standard generation): ───────────────────────────────────────────────────────────────────────────────────────────────────────── +#> ── Summary (gold standard generation): ─────────────────────────────────────────────────────────────────────────────── #> ℹ Krippendorff's Alpha: 0.931 #> ℹ Correlation: 0.744 (p = 2e-04) #> ℹ Effect of content length: -0.319 (p = 0.1709) diff --git a/vig_body.Rmd b/vig_body.Rmd index c26ad87..32ec04e 100644 --- a/vig_body.Rmd +++ b/vig_body.Rmd @@ -49,7 +49,7 @@ oolong_test <- wi(abstracts_keyatm, userid = "Hadley") oolong_test ``` -As instructed, use the method `$do_word_intrusion_test()` to start coding. +As instructed, use the method `$do_word_intrusion_test()` to start coding. ```{r, eval = FALSE} oolong_test$do_word_intrusion_test() @@ -79,7 +79,7 @@ oolong_test <- wsi(abstracts_keyatm, userid = "Garrett") oolong_test ``` -Use the method `$do_word_set_intrusion_test()` to start coding. +Use the method `$do_word_set_intrusion_test()` to start coding. ```{r wsi2, eval = FALSE} oolong_test$do_word_set_intrusion_test() @@ -105,7 +105,7 @@ library(tibble) abstracts ``` -Creating the oolong test object with the corpus used for training the topic model will generate topic intrusion test cases. +Creating the oolong test object with the corpus used for training the topic model will generate topic intrusion test cases. ```{r createtest2} oolong_test <- ti(abstracts_keyatm, abstracts$text, userid = "Julia") @@ -199,9 +199,9 @@ summarize_oolong(oolong_test_rater1, oolong_test_rater2) The test for model precision (MP) is based on an one-tailed, one-sample binomial test for each rater. In a multiple-rater situation, the p-values from all raters are combined using the Fisher's method (a.k.a. Fisher's omnibus test). -H0: MP is not better than 1/ n\_top\_terms +H0: MP is not better than 1/ (n\_top\_terms + 1) -H1: MP is better than 1/ n\_top\_terms +H1: MP is better than 1/ (n\_top\_terms + 1) The test for the median of TLO is based on a permutation test. @@ -264,30 +264,30 @@ oolong_test$do_gold_standard_test() After the coding, you need to first lock the test and then the `$turn_gold()` method is available. ```{r, include = FALSE} -oolong_test$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +oolong_test$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) ``` @@ -342,57 +342,57 @@ trump2$lock() ``` ```{r, include = FALSE} -trump$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +trump$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) -trump2$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +trump2$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(5L, 3L, 2L, 5L, 3L, 1L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(5L, 3L, 2L, 5L, 3L, 1L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) trump$lock() @@ -421,7 +421,7 @@ Read the results. The diagnostic plot consists of 4 subplots. It is a good idea * Subplot (top left): Raw correlation between human judgement and target value. One should want to have a good correlation between the two. * Subplot (top right): Bland-Altman plot. One should want to have no correlation. Also, the dots should be randomly scattering around the mean value. If it is so, the two measurements (human judgement and target value) are in good agreement. * Subplot (bottom left): Raw correlation between target value and content length. One should want to have no correlation, as an indication of good reliability against the influence of content length. (See Chan et al. 2020) -* Subplot (bottom right): Cook's distance of all data point. One should want to have no dot (or at least very few dots) above the threshold. It is an indication of how the raw correlation between human judgement and target value can or cannot be influenced by extreme values in your data. +* Subplot (bottom right): Cook's distance of all data point. One should want to have no dot (or at least very few dots) above the threshold. It is an indication of how the raw correlation between human judgement and target value can or cannot be influenced by extreme values in your data. The textual output contains the Krippendorff's alpha of the codings by your raters. In order to claim validity of your target value, you must first establish the reliability of your gold standard. Song et al. (2020) suggest Krippendorff's Alpha > 0.7 as an acceptable cut-off. diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd index 49bbea6..d2ef050 100644 --- a/vignettes/overview.Rmd +++ b/vignettes/overview.Rmd @@ -68,7 +68,7 @@ oolong_test <- wi(abstracts_keyatm, userid = "Hadley") oolong_test ``` -As instructed, use the method `$do_word_intrusion_test()` to start coding. +As instructed, use the method `$do_word_intrusion_test()` to start coding. ```{r, eval = FALSE} oolong_test$do_word_intrusion_test() @@ -98,7 +98,7 @@ oolong_test <- wsi(abstracts_keyatm, userid = "Garrett") oolong_test ``` -Use the method `$do_word_set_intrusion_test()` to start coding. +Use the method `$do_word_set_intrusion_test()` to start coding. ```{r wsi2, eval = FALSE} oolong_test$do_word_set_intrusion_test() @@ -124,7 +124,7 @@ library(tibble) abstracts ``` -Creating the oolong test object with the corpus used for training the topic model will generate topic intrusion test cases. +Creating the oolong test object with the corpus used for training the topic model will generate topic intrusion test cases. ```{r createtest2} oolong_test <- ti(abstracts_keyatm, abstracts$text, userid = "Julia") @@ -218,9 +218,9 @@ summarize_oolong(oolong_test_rater1, oolong_test_rater2) The test for model precision (MP) is based on an one-tailed, one-sample binomial test for each rater. In a multiple-rater situation, the p-values from all raters are combined using the Fisher's method (a.k.a. Fisher's omnibus test). -H0: MP is not better than 1/ n\_top\_terms +H0: MP is not better than 1/ (n\_top\_terms + 1) -H1: MP is better than 1/ n\_top\_terms +H1: MP is better than 1/ (n\_top\_terms + 1) The test for the median of TLO is based on a permutation test. @@ -283,30 +283,30 @@ oolong_test$do_gold_standard_test() After the coding, you need to first lock the test and then the `$turn_gold()` method is available. ```{r, include = FALSE} -oolong_test$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +oolong_test$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) ``` @@ -361,57 +361,57 @@ trump2$lock() ``` ```{r, include = FALSE} -trump$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +trump$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(4L, 4L, 2L, 5L, 3L, 2L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) -trump2$.__enclos_env__$private$test_content$gs <- -structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", -"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", -"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", -"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", -"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", -"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", -"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", -"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", -"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", -"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", -"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", -".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", -"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", -"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", -"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", -"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", -"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", -"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", -"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", +trump2$.__enclos_env__$private$test_content$gs <- +structure(list(case = 1:20, text = c("Thank you Eau Claire, Wisconsin. \n#VoteTrump on Tuesday, April 5th!\nMAKE AMERICA GREAT AGAIN! https://t.co/JI5JqwHnMC", +"\"@bobby990r_1: @realDonaldTrump would lead polls the second he announces candidacy! America is waiting for him to LEAD us out of this mess!", +"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump My money is on Kenya getting fired first.\"", +"Thank you for a great afternoon Birmingham, Alabama! #Trump2016 #MakeAmericaGreatAgain https://t.co/FrOkqCzBoD", +"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 http://t.co/UlQWGKUrCJ\"", +"People believe CNN these days almost as little as they believe Hillary....that's really saying something!", +"It was great being in Michigan. Remember, I am the only presidential candidate who will bring jobs back to the U.S.and protect car industry!", +"\"@DomineekSmith: @realDonaldTrump is the best Republican presidential candidate of all time.\" Thank you.", +"Word is that little Morty Zuckerman’s @NYDailyNews loses more than $50 million per year---can that be possible?", +"\"@Chevy_Mama: @realDonaldTrump I'm obsessed with @celebrityapprenticeNBC. Honestly, Mr Trump, you are very inspiring\"", +"President Obama said \"ISIL continues to shrink\" in an interview just hours before the horrible attack in Paris. He is just so bad! CHANGE.", +".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It is time to #DrainTheSwamp! Debates https://t.co/3Mz4T7qTTR", +"\"@jerrimoore: @realDonaldTrump awesome. A treat to get to see the brilliant Joan Rivers once more #icon\"", +"\"@shoegoddesss: @realDonaldTrump Will definitely vote for you. Breath of fresh air. America needs you!\"", +"Ted is the ultimate hypocrite. Says one thing for money, does another for votes. \nhttps://t.co/hxdfy0mjVw", +"\"@Lisa_Milicaj: Truth be told, I never heard of The National Review until they \"tried\" to declare war on you. No worries, you got my vote!\"", +"THANK YOU Daytona Beach, Florida!\n#MakeAmericaGreatAgain https://t.co/IAcLfXe463", +"People rarely say that many conservatives didn’t vote for Mitt Romney. If I can get them to vote for me, we win in a landslide.", +"Trump National Golf Club, Washington, D.C. is on 600 beautiful acres fronting the Potomac River. A fantastic setting! http://t.co/pYtkbyKwt5", "\"@DRUDGE_REPORT: REUTERS 5-DAY ROLLING POLL: TRUMP 34%, CARSON 19.6%, RUBIO 9.7%, CRUZ 7.7%...\" Thank you - a great honor!" -), answer = c(5L, 3L, 2L, 5L, 3L, 1L, 4L, 5L, 2L, 4L, 1L, 1L, -4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, -NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, +), answer = c(5L, 3L, 2L, 5L, 3L, 1L, 4L, 5L, 2L, 4L, 1L, 1L, +4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L), target_value = c(NA, NA, NA, +NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame" )) trump$lock() @@ -440,7 +440,7 @@ Read the results. The diagnostic plot consists of 4 subplots. It is a good idea * Subplot (top left): Raw correlation between human judgement and target value. One should want to have a good correlation between the two. * Subplot (top right): Bland-Altman plot. One should want to have no correlation. Also, the dots should be randomly scattering around the mean value. If it is so, the two measurements (human judgement and target value) are in good agreement. * Subplot (bottom left): Raw correlation between target value and content length. One should want to have no correlation, as an indication of good reliability against the influence of content length. (See Chan et al. 2020) -* Subplot (bottom right): Cook's distance of all data point. One should want to have no dot (or at least very few dots) above the threshold. It is an indication of how the raw correlation between human judgement and target value can or cannot be influenced by extreme values in your data. +* Subplot (bottom right): Cook's distance of all data point. One should want to have no dot (or at least very few dots) above the threshold. It is an indication of how the raw correlation between human judgement and target value can or cannot be influenced by extreme values in your data. The textual output contains the Krippendorff's alpha of the codings by your raters. In order to claim validity of your target value, you must first establish the reliability of your gold standard. Song et al. (2020) suggest Krippendorff's Alpha > 0.7 as an acceptable cut-off.