diff --git a/articles/btm.html b/articles/btm.html index ad97261..d09bf8b 100644 --- a/articles/btm.html +++ b/articles/btm.html @@ -168,6 +168,8 @@

Oolong’s support for BTM
 oolong <- create_oolong(trump_btm)
+#> Error in get(paste0(generic, ".", class), envir = get_method_env()) : 
+#>   object 'type_sum.accel' not found
 oolong
 #> 
 #> ── oolong (topic model) ────────────────────────────────────────────────────────
diff --git a/articles/deploy.html b/articles/deploy.html
index 341dc74..fa75453 100644
--- a/articles/deploy.html
+++ b/articles/deploy.html
@@ -94,6 +94,8 @@ 

Create an oolong test
 library(oolong)
 wsi_test <- wsi(abstracts_seededlda)
+#> Error in get(paste0(generic, ".", class), envir = get_method_env()) : 
+#>   object 'type_sum.accel' not found
 wsi_test
 #> 
 #> ── oolong (topic model) ────────────────────────────────────────────────────────
diff --git a/articles/overview.html b/articles/overview.html
index 54d66ae..ea882cd 100644
--- a/articles/overview.html
+++ b/articles/overview.html
@@ -169,6 +169,8 @@ 

Word intrusion test#> terms library(quanteda) library(dplyr) +#> Error in get(paste0(generic, ".", class), envir = get_method_env()) : +#> object 'type_sum.accel' not found #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': diff --git a/methodshub.html b/methodshub.html index a574945..bb27121 100644 --- a/methodshub.html +++ b/methodshub.html @@ -160,7 +160,7 @@

Contact Details

Publication

-
  1. Chan, C. H., & Sältzer, M. (2020). oolong: An R package for validating automated content analysis tools. The Journal of Open Source Software: JOSS, 5(55), 2461. https:://doi.org/10.21105/joss.02461 +
    1. Chan, C. H., & Sältzer, M. (2020). oolong: An R package for validating automated content analysis tools. The Journal of Open Source Software: JOSS, 5(55), 2461. https://doi.org/10.21105/joss.02461

diff --git a/pkgdown.yml b/pkgdown.yml index 4fdc9f0..2935b94 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -5,7 +5,7 @@ articles: btm: btm.html deploy: deploy.html overview: overview.html -last_built: 2024-12-12T15:00Z +last_built: 2024-12-30T16:18Z urls: reference: https://gesistsa.github.io/oolong/reference article: https://gesistsa.github.io/oolong/articles diff --git a/search.json b/search.json index c440574..b318e64 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":[]},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement [INSERT CONTACT METHOD]. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.0, available https://www.contributor-covenant.org/version/2/0/ code_of_conduct.html. Community Impact Guidelines inspired Mozilla’s code conduct enforcement ladder. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https:// www.contributor-covenant.org/translations.","code":""},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"gnu-lesser-general-public-license","dir":"","previous_headings":"","what":"GNU LESSER GENERAL PUBLIC LICENSE","title":"NA","text":"Version 2.1, February 1999","code":"Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.]"},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"preamble","dir":"","previous_headings":"","what":"Preamble","title":"NA","text":"licenses software designed take away freedom share change . contrast, GNU General Public Licenses intended guarantee freedom share change free software–make sure software free users. license, Lesser General Public License, applies specially designated software packages–typically libraries–Free Software Foundation authors decide use . can use , suggest first think carefully whether license ordinary General Public License better strategy use particular case, based explanations . speak free software, referring freedom use, price. General Public Licenses designed make sure freedom distribute copies free software (charge service wish); receive source code can get want ; can change software use pieces new free programs; informed can things. protect rights, need make restrictions forbid distributors deny rights ask surrender rights. restrictions translate certain responsibilities distribute copies library modify . example, distribute copies library, whether gratis fee, must give recipients rights gave . must make sure , , receive can get source code. link code library, must provide complete object files recipients, can relink library making changes library recompiling . must show terms know rights. protect rights two-step method: (1) copyright library, (2) offer license, gives legal permission copy, distribute /modify library. protect distributor, want make clear warranty free library. Also, library modified someone else passed , recipients know original version, original author’s reputation affected problems might introduced others. Finally, software patents pose constant threat existence free program. wish make sure company effectively restrict users free program obtaining restrictive license patent holder. Therefore, insist patent license obtained version library must consistent full freedom use specified license. GNU software, including libraries, covered ordinary GNU General Public License. license, GNU Lesser General Public License, applies certain designated libraries, quite different ordinary General Public License. use license certain libraries order permit linking libraries non-free programs. program linked library, whether statically using shared library, combination two legally speaking combined work, derivative original library. ordinary General Public License therefore permits linking entire combination fits criteria freedom. Lesser General Public License permits lax criteria linking code library. call license “Lesser” General Public License Less protect user’s freedom ordinary General Public License. also provides free software developers Less advantage competing non-free programs. disadvantages reason use ordinary General Public License many libraries. However, Lesser license provides advantages certain special circumstances. example, rare occasions, may special need encourage widest possible use certain library, becomes de-facto standard. achieve , non-free programs must allowed use library. frequent case free library job widely used non-free libraries. case, little gain limiting free library free software , use Lesser General Public License. cases, permission use particular library non-free programs enables greater number people use large body free software. example, permission use GNU C Library non-free programs enables many people use whole GNU operating system, well variant, GNU/Linux operating system. Although Lesser General Public License Less protective users’ freedom, ensure user program linked Library freedom wherewithal run program using modified version Library. precise terms conditions copying, distribution modification follow. Pay close attention difference “work based library” “work uses library”. former contains code derived library, whereas latter must combined library order run.","code":""},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"terms-and-conditions-for-copying-distribution-and-modification","dir":"","previous_headings":"","what":"TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION","title":"NA","text":"0. License Agreement applies software library program contains notice placed copyright holder authorized party saying may distributed terms Lesser General Public License (also called “License”). licensee addressed “”. “library” means collection software functions /data prepared conveniently linked application programs (use functions data) form executables. “Library”, , refers software library work distributed terms. “work based Library” means either Library derivative work copyright law: say, work containing Library portion , either verbatim modifications /translated straightforwardly another language. (Hereinafter, translation included without limitation term “modification”.) “Source code” work means preferred form work making modifications . library, complete source code means source code modules contains, plus associated interface definition files, plus scripts used control compilation installation library. Activities copying, distribution modification covered License; outside scope. act running program using Library restricted, output program covered contents constitute work based Library (independent use Library tool writing ). Whether true depends Library program uses Library . 1. may copy distribute verbatim copies Library’s complete source code receive , medium, provided conspicuously appropriately publish copy appropriate copyright notice disclaimer warranty; keep intact notices refer License absence warranty; distribute copy License along Library. may charge fee physical act transferring copy, may option offer warranty protection exchange fee. 2. may modify copy copies Library portion , thus forming work based Library, copy distribute modifications work terms Section 1 , provided also meet conditions: ) modified work must software library. b) must cause files modified carry prominent notices stating changed files date change. c) must cause whole work licensed charge third parties terms License. d) facility modified Library refers function table data supplied application program uses facility, argument passed facility invoked, must make good faith effort ensure , event application supply function table, facility still operates, performs whatever part purpose remains meaningful. (example, function library compute square roots purpose entirely well-defined independent application. Therefore, Subsection 2d requires application-supplied function table used function must optional: application supply , square root function must still compute square roots.) requirements apply modified work whole. identifiable sections work derived Library, can reasonably considered independent separate works , License, terms, apply sections distribute separate works. distribute sections part whole work based Library, distribution whole must terms License, whose permissions licensees extend entire whole, thus every part regardless wrote . Thus, intent section claim rights contest rights work written entirely ; rather, intent exercise right control distribution derivative collective works based Library. addition, mere aggregation another work based Library Library (work based Library) volume storage distribution medium bring work scope License. 3. may opt apply terms ordinary GNU General Public License instead License given copy Library. , must alter notices refer License, refer ordinary GNU General Public License, version 2, instead License. (newer version version 2 ordinary GNU General Public License appeared, can specify version instead wish.) make change notices. change made given copy, irreversible copy, ordinary GNU General Public License applies subsequent copies derivative works made copy. option useful wish copy part code Library program library. 4. may copy distribute Library (portion derivative , Section 2) object code executable form terms Sections 1 2 provided accompany complete corresponding machine-readable source code, must distributed terms Sections 1 2 medium customarily used software interchange. distribution object code made offering access copy designated place, offering equivalent access copy source code place satisfies requirement distribute source code, even though third parties compelled copy source along object code. 5. program contains derivative portion Library, designed work Library compiled linked , called “work uses Library”. work, isolation, derivative work Library, therefore falls outside scope License. However, linking “work uses Library” Library creates executable derivative Library (contains portions Library), rather “work uses library”. executable therefore covered License. Section 6 states terms distribution executables. “work uses Library” uses material header file part Library, object code work may derivative work Library even though source code . Whether true especially significant work can linked without Library, work library. threshold true precisely defined law. object file uses numerical parameters, data structure layouts accessors, small macros small inline functions (ten lines less length), use object file unrestricted, regardless whether legally derivative work. (Executables containing object code plus portions Library still fall Section 6.) Otherwise, work derivative Library, may distribute object code work terms Section 6. executables containing work also fall Section 6, whether linked directly Library . 6. exception Sections , may also combine link “work uses Library” Library produce work containing portions Library, distribute work terms choice, provided terms permit modification work customer’s use reverse engineering debugging modifications. must give prominent notice copy work Library used Library use covered License. must supply copy License. work execution displays copyright notices, must include copyright notice Library among , well reference directing user copy License. Also, must one things: ) Accompany work complete corresponding machine-readable source code Library including whatever changes used work (must distributed Sections 1 2 ); , work executable linked Library, complete machine-readable “work uses Library”, object code /source code, user can modify Library relink produce modified executable containing modified Library. (understood user changes contents definitions files Library necessarily able recompile application use modified definitions.) b) Use suitable shared library mechanism linking Library. suitable mechanism one (1) uses run time copy library already present user’s computer system, rather copying library functions executable, (2) operate properly modified version library, user installs one, long modified version interface-compatible version work made . c) Accompany work written offer, valid least three years, give user materials specified Subsection 6a, , charge cost performing distribution. d) distribution work made offering access copy designated place, offer equivalent access copy specified materials place. e) Verify user already received copy materials already sent user copy. executable, required form “work uses Library” must include data utility programs needed reproducing executable . However, special exception, materials distributed need include anything normally distributed (either source binary form) major components (compiler, kernel, ) operating system executable runs, unless component accompanies executable. may happen requirement contradicts license restrictions proprietary libraries normally accompany operating system. contradiction means use Library together executable distribute. 7. may place library facilities work based Library side--side single library together library facilities covered License, distribute combined library, provided separate distribution work based Library library facilities otherwise permitted, provided two things: ) Accompany combined library copy work based Library, uncombined library facilities. must distributed terms Sections . b) Give prominent notice combined library fact part work based Library, explaining find accompanying uncombined form work. 8. may copy, modify, sublicense, link , distribute Library except expressly provided License. attempt otherwise copy, modify, sublicense, link , distribute Library void, automatically terminate rights License. However, parties received copies, rights, License licenses terminated long parties remain full compliance. 9. required accept License, since signed . However, nothing else grants permission modify distribute Library derivative works. actions prohibited law accept License. Therefore, modifying distributing Library (work based Library), indicate acceptance License , terms conditions copying, distributing modifying Library works based . 10. time redistribute Library (work based Library), recipient automatically receives license original licensor copy, distribute, link modify Library subject terms conditions. may impose restrictions recipients’ exercise rights granted herein. responsible enforcing compliance third parties License. 11. , consequence court judgment allegation patent infringement reason (limited patent issues), conditions imposed (whether court order, agreement otherwise) contradict conditions License, excuse conditions License. distribute satisfy simultaneously obligations License pertinent obligations, consequence may distribute Library . example, patent license permit royalty-free redistribution Library receive copies directly indirectly , way satisfy License refrain entirely distribution Library. portion section held invalid unenforceable particular circumstance, balance section intended apply, section whole intended apply circumstances. purpose section induce infringe patents property right claims contest validity claims; section sole purpose protecting integrity free software distribution system implemented public license practices. Many people made generous contributions wide range software distributed system reliance consistent application system; author/donor decide willing distribute software system licensee impose choice. section intended make thoroughly clear believed consequence rest License. 12. distribution /use Library restricted certain countries either patents copyrighted interfaces, original copyright holder places Library License may add explicit geographical distribution limitation excluding countries, distribution permitted among countries thus excluded. case, License incorporates limitation written body License. 13. Free Software Foundation may publish revised /new versions Lesser General Public License time time. new versions similar spirit present version, may differ detail address new problems concerns. version given distinguishing version number. Library specifies version number License applies “later version”, option following terms conditions either version later version published Free Software Foundation. Library specify license version number, may choose version ever published Free Software Foundation. 14. wish incorporate parts Library free programs whose distribution conditions incompatible , write author ask permission. software copyrighted Free Software Foundation, write Free Software Foundation; sometimes make exceptions . decision guided two goals preserving free status derivatives free software promoting sharing reuse software generally. WARRANTY 15. LIBRARY LICENSED FREE CHARGE, WARRANTY LIBRARY, EXTENT PERMITTED APPLICABLE LAW. EXCEPT OTHERWISE STATED WRITING COPYRIGHT HOLDERS /PARTIES PROVIDE LIBRARY “” WITHOUT WARRANTY KIND, EITHER EXPRESSED IMPLIED, INCLUDING, LIMITED , IMPLIED WARRANTIES MERCHANTABILITY FITNESS PARTICULAR PURPOSE. ENTIRE RISK QUALITY PERFORMANCE LIBRARY . LIBRARY PROVE DEFECTIVE, ASSUME COST NECESSARY SERVICING, REPAIR CORRECTION. 16. EVENT UNLESS REQUIRED APPLICABLE LAW AGREED WRITING COPYRIGHT HOLDER, PARTY MAY MODIFY /REDISTRIBUTE LIBRARY PERMITTED , LIABLE DAMAGES, INCLUDING GENERAL, SPECIAL, INCIDENTAL CONSEQUENTIAL DAMAGES ARISING USE INABILITY USE LIBRARY (INCLUDING LIMITED LOSS DATA DATA RENDERED INACCURATE LOSSES SUSTAINED THIRD PARTIES FAILURE LIBRARY OPERATE SOFTWARE), EVEN HOLDER PARTY ADVISED POSSIBILITY DAMAGES.","code":""},{"path":[]},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"how-to-apply-these-terms-to-your-new-libraries","dir":"","previous_headings":"","what":"How to Apply These Terms to Your New Libraries","title":"NA","text":"develop new library, want greatest possible use public, recommend making free software everyone can redistribute change. can permitting redistribution terms (, alternatively, terms ordinary General Public License). apply terms, attach following notices library. safest attach start source file effectively convey exclusion warranty; file least “copyright” line pointer full notice found. Also add information contact electronic paper mail. also get employer (work programmer) school, , sign “copyright disclaimer” library, necessary. sample; alter names: ’s !","code":"one line to give the library's name and an idea of what it does. Copyright (C) year name of author This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. signature of Ty Coon, 1 April 1990 Ty Coon, President of Vice"},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"requirement-1-keep-your-quanteda-corpus","dir":"Articles","previous_headings":"","what":"Requirement #1: Keep your quanteda corpus","title":"BTM","text":"every document unique document id. can regular text cleaning, stemming procedure quanteda. Instead making product DFM object, make token object. may read issue Benoit et al.","code":"require(BTM) #> Loading required package: BTM require(quanteda) #> Loading required package: quanteda #> Package version: 4.1.0 #> Unicode version: 14.0 #> ICU version: 70.1 #> Parallel computing: disabled #> See https://quanteda.io for tutorials and examples. require(oolong) #> Loading required package: oolong trump_corpus <- corpus(trump2k) tokens(trump_corpus, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, split_hyphens = TRUE, remove_url = TRUE) %>% tokens_tolower() %>% tokens_remove(stopwords(\"en\")) %>% tokens_remove(\"@*\") -> trump_toks"},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"requirement-2-keep-your-data-frame","dir":"Articles","previous_headings":"","what":"Requirement #2: Keep your data frame","title":"BTM","text":"Use function convert token object data frame. Train BTM model","code":"as.data.frame.tokens <- function(x) { data.frame( doc_id = rep(names(x), lengths(x)), tokens = unlist(x, use.names = FALSE) ) } trump_dat <- as.data.frame.tokens(trump_toks) trump_btm <- BTM(trump_dat, k = 8, iter = 500, trace = 10)"},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"pecularities-of-btm","dir":"Articles","previous_headings":"Requirement #2: Keep your data frame","what":"Pecularities of BTM","title":"BTM","text":"generate θt\\theta_{t} . However, many NaN 1994 rows (trump2k 2000 tweets) due empty documents. Also, row order messed .","code":"theta <- predict(trump_btm, newdata = trump_dat) dim(theta) #> [1] 1994 8 setdiff(docid(trump_corpus), row.names(theta)) #> [1] \"text604\" \"text633\" \"text659\" \"text1586\" \"text1587\" \"text1761\" trump_corpus[604] #> Corpus consisting of 1 document. #> text604 : #> \"http://t.co/PtViAyrO4A\" head(row.names(theta), 100) #> [1] \"text1\" \"text10\" \"text100\" \"text1000\" \"text1001\" \"text1002\" #> [7] \"text1003\" \"text1004\" \"text1005\" \"text1006\" \"text1007\" \"text1008\" #> [13] \"text1009\" \"text101\" \"text1010\" \"text1011\" \"text1012\" \"text1013\" #> [19] \"text1014\" \"text1015\" \"text1016\" \"text1017\" \"text1018\" \"text1019\" #> [25] \"text102\" \"text1020\" \"text1021\" \"text1022\" \"text1023\" \"text1024\" #> [31] \"text1025\" \"text1026\" \"text1027\" \"text1028\" \"text1029\" \"text103\" #> [37] \"text1030\" \"text1031\" \"text1032\" \"text1033\" \"text1034\" \"text1035\" #> [43] \"text1036\" \"text1037\" \"text1038\" \"text1039\" \"text104\" \"text1040\" #> [49] \"text1041\" \"text1042\" \"text1043\" \"text1044\" \"text1045\" \"text1046\" #> [55] \"text1047\" \"text1048\" \"text1049\" \"text105\" \"text1050\" \"text1051\" #> [61] \"text1052\" \"text1053\" \"text1054\" \"text1055\" \"text1056\" \"text1057\" #> [67] \"text1058\" \"text1059\" \"text106\" \"text1060\" \"text1061\" \"text1062\" #> [73] \"text1063\" \"text1064\" \"text1065\" \"text1066\" \"text1067\" \"text1068\" #> [79] \"text1069\" \"text107\" \"text1070\" \"text1071\" \"text1072\" \"text1073\" #> [85] \"text1074\" \"text1075\" \"text1076\" \"text1077\" \"text1078\" \"text1079\" #> [91] \"text108\" \"text1080\" \"text1081\" \"text1082\" \"text1083\" \"text1084\" #> [97] \"text1085\" \"text1086\" \"text1087\" \"text1088\""},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"oolongs-support-for-btm","dir":"Articles","previous_headings":"","what":"Oolong’s support for BTM","title":"BTM","text":"Oolong problem generating word intrusion test BTM like topic models. generating topic intrusion tests, however, must provide data frame used training (case trump_dat). input_corpus must quanteda corpus . btm_dataframe must NULL. input_corpus must quanteda corpus.","code":"oolong <- create_oolong(trump_btm) oolong #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$lock()>: finalize and see the results oolong <- create_oolong(trump_btm, trump_corpus, btm_dataframe = trump_dat) oolong #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✔ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> ℹ TI: n = 20, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$do_topic_intrusion_test()>: do topic intrusion test #> • <$lock()>: finalize and see the results oolong <- create_oolong(trump_btm, trump_corpus) #> Error: You need to provide input_corpus (in quanteda format) and btm_dataframe for generating topic intrusion tests. oolong <- create_oolong(trump_btm, trump2k, btm_dataframe = trump_dat) #> Error: You need to provide input_corpus (in quanteda format) and btm_dataframe for generating topic intrusion tests."},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"create-an-oolong-test","dir":"Articles","previous_headings":"","what":"Create an oolong test","title":"Deploy","text":"Please note one deploy oolong test objects word topic intrusion tests, .e. created using witi() online. need tests, need deploy two separate instances: one created using wi() another created using ti(). guide, assume want deploy word set intrusion test online.","code":"library(oolong) wsi_test <- wsi(abstracts_seededlda) wsi_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 10, 0 coded. #> #> ── Methods ── #> #> • <$do_word_set_intrusion_test()>: do word set intrusion test #> • <$lock()>: finalize and see the results"},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"deploy-the-test-online","dir":"Articles","previous_headings":"","what":"Deploy the test online","title":"Deploy","text":"First, need export oolong test object stand alone Shiny app. stand alone Shiny app directory. directory two files structure called “Single-file Shiny app.” Experienced Shiny users might preferred method deploying app whatever Shiny server can master. less experienced users, simplest way deploy app online use shinyapps.io (free tier available 25 hours computational time per month). Please register account shinyapps.io configure rsconnect. Please refer guide information. Please remember configure tokens. RStudio users, simplest way deploy app shinyapps.io first launch app. click Publish button right corner launched window. asked title app, just give name, e.g. wsi_test. probably can keep default settings push Publish button initialize deployment process. hiccup, get URL deployed oolong test. Something like: https://yourname.shinyapps.io/wsi_test/","code":"export_oolong(wsi_test, dir = \"./wsi_test\", use_full_path = FALSE) #> ℹ The Shiny has been written to the directory: ./wsi_test #> ℹ You can test the app with: shiny::runApp(\"./wsi_test\") fs::dir_tree(\"./wsi_test\") #> ./wsi_test #> ├── app.R #> └── oolong.RDS ## replace , , with the information from your profile on Shinyapps.io: click Your name -> Tokens rsconnect::setAccountInfo(name=\"\", token=\"\", secret=\"\") library(shiny) runApp(\"./wsi_test\")"},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"conduct-the-test","dir":"Articles","previous_headings":"","what":"Conduct the test","title":"Deploy","text":"can give URL coders conduct test browser online. difference deployed version , userid prompt download button coding. instruct coders download data file coding return . 2","code":""},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"revert","dir":"Articles","previous_headings":"","what":"Revert","title":"Deploy","text":"can obtain locked oolong object original oolong downloaded data file. revert_oolong verifications original oolong object make sure error cheating.","code":"revert_oolong(wsi_test, \"oolong_2021-05-22 20 51 26 Hadley Wickham.RDS\") #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Hadley Wickham #> ℹ WSI: n = 10, 10 coded. #> #> ── Results: ── #> #> ℹ 80% precision (WSI)"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"Overview","text":"package constantly changing, suggest using development version GitHub: can also install “stable” (slightly older) version CRAN:","code":"# install.packages(\"devtools\") devtools::install_github(\"chainsawriot/oolong\") install.packages(\"oolong\")"},{"path":[]},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"word-intrusion-test","dir":"Articles","previous_headings":"Validating Topic Models","what":"Word intrusion test","title":"Overview","text":"abstracts_seededlda example topic model trained data abstracts using seededlda package. Currently, package supports structural topic models / correlated topic models stm, Warp LDA models text2vec , LDA/CTM models topicmodels, Biterm Topic Models BTM, Keyword Assisted Topic Models keyATM, seeded LDA models seededlda. Although strictly topic model, Naive Bayes models quanteda.textmodels also supported. See section Naive Bayes information. create oolong test word intrusion test, use function wi. recommended provide user id coder going test. instructed, use method $do_word_intrusion_test() start coding. can pause test clicking “Exit” button. progress recorded object. want save progress, just save object (e.g. saveRDS(oolong_test, \"oolong_test.RDS\")). resume test, launch test . coding (items coded), need press “Exit” button quit coding interface lock test. , can look model precision printing oolong test.","code":"library(oolong) library(seededlda) #> Loading required package: quanteda #> Package version: 4.1.0 #> Unicode version: 14.0 #> ICU version: 70.1 #> Parallel computing: disabled #> See https://quanteda.io for tutorials and examples. #> Loading required package: proxyC #> #> Attaching package: 'proxyC' #> The following object is masked from 'package:stats': #> #> dist #> #> Attaching package: 'seededlda' #> The following object is masked from 'package:quanteda': #> #> info_tbb #> The following object is masked from 'package:stats': #> #> terms library(quanteda) library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union abstracts_seededlda #> #> Call: #> lda(x = x, k = k, label = label, max_iter = max_iter, alpha = alpha, #> beta = beta, seeds = seeds, words = NULL, verbose = verbose) #> #> 10 topics; 2,500 documents; 3,908 features. oolong_test <- wi(abstracts_seededlda, userid = \"Hadley\") oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$lock()>: finalize and see the results oolong_test$do_word_intrusion_test() oolong_test$lock() oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 10 coded. #> #> ── Results: ── #> #> ℹ 90% precision"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"word-set-intrusion-test","dir":"Articles","previous_headings":"Validating Topic Models","what":"Word set intrusion test","title":"Overview","text":"Word set intrusion test variant word intrusion test (Ying et al., 2021), multiple word sets generated top terms one topic juxtaposed one intruder word set generated similarly another topic. Ying et al., test called “R4WSI” 4 word sets displayed. default, oolong generates also R4WSI. However, also possible generate R(N)WSI setting parameter n_correct_ws N - 1. Use method $do_word_set_intrusion_test() start coding.","code":"oolong_test <- wsi(abstracts_seededlda, userid = \"Garrett\") oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 0 coded. #> #> ── Methods ── #> #> • <$do_word_set_intrusion_test()>: do word set intrusion test #> • <$lock()>: finalize and see the results oolong_test$do_word_set_intrusion_test() oolong_test$lock() oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 10 coded. #> #> ── Results: ── #> #> ℹ 90% precision (WSI)"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"topic-intrusion-test","dir":"Articles","previous_headings":"Validating Topic Models","what":"Topic intrusion test","title":"Overview","text":"example, abstracts_seededlda generated corpus abstracts$text Creating oolong test object corpus used training topic model generate topic intrusion test cases. Similarly, use $do_topic_intrusion_test code test cases, lock test $lock() can look TLO (topic log odds) value printing oolong test.","code":"library(tibble) abstracts #> # A tibble: 2,500 × 1 #> text #> #> 1 This study explores the benefits and risks featured in medical tourism broke… #> 2 This article puts forth the argument that with the transfer of stock trading… #> 3 The purpose of this study was to evaluate the effect the visual fidelity of … #> 4 Among the many health issues relevant to college students, overconsumption o… #> 5 This address, delivered at ICA's 50th anniversary conference, calls on the a… #> 6 The Internet has often been used to reach men who have sex with men (MSMs) i… #> 7 This article argues that the literature describing the internet revolution i… #> 8 This research study examined Bud Goodall's online health narrative as a case… #> 9 Information technology and new media allow for collecting and sharing person… #> 10 Using a national, telephone survey of 1,762 adolescents aged 12-17 years, th… #> # ℹ 2,490 more rows oolong_test <- ti(abstracts_seededlda, abstracts$text, userid = \"Julia\") oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 0 coded. #> #> ── Methods ── #> #> • <$do_topic_intrusion_test()>: do topic intrusion test #> • <$lock()>: finalize and see the results oolong_test$do_topic_intrusion_test() oolong_test$lock() oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 25 coded. #> #> ── Results: ── #> #> ℹ TLO: -0.187"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"suggested-workflow","dir":"Articles","previous_headings":"Validating Topic Models","what":"Suggested workflow","title":"Overview","text":"test makes sense one coder involved. suggested workflow create test, clone oolong object. Ask multiple coders test(s) summarize results. Preprocess create document-feature matrix Train topic model. Create new oolong object. Clone oolong object used raters. Ask different coders code object lock object. Get summary two objects.","code":"tokens(abstracts$text, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE, remove_url = TRUE, spilit_hyphens = TRUE) %>% tokens_wordstem %>% tokens_remove(stopwords(\"en\")) %>% dfm(tolower = TRUE) %>% dfm_trim(min_docfreq = 3, max_docfreq = 500) %>% dfm_select(min_nchar = 3, pattern = \"^[a-zA-Z]+$\", valuetype = \"regex\") -> abstracts_dfm require(seededlda) abstracts_seededlda <- textmodel_seededlda(x = abstracts_dfm, dictionary = dictionary(abstracts_dictionary), seeds = 46709394, verbose = TRUE) oolong_test_rater1 <- witi(abstracts_seededlda, abstracts$text, userid = \"Yihui\") oolong_test_rater2 <- clone_oolong(oolong_test_rater1, userid = \"Jenny\") ## Let Yihui do the test. oolong_test_rater1$do_word_intrusion_test() oolong_test_rater1$do_topic_intrusion_test() oolong_test_rater1$lock() ## Let Jenny do the test. oolong_test_rater2$do_word_intrusion_test() oolong_test_rater2$do_topic_intrusion_test() oolong_test_rater2$lock() summarize_oolong(oolong_test_rater1, oolong_test_rater2) #> #> ── Summary (topic model): ────────────────────────────────────────────────────── #> #> ── Word intrusion test ── #> #> ℹ Mean model precision: 0.25 #> ℹ Quantiles of model precision: 0.2, 0.225, 0.25, 0.275, 0.3 #> ℹ P-value of the model precision #> (H0: Model precision is not better than random guess): 0.3656 #> ℹ Krippendorff's alpha: 0.747 #> ℹ K Precision: #> 0, 0.5, 1, 0, 0, 0, 0, 0, 0, 1 #> #> ── Topic intrusion test ── #> #> ℹ Mean TLO: -1.5 #> ℹ Median TLO: -1.12 #> ℹ Quantiles of TLO: -4.71, -2.83, -1.12, 0, 0 #> ℹ P-Value of the median TLO #> (H0: Median TLO is not better than random guess): 0.114"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"about-the-p-values","dir":"Articles","previous_headings":"Validating Topic Models","what":"About the p-values","title":"Overview","text":"test model precision (MP) based one-tailed, one-sample binomial test rater. multiple-rater situation, p-values raters combined using Fisher’s method (.k.. Fisher’s omnibus test). H0: MP better 1/ (n_top_terms + 1) H1: MP better 1/ (n_top_terms + 1) test median TLO based permutation test. H0: Median TLO better random guess. H1: Median TLO better random guess. One must notice two statistical tests testing bear minimum. significant test indicates topic model can make rater(s) perform better random guess. indication good topic interpretability. Also, one use conservative significant level, e.g. α<0.001\\alpha < 0.001.","code":""},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"about-biterm-topic-model","dir":"Articles","previous_headings":"","what":"About Biterm Topic Model","title":"Overview","text":"Please refer vignette BTM.","code":""},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"about-naive-bayes","dir":"Articles","previous_headings":"","what":"About Naive Bayes","title":"Overview","text":"Naive Bayes model supervised machine learning model. package supports Naive Bayes models trained using quanteda.textmodels. Suppose newsgroup_nb Naive Bayes model trained subset classic [20 newsgroups] dataset. can still generate word intrusion word set intrusion tests.","code":"tokens(newsgroup5$text, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE, remove_url = TRUE, spilit_hyphens = TRUE) %>% tokens_wordstem %>% tokens_remove(stopwords(\"en\")) %>% dfm(tolower = TRUE) %>% dfm_trim(min_termfreq = 3, max_docfreq = 0.06, docfreq_type = \"prop\") -> newsgroup_dfm docvars(newsgroup_dfm, \"group\") <- newsgroup5$title newsgroup_nb <- textmodel_nb(newsgroup_dfm, docvars(newsgroup_dfm, \"group\"), distribution = \"Bernoulli\") wi(newsgroup_nb) #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 20, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$lock()>: finalize and see the results wsi(newsgroup_nb) #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 20, 0 coded. #> #> ── Methods ── #> #> • <$do_word_set_intrusion_test()>: do word set intrusion test #> • <$lock()>: finalize and see the results"},{"path":[]},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"creating-gold-standard","dir":"Articles","previous_headings":"Validating Dictionary-based Methods","what":"Creating gold standard","title":"Overview","text":"trump2k dataset 2,000 tweets @realdonaldtrump. example, interested studying sentiment tweets. One can use tools AFINN automatically extract sentiment tweets. However, oolong recommends generate gold standard human coding first using subset. default, oolong selects 1% origin corpus test cases. parameter construct adjective, e.g. positive, liberal, populistic, etc. instructed, use method $do_gold_standard_test() start coding. coding, need first lock test $turn_gold() method available.","code":"tibble(text = trump2k) #> # A tibble: 2,000 × 1 #> text #> #> 1 \"In just out book, Secret Service Agent Gary Byrne doesn't believe that Croo… #> 2 \"Hillary Clinton has announced that she is letting her husband out to campai… #> 3 \"\\\"@TheBrodyFile: Always great to visit with @TheBrodyFile one-on-one with \\… #> 4 \"Explain to @brithume and @megynkelly, who know nothing, that I will beat Hi… #> 5 \"Nobody beats me on National Security. https://t.co/sCrj4Ha1I5\" #> 6 \"\\\"@realbill2016: @realDonaldTrump @Brainykid2010 @shl Trump leading LA Time… #> 7 \"\\\"@teapartynews: Trump Wins Tea Party Group's 'Nashville Straw Poll' - News… #> 8 \"Big Republican Dinner tonight at Mar-a-Lago in Palm Beach. I will be there!\" #> 9 \".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It … #> 10 \"\\\"@brianstoya: @realDonaldTrump For POTUS #2016\\\"\" #> # ℹ 1,990 more rows oolong_test <- gs(input_corpus = trump2k, construct = \"positive\", userid = \"Joe\") oolong_test #> #> ── oolong (gold standard generation) ─────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 0 coded. #> ℹ Construct: positive. #> #> ── Methods ── #> #> • <$do_gold_standard_test()>: generate gold standard #> • <$lock()>: finalize this object and see the results oolong_test$do_gold_standard_test() oolong_test$lock() oolong_test #> #> ── oolong (gold standard generation) ─────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 20 coded. #> ℹ Construct: positive. #> #> ── Methods ── #> #> • <$turn_gold()>: convert the test results into a quanteda corpus"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"example-validating-afinn-using-the-gold-standard","dir":"Articles","previous_headings":"Validating Dictionary-based Methods","what":"Example: Validating AFINN using the gold standard","title":"Overview","text":"locked oolong test can converted quanteda-compatible corpus analysis. corpus contains two docvars, ‘answer’. example, calculate AFINN score tweet using quanteda. dictionary afinn bundle package. Put back vector AFINN score respective docvars study correlation gold standard AFINN.","code":"oolong_test$turn_gold() #> Corpus consisting of 20 documents and 1 docvar. #> text1 : #> \"Thank you Eau Claire, Wisconsin. #VoteTrump on Tuesday, Apr...\" #> #> text2 : #> \"\"@bobby990r_1: @realDonaldTrump would lead polls the second ...\" #> #> text3 : #> \"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump...\" #> #> text4 : #> \"Thank you for a great afternoon Birmingham, Alabama! #Trump2...\" #> #> text5 : #> \"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 ht...\" #> #> text6 : #> \"People believe CNN these days almost as little as they belie...\" #> #> [ reached max_ndoc ... 14 more documents ] #> ℹ Access the answer from the coding with quanteda::docvars(obj, 'answer') gold_standard <- oolong_test$turn_gold() gold_standard %>% tokens(remove_punct = TRUE) %>% dfm() %>% dfm_lookup(afinn) %>% quanteda::convert(to = \"data.frame\") %>% mutate(matching_word_valence = (neg5 * -5) + (neg4 * -4) + (neg3 * -3) + (neg2 * -2) + (neg1 * -1) + (zero * 0) + (pos1 * 1) + (pos2 * 2) + (pos3 * 3) + (pos4 * 4) + (pos5 * 5), base = ntoken(gold_standard, remove_punct = TRUE), afinn_score = matching_word_valence / base) %>% pull(afinn_score) -> all_afinn_score all_afinn_score #> text1 text2 text3 text4 text5 text6 #> 0.33333333 -0.09090909 -0.16666667 0.45454545 0.00000000 0.00000000 #> text7 text8 text9 text10 text11 text12 #> 0.16666667 0.38461538 0.00000000 0.38461538 -0.29166667 0.00000000 #> text13 text14 text15 text16 text17 text18 #> 0.50000000 0.07142857 0.00000000 -0.12000000 0.28571429 0.16000000 #> text19 text20 #> 0.36842105 0.38888889 summarize_oolong(oolong_test, target_value = all_afinn_score) #> New names: #> `geom_smooth()` using formula = 'y ~ x' #> `geom_smooth()` using formula = 'y ~ x' #> #> ── Summary (gold standard generation): #> ───────────────────────────────────────── #> ℹ Correlation: 0.718 (p = 4e-04) #> ℹ Effect of content length: -0.323 (p = 0.1643) #> • `` -> `...1`"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"suggested-workflow-1","dir":"Articles","previous_headings":"Validating Dictionary-based Methods","what":"Suggested workflow","title":"Overview","text":"Create oolong object, clone another coder. According Song et al. (2020), least draw 1% data. Instruct two coders code tweets lock objects. Calculate target value (case, AFINN score) turning one object corpus. Summarize oolong objects target value. Read results. diagnostic plot consists 4 subplots. good idea read Bland & Altman (1986) difference correlation agreement. Subplot (top left): Raw correlation human judgement target value. One want good correlation two. Subplot (top right): Bland-Altman plot. One want correlation. Also, dots randomly scattering around mean value. , two measurements (human judgement target value) good agreement. Subplot (bottom left): Raw correlation target value content length. One want correlation, indication good reliability influence content length. (See Chan et al. 2020) Subplot (bottom right): Cook’s distance data point. One want dot (least dots) threshold. indication raw correlation human judgement target value can influenced extreme values data. textual output contains Krippendorff’s alpha codings raters. order claim validity target value, must first establish reliability gold standard. Song et al. (2020) suggest Krippendorff’s Alpha > 0.7 acceptable cut-.","code":"trump <- gs(input_corpus = trump2k, exact_n = 40, userid = \"JJ\") trump2 <- clone_oolong(trump, userid = \"Winston\") trump$do_gold_standard_test() trump2$do_gold_standard_test() trump$lock() trump2$lock() gold_standard <- trump$turn_gold() gold_standard %>% tokens(remove_punct = TRUE) %>% dfm() %>% dfm_lookup(afinn) %>% quanteda::convert(to = \"data.frame\") %>% mutate(matching_word_valence = (neg5 * -5) + (neg4 * -4) + (neg3 * -3) + (neg2 * -2) + (neg1 * -1) + (zero * 0) + (pos1 * 1) + (pos2 * 2) + (pos3 * 3) + (pos4 * 4) + (pos5 * 5), base = ntoken(gold_standard, remove_punct = TRUE), afinn_score = matching_word_valence / base) %>% pull(afinn_score) -> target_value res <- summarize_oolong(trump, trump2, target_value = target_value) #> New names: #> `geom_smooth()` using formula = 'y ~ x' #> `geom_smooth()` using formula = 'y ~ x' #> • `` -> `...1` #> • `` -> `...2` res #> #> ── Summary (gold standard generation): ───────────────────────────────────────── #> ℹ Krippendorff's Alpha: 0.931 #> ℹ Correlation: 0.744 (p = 2e-04) #> ℹ Effect of content length: -0.323 (p = 0.1643) plot(res)"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"backward-compatibility","dir":"Articles","previous_headings":"","what":"Backward compatibility","title":"Overview","text":"Historically, oolong test objects generated one function: create_oolong. longer case longer recommended anymore. still retained backward compatibility purposes. still need use create_oolong(), important parameters input_model input_corpus. Setting NULL generates different tests.","code":""},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"references","dir":"Articles","previous_headings":"","what":"References","title":"Overview","text":"Chang, J., Gerrish, S., Wang, C., Boyd-Graber, J. L., & Blei, D. M. (2009). Reading tea leaves: humans interpret topic models. Advances neural information processing systems (pp. 288-296). link Ying, L., Montgomery, J. M., & Stewart, B. M. (2021). Inferring concepts topics: Towards procedures validating topics measures. Political Analysis. link Song et al. (2020) validations trust? impact imperfect human annotations gold standard quality validation automated content analysis. Political Communication. link Bland, J. M., & Altman, D. (1986). Statistical methods assessing agreement two methods clinical measurement. lancet, 327(8476), 307-310. Chan et al. (2020) Four best practices measuring news sentiment using ‘--shelf’ dictionaries: large-scale p-hacking experiment. Computational Communication Research. link Nielsen, F. Å. (2011). new ANEW: Evaluation word list sentiment analysis microblogs. arXiv preprint arXiv:1103.2903. link","code":""},{"path":"https://gesistsa.github.io/oolong/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chung-hong Chan. Author, maintainer. Marius Sältzer. Author.","code":""},{"path":"https://gesistsa.github.io/oolong/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan C, Sältzer M (2020). “oolong: R package validating automated content analysis tools.” Journal Open Source Software, 5(55), 2461. doi:10.21105/joss.02461, https://github.com/gesistsa/oolong.","code":"@Article{, title = {oolong: An R package for validating automated content analysis tools}, journal = {Journal of Open Source Software}, author = {Chung-hong Chan and Marius Sältzer}, doi = {10.21105/joss.02461}, url = {https://github.com/gesistsa/oolong}, volume = {5}, number = {55}, pages = {2461}, year = {2020}, }"},{"path":"https://gesistsa.github.io/oolong/index.html","id":"oolong-","dir":"","previous_headings":"","what":"Create Validation Tests for Automated Content Analysis","title":"Create Validation Tests for Automated Content Analysis","text":"goal oolong [1] generate administrate validation tests easily typical automated content analysis tools topic models dictionary-based tools. Please refer overview introduction package. need deploy test online, please refer Deployment Vignette. use BTM, please refer BTM Vignette.","code":""},{"path":"https://gesistsa.github.io/oolong/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Create Validation Tests for Automated Content Analysis","text":"Please cite package : Chan C-h. & Sältzer M., (2020). oolong: R package validating automated content analysis tools. Journal Open Source Software, 5(55), 2461, https://doi.org/10.21105/joss.02461 BibTeX entry, use output citation(package = \"oolong\").","code":""},{"path":"https://gesistsa.github.io/oolong/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Create Validation Tests for Automated Content Analysis","text":"Contributions form feedback, comments, code, bug report welcome. Fork source code, modify, issue pull request. Issues, bug reports: File Github issue.","code":""},{"path":"https://gesistsa.github.io/oolong/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Create Validation Tests for Automated Content Analysis","text":"Please note oolong project released Contributor Code Conduct. contributing project, agree abide terms. /ˈuːlʊŋ/ 烏龍, literally means “Dark Dragon”, semi-oxidized tea Asia. popular Taiwan, Japan Hong Kong. Cantonese Taiwanese Mandarin, word can also mean “confused”. perfectly captures spirit human---loop validation.","code":""},{"path":[]},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"description","dir":"","previous_headings":"","what":"Description","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Intended create standard human---loop validity tests typical automated content analysis topic modeling dictionary-based methods. package offers standard workflow functions prepare, administer evaluate human---loop validity test. package provides functions validating topic models using word intrusion, topic intrusion (Chang et al. 2009, https://papers.nips.cc/paper/3700-reading-tea-leaves--humans-interpret-topic-models) word set intrusion (Ying et al. 2021) doi:10.1017/pan.2021.33 tests. package also provides functions generating gold-standard data useful validating dictionary-based methods. default settings generated tests match suggested Chang et al. (2009) Song et al. (2020) doi:10.1080/10584609.2020.1723752.","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"keywords","dir":"","previous_headings":"","what":"Keywords","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Validity Text Analysis Topic Model","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"science-usecases","dir":"","previous_headings":"","what":"Science Usecase(s)","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"package used literature valid topic models prediction models trained text data, e.g. Rauchfleisch et al. (2023), Rothut, et al. (2023), Eisele, et al. (2023).","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"repository-structure","dir":"","previous_headings":"","what":"Repository structure","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"repository follows standard structure R package.","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"environment-setup","dir":"","previous_headings":"","what":"Environment Setup","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"R installed:","code":"install.packages(\"oolong\")"},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"input-data","dir":"","previous_headings":"","what":"Input Data","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"input data topic model prediction model trained text data. example, one can train topic model text data (tweets Donald trump) included package :","code":"library(seededlda) library(quanteda) trump_corpus <- corpus(trump2k) tokens(trump_corpus, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, split_hyphens = TRUE, remove_url = TRUE) %>% tokens_tolower() %>% tokens_remove(stopwords(\"en\")) %>% tokens_remove(\"@*\") -> trump_toks model <- textmodel_lda(x = dfm(trump_toks), k = 8, verbose = TRUE)"},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"sample-input-and-output-data","dir":"","previous_headings":"","what":"Sample Input and Output Data","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"sample input model trained text data, e.g. sample output oolong R6 object.","code":"library(oolong) library(seededlda) abstracts_seededlda Call: lda(x = x, k = k, label = label, max_iter = max_iter, alpha = alpha, beta = beta, seeds = seeds, words = NULL, verbose = verbose) 10 topics; 2,500 documents; 3,908 features."},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"how-to-use","dir":"","previous_headings":"","what":"How to Use","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Please refer overview package comprehensive introduction test types. Suppose topic model trained text data called abstracts_seededlda, included package. Suppose one like conduct word intrusion test (Chang et al. 2009) validate topic model. test can generated wi() function. One can conduct test following instruction displayed, .e. oolong_test$$do_word_intrusion_test(). One see graphic interface like following conduct test. test, one can finalize test locking test. obtain result test. example:","code":"library(oolong) abstracts_seededlda Call: lda(x = x, k = k, label = label, max_iter = max_iter, alpha = alpha, beta = beta, seeds = seeds, words = NULL, verbose = verbose) 10 topics; 2,500 documents; 3,908 features. oolong_test <- wi(abstracts_seededlda, userid = \"Hadley\") oolong_test ── oolong (topic model) ──────────────────────────────────────────────────────── ✔ WI ✖ TI ✖ WSI ☺ Hadley ℹ WI: k = 10, 0 coded. ── Methods ── • <$do_word_intrusion_test()>: do word intrusion test • <$lock()>: finalize and see the results oolong_test$do_word_intrusion_test() oolong_test$lock() oolong_test ── oolong (topic model) ──────────────────────────────────────────────────────── ✔ WI ✖ TI ✖ WSI ☺ Hadley ℹ WI: k = 10, 10 coded. ── Results: ── ℹ 90% precision"},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"contact-details","dir":"","previous_headings":"","what":"Contact Details","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Maintainer: Chung-hong Chan chainsawtiney@gmail.com Issue Tracker: https://github.com/gesistsa/oolong/issues","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"publication","dir":"","previous_headings":"","what":"Publication","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Chan, C. H., & Sältzer, M. (2020). oolong: R package validating automated content analysis tools. Journal Open Source Software: JOSS, 5(55), 2461. https:://doi.org/10.21105/joss.02461","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":null,"dir":"Reference","previous_headings":"","what":"Abstracts of communication journals dataset — abstracts","title":"Abstracts of communication journals dataset — abstracts","text":"random sample abstracts papers published high-impact communication journals 2000 2017. abstracts_dictionary list terms can used semisupervised techniques keyATM.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Abstracts of communication journals dataset — abstracts","text":"","code":"abstracts abstracts_dfm abstracts_dictionary"},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Abstracts of communication journals dataset — abstracts","text":"object class tbl_df (inherits tbl, data.frame) 2500 rows 1 columns. object class dfm 2500 rows 3998 columns. object class list length 10.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Abstracts of communication journals dataset — abstracts","text":"Chan, C-h, & Grill, C. (2020). [Highs Communication Research: Research Topics High Supply, High Popularity, High Prestige High-Impact Journals.](https://doi.org/10.1177/0093650220944790) Communication Research.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts_seededlda.html","id":null,"dir":"Reference","previous_headings":"","what":"Topic models trained with the abstracts dataset. — abstracts_seededlda","title":"Topic models trained with the abstracts dataset. — abstracts_seededlda","text":"topic models trained different topic model packages.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts_seededlda.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Topic models trained with the abstracts dataset. — abstracts_seededlda","text":"","code":"abstracts_seededlda abstracts_btm"},{"path":"https://gesistsa.github.io/oolong/reference/abstracts_seededlda.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Topic models trained with the abstracts dataset. — abstracts_seededlda","text":"object class textmodel_lda (inherits textmodel, list) length 10. object class BTM length 9.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":null,"dir":"Reference","previous_headings":"","what":"AFINN dictionary — afinn","title":"AFINN dictionary — afinn","text":"AFINN sentiment dictionary quanteda::dictionary format.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"AFINN dictionary — afinn","text":"","code":"afinn"},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"AFINN dictionary — afinn","text":"object class dictionary2 length 11.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"AFINN dictionary — afinn","text":"Nielsen, F. Å. (2011). new ANEW: Evaluation word list sentiment analysis microblogs. arXiv preprint arXiv:1103.2903.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Check whether the oolong needs to be updated — check_oolong","title":"Check whether the oolong needs to be updated — check_oolong","text":"function raises error input oolong object needs updated. Oolong objects generated old version oolong need updated use functionalities recent versions oolong.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check whether the oolong needs to be updated — check_oolong","text":"","code":"check_oolong(oolong, verbose = FALSE)"},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check whether the oolong needs to be updated — check_oolong","text":"oolong oolong object checked verbose, logical, display messages","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check whether the oolong needs to be updated — check_oolong","text":"Nothing","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Check whether the oolong needs to be updated — check_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Clone an oolong object — clone_oolong","title":"Clone an oolong object — clone_oolong","text":"Clone new oolong object. oolong must locked ever coded.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Clone an oolong object — clone_oolong","text":"","code":"clone_oolong(oolong, userid = NA)"},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Clone an oolong object — clone_oolong","text":"oolong oolong object. userid character string denote name coder","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Clone an oolong object — clone_oolong","text":"oolong object","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Clone an oolong object — clone_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Generate an oolong test — create_oolong","title":"Generate an oolong test — create_oolong","text":"create_oolong generates oolong test object can either used validating topic model creating ground truth (gold standard) text corpus. wi (word intrusion test), ti (topic intrusion test), witi (word topic intrusion tests), wsi (word set intrusion test) gs handy wrappers create_oolong. recommended use wrappers instead create_oolong.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate an oolong test — create_oolong","text":"","code":"create_oolong( input_model = NULL, input_corpus = NULL, n_top_terms = 5, bottom_terms_percentile = 0.6, exact_n = NULL, frac = 0.01, n_top_topics = 3, n_topiclabel_words = 8, use_frex_words = FALSE, frexweight = 0.5, input_dfm = NULL, construct = \"positive\", btm_dataframe = NULL, n_correct_ws = 3, wsi_n_top_terms = 20, userid = NA, type = \"witi\", lambda = 1, difficulty = NULL ) wi( input_model = NULL, userid = NA, n_top_terms = 5, bottom_terms_percentile = 0.6, frexweight = 0.5, use_frex_words = FALSE, lambda = 1, difficulty = NULL ) witi( input_model = NULL, input_corpus = NULL, userid = NA, n_top_terms = 5, bottom_terms_percentile = 0.6, exact_n = NULL, frac = 0.01, n_top_topics = 3, n_topiclabel_words = 8, frexweight = 0.5, use_frex_words = FALSE, input_dfm = NULL, btm_dataframe = NULL, lambda = 1, difficulty = NULL ) ti( input_model = NULL, input_corpus = NULL, userid = NA, exact_n = NULL, frac = 0.01, n_top_topics = 3, n_topiclabel_words = 8, frexweight = 0.5, use_frex_words = FALSE, input_dfm = NULL, btm_dataframe = NULL, lambda = 1, difficulty = NULL ) wsi( input_model = NULL, userid = NA, n_topiclabel_words = 4, n_correct_ws = 3, wsi_n_top_terms = 20, frexweight = 0.5, use_frex_words = FALSE, lambda = 1, difficulty = NULL ) gs( input_corpus = NULL, userid = NA, construct = \"positive\", exact_n = NULL, frac = 0.01 )"},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generate an oolong test — create_oolong","text":"input_model (wi, ti, witi, wsi) STM, WarpLDA, topicmodels, KeyATM, seededlda, textmodel_nb, BTM object; NULL, create_oolong assumes want create gold standard. input_corpus (wi, ti, witi, wsi, gs) input_model null, corpus (character vector quanteda::corpus object) generate model object. input_model input_corpus NULL, topic intrusion test cases generated. input_model BTM object, argument ignored. input_model null, generates gold standard test cases. n_top_terms (wi, witi) integer, number top topic words included candidates word intrusion test. bottom_terms_percentile (wi, witi) double, term considered word intruder theta less percentile theta, must within range 0 1 exact_n (ti, witi, gs) integer, number topic intrusion test cases generate, ignore frac NULL frac (ti, witi, gs) double, fraction test cases generated corpus n_top_topics (wi, witi) integer, number relevant topics shown alongside intruder topic n_topiclabel_words (witi, ti, wsi) integer, number topic words shown topic (\"ti\" \"witi\") / word set (\"wsi\") label use_frex_words (wi, witi, ti, wsi) logical, STM object, use FREX words TRUE, use PROB words FALSE frexweight (wi, witi, ti, wsi) double, adjust `frexweight` STM (see [stm::labelTopics()]), effect STM use_frex_words FALSE input_dfm (wi, witi, ti, wsi) dfm object used training input_model, input_model WarpLDA object construct (gs) string, adjective describe construct want coders code gold standard test cases btm_dataframe (witi, ti) dataframe used training input_model, input_model BTM object n_correct_ws (wsi) number word sets shown alongside intruder word set wsi_n_top_terms (wsi) number top topic words topic randomized selected word set label userid character string denote name coder. Default NA (userid); recommended type (create_oolong) character string denote want create. \"wi\": word intrusion test; \"ti\": topic intrusion test; \"witi\": word intrusion test topic intrusion test; \"gs\": gold standard generation lambda (wi, witi, ti, wsi) double, adjust `lambda` WarpLDA (see [text2vec::LatentDirichletAllocation()]) difficulty (wi, witi, ti, wsi) double, deprecated, backward compatibility","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generate an oolong test — create_oolong","text":"oolong test object.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate an oolong test — create_oolong","text":"Use wi, ti, witi, wsi gs generate oolong test choice. recommended supply also userid (current coder). names tests (word intrusion test topic intrusion test) follow Chang et al (2009). Ying et al. (2021), topic intrusion test named \"T8WSI\" (Top 8 Word Set Intrusion). Word set intrusion test package actually \"R4WSI\" (Random 4 Word Set Intrusion) Ying et al. default settings wi, witi, ti follow Chang et al (2009), e.g. n_top_terms = 5; instead n_top_terms = 4 Ying et al. default setting wsi follows Ying et al., e.g. n_topiclabel_words = 4. suggested Song et al. (2020), 1","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"about-create-oolong","dir":"Reference","previous_headings":"","what":"About create_oolong","title":"Generate an oolong test — create_oolong","text":"create_oolong intuitive use, longer recommended use create_oolong generate oolong test. create_oolong retained backward compatibility purposes. function generates oolong test object based input_model input_corpus. input_model NULL, generates oolong test topic model (tm). input_model NULL input_corpus NULL, generates oolong test generating gold standard (gs).","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Generate an oolong test — create_oolong","text":"oolong object, depends purpose, following methods: $do_word_intrusion_test() (tm) launch shiny-based word intrusion test. coder find intruder word related words. $do_topic_intrusion_test() (tm) launch shiny-based topic intrusion test. coder find intruder topic least likely topic document. $do_word_set_intrusion_test() (tm) launch shiny-based word set intrusion test. coder find intruder word set related word sets. $do_gold_standard_test() (gs) launch shiny-based test generating gold standard. coder determine level predetermined constructs 5-point Likert scale. $lock(force = FALSE) (gs/tm) lock object changed anymore. enables summarize_oolong following method. $turn_gold() (gs) convert oolong object quanteda compatible corpus. details, please see overview vignette: vignette(\"overview\", package = \"oolong\")","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Generate an oolong test — create_oolong","text":"Chang, J., Gerrish, S., Wang, C., Boyd-Graber, J. L., & Blei, D. M. (2009). Reading tea leaves: humans interpret topic models. Advances neural information processing systems (pp. 288-296). Song et al. (2020) validations trust? impact imperfect human annotations gold standard quality validation automated content analysis. Political Communication. Ying, L., Montgomery, J. M., & Stewart, B. M. (2021). Topics, Concepts, Measurement: Crowdsourced Procedure Validating Topics Measures. Political Analysis","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Generate an oolong test — create_oolong","text":"Chung-hong Chan, Marius Sältzer","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generate an oolong test — create_oolong","text":"","code":"## Creation of oolong test with only word intrusion test data(abstracts_seededlda) data(abstracts) oolong_test <- wi(input_model = abstracts_seededlda, userid = \"Hadley\") ## Creation of oolong test with both word intrusion test and topic intrusion test oolong_test <- witi(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"Julia\") ## Creation of oolong test with topic intrusion test oolong_test <- ti(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"Jenny\") ## Creation of oolong test with word set intrusion test oolong_test <- wsi(input_model = abstracts_seededlda, userid = \"Garrett\") ## Creation of gold standard oolong_test <- gs(input_corpus = trump2k, userid = \"Yihui\") ## Using create_oolong(); not recommended oolong_test <- create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"JJ\") oolong_test <- create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"Mara\", type = \"ti\") oolong_test <- create_oolong(input_corpus = abstracts$text, userid = \"Winston\", type = \"gs\")"},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Deploy an oolong test — deploy_oolong","title":"Deploy an oolong test — deploy_oolong","text":"time, use function. write deployable version app directory using export_oolong instead. Please refer vignette(\"deploy\", package = \"oolong\") details.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Deploy an oolong test — deploy_oolong","text":"","code":"deploy_oolong(oolong)"},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Deploy an oolong test — deploy_oolong","text":"oolong oolong object deployed. Please note \"witi\" type, .e. oolong object word topic intrusion tests, deployed. Also object must locked ever coded.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Deploy an oolong test — deploy_oolong","text":"Nothing, launches deployable version coding interface","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Deploy an oolong test — deploy_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Deploy an oolong test — deploy_oolong","text":"","code":"# Please try this example in interactive R sessions only. if (interactive()) { data(abstracts_stm) x <- wi(abstracts_stm) deploy_oolong(x) }"},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"function exports oolong test launched Shiny app ideal online deployment. Deploying Shiny app online allows coders conduct test online browser, rather install R computer. contrast testing interfaces launched methods $do_word_intrusion_test(), deployable version provides data download coder finished coding. Downloaded data can revert back locked oolong object using revert_oolong. version might provide solutions permanent storage. deployable Shiny app directory. Shiny app launchable shiny::runApp() deployable rsconnect::deployApp(). Please refer vignette(\"deploy\", package = \"oolong\") details.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"","code":"export_oolong( oolong, dir = base::tempdir(), verbose = TRUE, use_full_path = TRUE )"},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"oolong oolong object exported. Please note \"witi\" type, .e. oolong object word topic intrusion tests, exported. Also object must locked ever coded. dir character string, directory exported. Default temporary directory verbose logical, whether display information exporting use_full_path logical, whether expand dir full path","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"directory exported, invisible","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"","code":"# Please try this example in interactive R sessions only. if (interactive()) { data(abstracts_stm) x <- wi(abstracts_stm) export_oolong(x) }"},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":null,"dir":"Reference","previous_headings":"","what":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"Naive Bayes model (class 'textmodel_nb') trained 20 newsgroups data.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"","code":"newsgroup_nb"},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"object class textmodel_nb (inherits textmodel, list) length 7.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"Lang, K. (1995). Newsweeder: Learning filter netnews. Machine Learning Proceedings 1995 (pp. 331-339). Morgan Kaufmann.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":null,"dir":"Reference","previous_headings":"","what":"Print oolong gold standard object — print.oolong_gold_standard","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"function prints summary oolong gold standard object. oolong gold standard object result $turn_gold() method. quanteda::corpus compatible object.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"","code":"# S3 method for class 'oolong_gold_standard' print(x, ...)"},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"x oolong gold standard object ... parameters","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"None, summary quanteda::corpus displayed","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":null,"dir":"Reference","previous_headings":"","what":"Print and plot oolong summary — print.oolong_summary","title":"Print and plot oolong summary — print.oolong_summary","text":"functions print plot useful summary results summarize_oolong. details, please see overview vignette: vignette(\"overview\", package = \"oolong\")","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Print and plot oolong summary — print.oolong_summary","text":"","code":"# S3 method for class 'oolong_summary' print(x, ...) # S3 method for class 'oolong_summary' plot(x, ...)"},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Print and plot oolong summary — print.oolong_summary","text":"x oolong_summary ... parameters","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Print and plot oolong summary — print.oolong_summary","text":"None","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"summary","dir":"Reference","previous_headings":"","what":"Summary","title":"Print and plot oolong summary — print.oolong_summary","text":"Print function displays following information: Mean model precision (wi, wsi) Higher value indicates better topic interpretability Quantiles model precision (wi) Higher value indicates better topic interpretability P-value model precision (wi) Model precision's p-value calculated one-sample binomial test Fisher's Omnibus method. Krippendorff's alpha (wi, wsi, gs) Krippendorff's Alpha, one oolong object analyzed. K Precision (wi, wsi) Model precision topic. Mean TLO (ti) Mean topic log odds, higher value indicates better interpretability Median TLO (ti) Median topic log odds, higher value indicates better interpretability Quantiles TLO (ti) Quantiles topic log odds P-Value median TLO (ti) Median topic log odds's p-value calculated permutation test. Correlation (average answer) (gs) Pearson's correlation average answer target value Corrlation (content length) (gs) Pearson's correlation content length target value","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"diagnostic-plot","dir":"Reference","previous_headings":"","what":"Diagnostic plot","title":"Print and plot oolong summary — print.oolong_summary","text":"Plot function displays diagnostic plot following subplots (gs ). Top left Correlation answer coders target value check correlation two values. axes minmax transformed. Top right Bland-altman plot answer coders target value check agreement two values. Bottom left Correlation target value content length check influence content length. Bottom right Cook's distance check influential observations.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Print and plot oolong summary — print.oolong_summary","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Obtain a locked oolong from a downloaded data file — revert_oolong","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"generate locked oolong object original oolong object RDS file. RDS file downloaded deployed Shiny app.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"","code":"revert_oolong(oolong, rds_file)"},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"oolong oolong object used deployment rds_file path downloaded RDS file","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"locked oolong object based data downloaded RDS file","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarize oolong objects — summarize_oolong","title":"Summarize oolong objects — summarize_oolong","text":"function summarizes one oolong objects. oolong objects must locked.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarize oolong objects — summarize_oolong","text":"","code":"summarize_oolong(..., target_value = NULL, n_iter = 1500) summarise_oolong(..., target_value = NULL, n_iter = 1500)"},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarize oolong objects — summarize_oolong","text":"... (tm/gs) one oolong objects summarized target_value (gs) vector numeric values, value want validate human-coded gold standard. One example target value sentiment score extracted automatically text n_iter (ti) number iterations calculate median test","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarize oolong objects — summarize_oolong","text":"oolong summary. Depends purpose, oolong summary object following values: $type (gs/tm) type analysis, either 'gs' 'tm' $kripp_alpha; $kripp_alpha_wsi (wi, wsi) Krippendorff's Alpha, one oolong object analyzed. $rater_precision; $rater_precision_wsi (wi, wsi) Model precision $res$rater_precision_p_value (wi) Model precision's p-value calculated one-sample binomial test Fisher's Omnibus method. $k_precision; $k_precision_wsi (wi, wsi) precision topic $tlo (ti) vector topic log odds $tlo_pvalue (ti) Median topic log odds's p-value calculated permutation test. $cor (gs) Pearson's correlation average answer target value $cor_length (gs) Pearson's correlation content length target value $diag_plot (gs) diagnostic plot. useful summary object can obtained either print.oolong_summary plot.oolong_summary. details, please see overview vignette: vignette(\"overview\", package = \"oolong\")","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Summarize oolong objects — summarize_oolong","text":"Chang, J., Gerrish, S., Wang, C., Boyd-Graber, J. L., & Blei, D. M. (2009). Reading tea leaves: humans interpret topic models. Advances neural information processing systems (pp. 288-296). Song et al. (2020) validations trust? impact imperfect human annotations gold standard quality validation automated content analysis. Political Communication. Ying, L., Montgomery, J. M., & Stewart, B. M. (2021). Topics, Concepts, Measurement: Crowdsourced Procedure Validating Topics Measures. Political Analysis.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Summarize oolong objects — summarize_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarize oolong objects — summarize_oolong","text":"","code":"# Please try this example in interactive R sessions only. if (interactive()) { data(abstracts_stm) oolong_test1 <- create_oolong(abstracts_stm) oolong_test2 <- clone_oolong(oolong_test1) oolong_test1$do_word_intrusion_test() oolong_test2$do_word_intrusion_test() oolong_test1$lock() oolong_test2$lock() summarize_oolong(oolong_test1, oolong_test2) }"},{"path":"https://gesistsa.github.io/oolong/reference/trump2k.html","id":null,"dir":"Reference","previous_headings":"","what":"Trump's tweets dataset — trump2k","title":"Trump's tweets dataset — trump2k","text":"random sample 2000 tweets @realdonaldtrump account assumption duty president United States.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/trump2k.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trump's tweets dataset — trump2k","text":"","code":"trump2k"},{"path":"https://gesistsa.github.io/oolong/reference/trump2k.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Trump's tweets dataset — trump2k","text":"object class character length 2000.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Update an oolong object to the latest version — update_oolong","title":"Update an oolong object to the latest version — update_oolong","text":"function update old oolong object latest version.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Update an oolong object to the latest version — update_oolong","text":"","code":"update_oolong(oolong, verbose = TRUE)"},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Update an oolong object to the latest version — update_oolong","text":"oolong oolong object updated verbose, logical, display messages","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Update an oolong object to the latest version — update_oolong","text":"updated oolong object","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Update an oolong object to the latest version — update_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-062-development","dir":"Changelog","previous_headings":"","what":"oolong 0.6.2 (development)","title":"oolong 0.6.2 (development)","text":"Add content MH Use icr calculation Krippendorff’s Alpha","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-061","dir":"Changelog","previous_headings":"","what":"oolong 0.6.1","title":"oolong 0.6.1","text":"CRAN release: 2024-04-15 Add fixes quanteda 4.0.0.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-060","dir":"Changelog","previous_headings":"","what":"oolong 0.6.0","title":"oolong 0.6.0","text":"CRAN release: 2024-02-10 Use seededlda instead keyATM demo, can reduce version requirement.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-051","dir":"Changelog","previous_headings":"","what":"oolong 0.5.1","title":"oolong 0.5.1","text":"Transfer ownership gesistsa. Add pkgdown website clean many documents.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-050","dir":"Changelog","previous_headings":"","what":"oolong 0.5.0","title":"oolong 0.5.0","text":"CRAN release: 2023-08-25 Potential breaking change: parameter difficulty deprecated. Instead, please use respective frewweight (STM) lambda (Warp LDA). legacy code explicitly using difficulty parameter, change break code. However, use following pattern, might need change legacy code accordingly. package-level documentation removed. Increase required R version 4.0 keyATM","code":"## This code is okay set.seed(123) wsi(abstracts_stm, use_frex_words = TRUE, difficulty = 0.8) set.seed(123) ## You will get different results with oolong 0.5.0 wsi(abstracts_stm, use_frex_words = TRUE) ## You need to explicitly use the old default, which is quite high set.seed(123) wsi(abstracts_stm, use_frex_words = TRUE, frexweight = 1)"},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-043","dir":"Changelog","previous_headings":"","what":"oolong 0.4.3","title":"oolong 0.4.3","text":"CRAN release: 2023-06-11 Upgrade Shiny test cases shinytest2 Clarify coding task can paused, saved, resumed Vignette Package maintenance","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-041","dir":"Changelog","previous_headings":"","what":"oolong 0.4.1","title":"oolong 0.4.1","text":"CRAN release: 2021-11-09 Eliminate miniUI dependency. Update documentation reflect newly published papers, e.g. Ying et al.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-040","dir":"Changelog","previous_headings":"","what":"oolong 0.4.0","title":"oolong 0.4.0","text":"CRAN release: 2021-05-31 Add export_oolong deploy_oolong online deployment [thanks Marius Sältzer, Daniel Braby (friend Louis), Johannes Gruber Felicia Loecherbach testing feature; thanks SAGE Ocean concept grant support development feature] Support models seededlda [thanks Marius Sältzer] Support Naive Bayes models quanteda.textmodels [thanks Marius Sältzer] Support generation word set intrusion test (Ying et al. forthcoming) Support generation oolong object topic intrusion test Add new wrappers: wi, ti, witi, wsi, gs Add userid suggested parameter Total revamp object oolong tests; add meta data. Add update_oolong updating object created older versions oolong Update print method oolong tests; now based cli Various bug fixes; Shiny components now automatically tested","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-0311","dir":"Changelog","previous_headings":"","what":"oolong 0.3.11","title":"oolong 0.3.11","text":"CRAN release: 2020-11-13 Support BTM [thanks Marius Sältzer] Update Shiny UI (jump button) Various bug fixes","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-034","dir":"Changelog","previous_headings":"","what":"oolong 0.3.4","title":"oolong 0.3.4","text":"CRAN release: 2020-03-21 Initial CRAN version.","code":""}] +[{"path":[]},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement [INSERT CONTACT METHOD]. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://gesistsa.github.io/oolong/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.0, available https://www.contributor-covenant.org/version/2/0/ code_of_conduct.html. Community Impact Guidelines inspired Mozilla’s code conduct enforcement ladder. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https:// www.contributor-covenant.org/translations.","code":""},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"gnu-lesser-general-public-license","dir":"","previous_headings":"","what":"GNU LESSER GENERAL PUBLIC LICENSE","title":"NA","text":"Version 2.1, February 1999","code":"Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.]"},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"preamble","dir":"","previous_headings":"","what":"Preamble","title":"NA","text":"licenses software designed take away freedom share change . contrast, GNU General Public Licenses intended guarantee freedom share change free software–make sure software free users. license, Lesser General Public License, applies specially designated software packages–typically libraries–Free Software Foundation authors decide use . can use , suggest first think carefully whether license ordinary General Public License better strategy use particular case, based explanations . speak free software, referring freedom use, price. General Public Licenses designed make sure freedom distribute copies free software (charge service wish); receive source code can get want ; can change software use pieces new free programs; informed can things. protect rights, need make restrictions forbid distributors deny rights ask surrender rights. restrictions translate certain responsibilities distribute copies library modify . example, distribute copies library, whether gratis fee, must give recipients rights gave . must make sure , , receive can get source code. link code library, must provide complete object files recipients, can relink library making changes library recompiling . must show terms know rights. protect rights two-step method: (1) copyright library, (2) offer license, gives legal permission copy, distribute /modify library. protect distributor, want make clear warranty free library. Also, library modified someone else passed , recipients know original version, original author’s reputation affected problems might introduced others. Finally, software patents pose constant threat existence free program. wish make sure company effectively restrict users free program obtaining restrictive license patent holder. Therefore, insist patent license obtained version library must consistent full freedom use specified license. GNU software, including libraries, covered ordinary GNU General Public License. license, GNU Lesser General Public License, applies certain designated libraries, quite different ordinary General Public License. use license certain libraries order permit linking libraries non-free programs. program linked library, whether statically using shared library, combination two legally speaking combined work, derivative original library. ordinary General Public License therefore permits linking entire combination fits criteria freedom. Lesser General Public License permits lax criteria linking code library. call license “Lesser” General Public License Less protect user’s freedom ordinary General Public License. also provides free software developers Less advantage competing non-free programs. disadvantages reason use ordinary General Public License many libraries. However, Lesser license provides advantages certain special circumstances. example, rare occasions, may special need encourage widest possible use certain library, becomes de-facto standard. achieve , non-free programs must allowed use library. frequent case free library job widely used non-free libraries. case, little gain limiting free library free software , use Lesser General Public License. cases, permission use particular library non-free programs enables greater number people use large body free software. example, permission use GNU C Library non-free programs enables many people use whole GNU operating system, well variant, GNU/Linux operating system. Although Lesser General Public License Less protective users’ freedom, ensure user program linked Library freedom wherewithal run program using modified version Library. precise terms conditions copying, distribution modification follow. Pay close attention difference “work based library” “work uses library”. former contains code derived library, whereas latter must combined library order run.","code":""},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"terms-and-conditions-for-copying-distribution-and-modification","dir":"","previous_headings":"","what":"TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION","title":"NA","text":"0. License Agreement applies software library program contains notice placed copyright holder authorized party saying may distributed terms Lesser General Public License (also called “License”). licensee addressed “”. “library” means collection software functions /data prepared conveniently linked application programs (use functions data) form executables. “Library”, , refers software library work distributed terms. “work based Library” means either Library derivative work copyright law: say, work containing Library portion , either verbatim modifications /translated straightforwardly another language. (Hereinafter, translation included without limitation term “modification”.) “Source code” work means preferred form work making modifications . library, complete source code means source code modules contains, plus associated interface definition files, plus scripts used control compilation installation library. Activities copying, distribution modification covered License; outside scope. act running program using Library restricted, output program covered contents constitute work based Library (independent use Library tool writing ). Whether true depends Library program uses Library . 1. may copy distribute verbatim copies Library’s complete source code receive , medium, provided conspicuously appropriately publish copy appropriate copyright notice disclaimer warranty; keep intact notices refer License absence warranty; distribute copy License along Library. may charge fee physical act transferring copy, may option offer warranty protection exchange fee. 2. may modify copy copies Library portion , thus forming work based Library, copy distribute modifications work terms Section 1 , provided also meet conditions: ) modified work must software library. b) must cause files modified carry prominent notices stating changed files date change. c) must cause whole work licensed charge third parties terms License. d) facility modified Library refers function table data supplied application program uses facility, argument passed facility invoked, must make good faith effort ensure , event application supply function table, facility still operates, performs whatever part purpose remains meaningful. (example, function library compute square roots purpose entirely well-defined independent application. Therefore, Subsection 2d requires application-supplied function table used function must optional: application supply , square root function must still compute square roots.) requirements apply modified work whole. identifiable sections work derived Library, can reasonably considered independent separate works , License, terms, apply sections distribute separate works. distribute sections part whole work based Library, distribution whole must terms License, whose permissions licensees extend entire whole, thus every part regardless wrote . Thus, intent section claim rights contest rights work written entirely ; rather, intent exercise right control distribution derivative collective works based Library. addition, mere aggregation another work based Library Library (work based Library) volume storage distribution medium bring work scope License. 3. may opt apply terms ordinary GNU General Public License instead License given copy Library. , must alter notices refer License, refer ordinary GNU General Public License, version 2, instead License. (newer version version 2 ordinary GNU General Public License appeared, can specify version instead wish.) make change notices. change made given copy, irreversible copy, ordinary GNU General Public License applies subsequent copies derivative works made copy. option useful wish copy part code Library program library. 4. may copy distribute Library (portion derivative , Section 2) object code executable form terms Sections 1 2 provided accompany complete corresponding machine-readable source code, must distributed terms Sections 1 2 medium customarily used software interchange. distribution object code made offering access copy designated place, offering equivalent access copy source code place satisfies requirement distribute source code, even though third parties compelled copy source along object code. 5. program contains derivative portion Library, designed work Library compiled linked , called “work uses Library”. work, isolation, derivative work Library, therefore falls outside scope License. However, linking “work uses Library” Library creates executable derivative Library (contains portions Library), rather “work uses library”. executable therefore covered License. Section 6 states terms distribution executables. “work uses Library” uses material header file part Library, object code work may derivative work Library even though source code . Whether true especially significant work can linked without Library, work library. threshold true precisely defined law. object file uses numerical parameters, data structure layouts accessors, small macros small inline functions (ten lines less length), use object file unrestricted, regardless whether legally derivative work. (Executables containing object code plus portions Library still fall Section 6.) Otherwise, work derivative Library, may distribute object code work terms Section 6. executables containing work also fall Section 6, whether linked directly Library . 6. exception Sections , may also combine link “work uses Library” Library produce work containing portions Library, distribute work terms choice, provided terms permit modification work customer’s use reverse engineering debugging modifications. must give prominent notice copy work Library used Library use covered License. must supply copy License. work execution displays copyright notices, must include copyright notice Library among , well reference directing user copy License. Also, must one things: ) Accompany work complete corresponding machine-readable source code Library including whatever changes used work (must distributed Sections 1 2 ); , work executable linked Library, complete machine-readable “work uses Library”, object code /source code, user can modify Library relink produce modified executable containing modified Library. (understood user changes contents definitions files Library necessarily able recompile application use modified definitions.) b) Use suitable shared library mechanism linking Library. suitable mechanism one (1) uses run time copy library already present user’s computer system, rather copying library functions executable, (2) operate properly modified version library, user installs one, long modified version interface-compatible version work made . c) Accompany work written offer, valid least three years, give user materials specified Subsection 6a, , charge cost performing distribution. d) distribution work made offering access copy designated place, offer equivalent access copy specified materials place. e) Verify user already received copy materials already sent user copy. executable, required form “work uses Library” must include data utility programs needed reproducing executable . However, special exception, materials distributed need include anything normally distributed (either source binary form) major components (compiler, kernel, ) operating system executable runs, unless component accompanies executable. may happen requirement contradicts license restrictions proprietary libraries normally accompany operating system. contradiction means use Library together executable distribute. 7. may place library facilities work based Library side--side single library together library facilities covered License, distribute combined library, provided separate distribution work based Library library facilities otherwise permitted, provided two things: ) Accompany combined library copy work based Library, uncombined library facilities. must distributed terms Sections . b) Give prominent notice combined library fact part work based Library, explaining find accompanying uncombined form work. 8. may copy, modify, sublicense, link , distribute Library except expressly provided License. attempt otherwise copy, modify, sublicense, link , distribute Library void, automatically terminate rights License. However, parties received copies, rights, License licenses terminated long parties remain full compliance. 9. required accept License, since signed . However, nothing else grants permission modify distribute Library derivative works. actions prohibited law accept License. Therefore, modifying distributing Library (work based Library), indicate acceptance License , terms conditions copying, distributing modifying Library works based . 10. time redistribute Library (work based Library), recipient automatically receives license original licensor copy, distribute, link modify Library subject terms conditions. may impose restrictions recipients’ exercise rights granted herein. responsible enforcing compliance third parties License. 11. , consequence court judgment allegation patent infringement reason (limited patent issues), conditions imposed (whether court order, agreement otherwise) contradict conditions License, excuse conditions License. distribute satisfy simultaneously obligations License pertinent obligations, consequence may distribute Library . example, patent license permit royalty-free redistribution Library receive copies directly indirectly , way satisfy License refrain entirely distribution Library. portion section held invalid unenforceable particular circumstance, balance section intended apply, section whole intended apply circumstances. purpose section induce infringe patents property right claims contest validity claims; section sole purpose protecting integrity free software distribution system implemented public license practices. Many people made generous contributions wide range software distributed system reliance consistent application system; author/donor decide willing distribute software system licensee impose choice. section intended make thoroughly clear believed consequence rest License. 12. distribution /use Library restricted certain countries either patents copyrighted interfaces, original copyright holder places Library License may add explicit geographical distribution limitation excluding countries, distribution permitted among countries thus excluded. case, License incorporates limitation written body License. 13. Free Software Foundation may publish revised /new versions Lesser General Public License time time. new versions similar spirit present version, may differ detail address new problems concerns. version given distinguishing version number. Library specifies version number License applies “later version”, option following terms conditions either version later version published Free Software Foundation. Library specify license version number, may choose version ever published Free Software Foundation. 14. wish incorporate parts Library free programs whose distribution conditions incompatible , write author ask permission. software copyrighted Free Software Foundation, write Free Software Foundation; sometimes make exceptions . decision guided two goals preserving free status derivatives free software promoting sharing reuse software generally. WARRANTY 15. LIBRARY LICENSED FREE CHARGE, WARRANTY LIBRARY, EXTENT PERMITTED APPLICABLE LAW. EXCEPT OTHERWISE STATED WRITING COPYRIGHT HOLDERS /PARTIES PROVIDE LIBRARY “” WITHOUT WARRANTY KIND, EITHER EXPRESSED IMPLIED, INCLUDING, LIMITED , IMPLIED WARRANTIES MERCHANTABILITY FITNESS PARTICULAR PURPOSE. ENTIRE RISK QUALITY PERFORMANCE LIBRARY . LIBRARY PROVE DEFECTIVE, ASSUME COST NECESSARY SERVICING, REPAIR CORRECTION. 16. EVENT UNLESS REQUIRED APPLICABLE LAW AGREED WRITING COPYRIGHT HOLDER, PARTY MAY MODIFY /REDISTRIBUTE LIBRARY PERMITTED , LIABLE DAMAGES, INCLUDING GENERAL, SPECIAL, INCIDENTAL CONSEQUENTIAL DAMAGES ARISING USE INABILITY USE LIBRARY (INCLUDING LIMITED LOSS DATA DATA RENDERED INACCURATE LOSSES SUSTAINED THIRD PARTIES FAILURE LIBRARY OPERATE SOFTWARE), EVEN HOLDER PARTY ADVISED POSSIBILITY DAMAGES.","code":""},{"path":[]},{"path":"https://gesistsa.github.io/oolong/LICENSE.html","id":"how-to-apply-these-terms-to-your-new-libraries","dir":"","previous_headings":"","what":"How to Apply These Terms to Your New Libraries","title":"NA","text":"develop new library, want greatest possible use public, recommend making free software everyone can redistribute change. can permitting redistribution terms (, alternatively, terms ordinary General Public License). apply terms, attach following notices library. safest attach start source file effectively convey exclusion warranty; file least “copyright” line pointer full notice found. Also add information contact electronic paper mail. also get employer (work programmer) school, , sign “copyright disclaimer” library, necessary. sample; alter names: ’s !","code":"one line to give the library's name and an idea of what it does. Copyright (C) year name of author This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. signature of Ty Coon, 1 April 1990 Ty Coon, President of Vice"},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"requirement-1-keep-your-quanteda-corpus","dir":"Articles","previous_headings":"","what":"Requirement #1: Keep your quanteda corpus","title":"BTM","text":"every document unique document id. can regular text cleaning, stemming procedure quanteda. Instead making product DFM object, make token object. may read issue Benoit et al.","code":"require(BTM) #> Loading required package: BTM require(quanteda) #> Loading required package: quanteda #> Package version: 4.1.0 #> Unicode version: 14.0 #> ICU version: 70.1 #> Parallel computing: disabled #> See https://quanteda.io for tutorials and examples. require(oolong) #> Loading required package: oolong trump_corpus <- corpus(trump2k) tokens(trump_corpus, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, split_hyphens = TRUE, remove_url = TRUE) %>% tokens_tolower() %>% tokens_remove(stopwords(\"en\")) %>% tokens_remove(\"@*\") -> trump_toks"},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"requirement-2-keep-your-data-frame","dir":"Articles","previous_headings":"","what":"Requirement #2: Keep your data frame","title":"BTM","text":"Use function convert token object data frame. Train BTM model","code":"as.data.frame.tokens <- function(x) { data.frame( doc_id = rep(names(x), lengths(x)), tokens = unlist(x, use.names = FALSE) ) } trump_dat <- as.data.frame.tokens(trump_toks) trump_btm <- BTM(trump_dat, k = 8, iter = 500, trace = 10)"},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"pecularities-of-btm","dir":"Articles","previous_headings":"Requirement #2: Keep your data frame","what":"Pecularities of BTM","title":"BTM","text":"generate θt\\theta_{t} . However, many NaN 1994 rows (trump2k 2000 tweets) due empty documents. Also, row order messed .","code":"theta <- predict(trump_btm, newdata = trump_dat) dim(theta) #> [1] 1994 8 setdiff(docid(trump_corpus), row.names(theta)) #> [1] \"text604\" \"text633\" \"text659\" \"text1586\" \"text1587\" \"text1761\" trump_corpus[604] #> Corpus consisting of 1 document. #> text604 : #> \"http://t.co/PtViAyrO4A\" head(row.names(theta), 100) #> [1] \"text1\" \"text10\" \"text100\" \"text1000\" \"text1001\" \"text1002\" #> [7] \"text1003\" \"text1004\" \"text1005\" \"text1006\" \"text1007\" \"text1008\" #> [13] \"text1009\" \"text101\" \"text1010\" \"text1011\" \"text1012\" \"text1013\" #> [19] \"text1014\" \"text1015\" \"text1016\" \"text1017\" \"text1018\" \"text1019\" #> [25] \"text102\" \"text1020\" \"text1021\" \"text1022\" \"text1023\" \"text1024\" #> [31] \"text1025\" \"text1026\" \"text1027\" \"text1028\" \"text1029\" \"text103\" #> [37] \"text1030\" \"text1031\" \"text1032\" \"text1033\" \"text1034\" \"text1035\" #> [43] \"text1036\" \"text1037\" \"text1038\" \"text1039\" \"text104\" \"text1040\" #> [49] \"text1041\" \"text1042\" \"text1043\" \"text1044\" \"text1045\" \"text1046\" #> [55] \"text1047\" \"text1048\" \"text1049\" \"text105\" \"text1050\" \"text1051\" #> [61] \"text1052\" \"text1053\" \"text1054\" \"text1055\" \"text1056\" \"text1057\" #> [67] \"text1058\" \"text1059\" \"text106\" \"text1060\" \"text1061\" \"text1062\" #> [73] \"text1063\" \"text1064\" \"text1065\" \"text1066\" \"text1067\" \"text1068\" #> [79] \"text1069\" \"text107\" \"text1070\" \"text1071\" \"text1072\" \"text1073\" #> [85] \"text1074\" \"text1075\" \"text1076\" \"text1077\" \"text1078\" \"text1079\" #> [91] \"text108\" \"text1080\" \"text1081\" \"text1082\" \"text1083\" \"text1084\" #> [97] \"text1085\" \"text1086\" \"text1087\" \"text1088\""},{"path":"https://gesistsa.github.io/oolong/articles/btm.html","id":"oolongs-support-for-btm","dir":"Articles","previous_headings":"","what":"Oolong’s support for BTM","title":"BTM","text":"Oolong problem generating word intrusion test BTM like topic models. generating topic intrusion tests, however, must provide data frame used training (case trump_dat). input_corpus must quanteda corpus . btm_dataframe must NULL. input_corpus must quanteda corpus.","code":"oolong <- create_oolong(trump_btm) #> Error in get(paste0(generic, \".\", class), envir = get_method_env()) : #> object 'type_sum.accel' not found oolong #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$lock()>: finalize and see the results oolong <- create_oolong(trump_btm, trump_corpus, btm_dataframe = trump_dat) oolong #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✔ TI ✖ WSI #> ℹ WI: k = 8, 0 coded. #> ℹ TI: n = 20, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$do_topic_intrusion_test()>: do topic intrusion test #> • <$lock()>: finalize and see the results oolong <- create_oolong(trump_btm, trump_corpus) #> Error: You need to provide input_corpus (in quanteda format) and btm_dataframe for generating topic intrusion tests. oolong <- create_oolong(trump_btm, trump2k, btm_dataframe = trump_dat) #> Error: You need to provide input_corpus (in quanteda format) and btm_dataframe for generating topic intrusion tests."},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"create-an-oolong-test","dir":"Articles","previous_headings":"","what":"Create an oolong test","title":"Deploy","text":"Please note one deploy oolong test objects word topic intrusion tests, .e. created using witi() online. need tests, need deploy two separate instances: one created using wi() another created using ti(). guide, assume want deploy word set intrusion test online.","code":"library(oolong) wsi_test <- wsi(abstracts_seededlda) #> Error in get(paste0(generic, \".\", class), envir = get_method_env()) : #> object 'type_sum.accel' not found wsi_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 10, 0 coded. #> #> ── Methods ── #> #> • <$do_word_set_intrusion_test()>: do word set intrusion test #> • <$lock()>: finalize and see the results"},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"deploy-the-test-online","dir":"Articles","previous_headings":"","what":"Deploy the test online","title":"Deploy","text":"First, need export oolong test object stand alone Shiny app. stand alone Shiny app directory. directory two files structure called “Single-file Shiny app.” Experienced Shiny users might preferred method deploying app whatever Shiny server can master. less experienced users, simplest way deploy app online use shinyapps.io (free tier available 25 hours computational time per month). Please register account shinyapps.io configure rsconnect. Please refer guide information. Please remember configure tokens. RStudio users, simplest way deploy app shinyapps.io first launch app. click Publish button right corner launched window. asked title app, just give name, e.g. wsi_test. probably can keep default settings push Publish button initialize deployment process. hiccup, get URL deployed oolong test. Something like: https://yourname.shinyapps.io/wsi_test/","code":"export_oolong(wsi_test, dir = \"./wsi_test\", use_full_path = FALSE) #> ℹ The Shiny has been written to the directory: ./wsi_test #> ℹ You can test the app with: shiny::runApp(\"./wsi_test\") fs::dir_tree(\"./wsi_test\") #> ./wsi_test #> ├── app.R #> └── oolong.RDS ## replace , , with the information from your profile on Shinyapps.io: click Your name -> Tokens rsconnect::setAccountInfo(name=\"\", token=\"\", secret=\"\") library(shiny) runApp(\"./wsi_test\")"},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"conduct-the-test","dir":"Articles","previous_headings":"","what":"Conduct the test","title":"Deploy","text":"can give URL coders conduct test browser online. difference deployed version , userid prompt download button coding. instruct coders download data file coding return . 2","code":""},{"path":"https://gesistsa.github.io/oolong/articles/deploy.html","id":"revert","dir":"Articles","previous_headings":"","what":"Revert","title":"Deploy","text":"can obtain locked oolong object original oolong downloaded data file. revert_oolong verifications original oolong object make sure error cheating.","code":"revert_oolong(wsi_test, \"oolong_2021-05-22 20 51 26 Hadley Wickham.RDS\") #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Hadley Wickham #> ℹ WSI: n = 10, 10 coded. #> #> ── Results: ── #> #> ℹ 80% precision (WSI)"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"installation","dir":"Articles","previous_headings":"","what":"Installation","title":"Overview","text":"package constantly changing, suggest using development version GitHub: can also install “stable” (slightly older) version CRAN:","code":"# install.packages(\"devtools\") devtools::install_github(\"chainsawriot/oolong\") install.packages(\"oolong\")"},{"path":[]},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"word-intrusion-test","dir":"Articles","previous_headings":"Validating Topic Models","what":"Word intrusion test","title":"Overview","text":"abstracts_seededlda example topic model trained data abstracts using seededlda package. Currently, package supports structural topic models / correlated topic models stm, Warp LDA models text2vec , LDA/CTM models topicmodels, Biterm Topic Models BTM, Keyword Assisted Topic Models keyATM, seeded LDA models seededlda. Although strictly topic model, Naive Bayes models quanteda.textmodels also supported. See section Naive Bayes information. create oolong test word intrusion test, use function wi. recommended provide user id coder going test. instructed, use method $do_word_intrusion_test() start coding. can pause test clicking “Exit” button. progress recorded object. want save progress, just save object (e.g. saveRDS(oolong_test, \"oolong_test.RDS\")). resume test, launch test . coding (items coded), need press “Exit” button quit coding interface lock test. , can look model precision printing oolong test.","code":"library(oolong) library(seededlda) #> Loading required package: quanteda #> Package version: 4.1.0 #> Unicode version: 14.0 #> ICU version: 70.1 #> Parallel computing: disabled #> See https://quanteda.io for tutorials and examples. #> Loading required package: proxyC #> #> Attaching package: 'proxyC' #> The following object is masked from 'package:stats': #> #> dist #> #> Attaching package: 'seededlda' #> The following object is masked from 'package:quanteda': #> #> info_tbb #> The following object is masked from 'package:stats': #> #> terms library(quanteda) library(dplyr) #> Error in get(paste0(generic, \".\", class), envir = get_method_env()) : #> object 'type_sum.accel' not found #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union abstracts_seededlda #> #> Call: #> lda(x = x, k = k, label = label, max_iter = max_iter, alpha = alpha, #> beta = beta, seeds = seeds, words = NULL, verbose = verbose) #> #> 10 topics; 2,500 documents; 3,908 features. oolong_test <- wi(abstracts_seededlda, userid = \"Hadley\") oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$lock()>: finalize and see the results oolong_test$do_word_intrusion_test() oolong_test$lock() oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ☺ Hadley #> ℹ WI: k = 10, 10 coded. #> #> ── Results: ── #> #> ℹ 90% precision"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"word-set-intrusion-test","dir":"Articles","previous_headings":"Validating Topic Models","what":"Word set intrusion test","title":"Overview","text":"Word set intrusion test variant word intrusion test (Ying et al., 2021), multiple word sets generated top terms one topic juxtaposed one intruder word set generated similarly another topic. Ying et al., test called “R4WSI” 4 word sets displayed. default, oolong generates also R4WSI. However, also possible generate R(N)WSI setting parameter n_correct_ws N - 1. Use method $do_word_set_intrusion_test() start coding.","code":"oolong_test <- wsi(abstracts_seededlda, userid = \"Garrett\") oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 0 coded. #> #> ── Methods ── #> #> • <$do_word_set_intrusion_test()>: do word set intrusion test #> • <$lock()>: finalize and see the results oolong_test$do_word_set_intrusion_test() oolong_test$lock() oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ☺ Garrett #> ℹ WSI: n = 10, 10 coded. #> #> ── Results: ── #> #> ℹ 90% precision (WSI)"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"topic-intrusion-test","dir":"Articles","previous_headings":"Validating Topic Models","what":"Topic intrusion test","title":"Overview","text":"example, abstracts_seededlda generated corpus abstracts$text Creating oolong test object corpus used training topic model generate topic intrusion test cases. Similarly, use $do_topic_intrusion_test code test cases, lock test $lock() can look TLO (topic log odds) value printing oolong test.","code":"library(tibble) abstracts #> # A tibble: 2,500 × 1 #> text #> #> 1 This study explores the benefits and risks featured in medical tourism broke… #> 2 This article puts forth the argument that with the transfer of stock trading… #> 3 The purpose of this study was to evaluate the effect the visual fidelity of … #> 4 Among the many health issues relevant to college students, overconsumption o… #> 5 This address, delivered at ICA's 50th anniversary conference, calls on the a… #> 6 The Internet has often been used to reach men who have sex with men (MSMs) i… #> 7 This article argues that the literature describing the internet revolution i… #> 8 This research study examined Bud Goodall's online health narrative as a case… #> 9 Information technology and new media allow for collecting and sharing person… #> 10 Using a national, telephone survey of 1,762 adolescents aged 12-17 years, th… #> # ℹ 2,490 more rows oolong_test <- ti(abstracts_seededlda, abstracts$text, userid = \"Julia\") oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 0 coded. #> #> ── Methods ── #> #> • <$do_topic_intrusion_test()>: do topic intrusion test #> • <$lock()>: finalize and see the results oolong_test$do_topic_intrusion_test() oolong_test$lock() oolong_test #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✔ TI ✖ WSI #> ☺ Julia #> ℹ TI: n = 25, 25 coded. #> #> ── Results: ── #> #> ℹ TLO: -0.187"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"suggested-workflow","dir":"Articles","previous_headings":"Validating Topic Models","what":"Suggested workflow","title":"Overview","text":"test makes sense one coder involved. suggested workflow create test, clone oolong object. Ask multiple coders test(s) summarize results. Preprocess create document-feature matrix Train topic model. Create new oolong object. Clone oolong object used raters. Ask different coders code object lock object. Get summary two objects.","code":"tokens(abstracts$text, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE, remove_url = TRUE, spilit_hyphens = TRUE) %>% tokens_wordstem %>% tokens_remove(stopwords(\"en\")) %>% dfm(tolower = TRUE) %>% dfm_trim(min_docfreq = 3, max_docfreq = 500) %>% dfm_select(min_nchar = 3, pattern = \"^[a-zA-Z]+$\", valuetype = \"regex\") -> abstracts_dfm require(seededlda) abstracts_seededlda <- textmodel_seededlda(x = abstracts_dfm, dictionary = dictionary(abstracts_dictionary), seeds = 46709394, verbose = TRUE) oolong_test_rater1 <- witi(abstracts_seededlda, abstracts$text, userid = \"Yihui\") oolong_test_rater2 <- clone_oolong(oolong_test_rater1, userid = \"Jenny\") ## Let Yihui do the test. oolong_test_rater1$do_word_intrusion_test() oolong_test_rater1$do_topic_intrusion_test() oolong_test_rater1$lock() ## Let Jenny do the test. oolong_test_rater2$do_word_intrusion_test() oolong_test_rater2$do_topic_intrusion_test() oolong_test_rater2$lock() summarize_oolong(oolong_test_rater1, oolong_test_rater2) #> #> ── Summary (topic model): ────────────────────────────────────────────────────── #> #> ── Word intrusion test ── #> #> ℹ Mean model precision: 0.25 #> ℹ Quantiles of model precision: 0.2, 0.225, 0.25, 0.275, 0.3 #> ℹ P-value of the model precision #> (H0: Model precision is not better than random guess): 0.3656 #> ℹ Krippendorff's alpha: 0.747 #> ℹ K Precision: #> 0, 0.5, 1, 0, 0, 0, 0, 0, 0, 1 #> #> ── Topic intrusion test ── #> #> ℹ Mean TLO: -1.5 #> ℹ Median TLO: -1.12 #> ℹ Quantiles of TLO: -4.71, -2.83, -1.12, 0, 0 #> ℹ P-Value of the median TLO #> (H0: Median TLO is not better than random guess): 0.114"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"about-the-p-values","dir":"Articles","previous_headings":"Validating Topic Models","what":"About the p-values","title":"Overview","text":"test model precision (MP) based one-tailed, one-sample binomial test rater. multiple-rater situation, p-values raters combined using Fisher’s method (.k.. Fisher’s omnibus test). H0: MP better 1/ (n_top_terms + 1) H1: MP better 1/ (n_top_terms + 1) test median TLO based permutation test. H0: Median TLO better random guess. H1: Median TLO better random guess. One must notice two statistical tests testing bear minimum. significant test indicates topic model can make rater(s) perform better random guess. indication good topic interpretability. Also, one use conservative significant level, e.g. α<0.001\\alpha < 0.001.","code":""},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"about-biterm-topic-model","dir":"Articles","previous_headings":"","what":"About Biterm Topic Model","title":"Overview","text":"Please refer vignette BTM.","code":""},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"about-naive-bayes","dir":"Articles","previous_headings":"","what":"About Naive Bayes","title":"Overview","text":"Naive Bayes model supervised machine learning model. package supports Naive Bayes models trained using quanteda.textmodels. Suppose newsgroup_nb Naive Bayes model trained subset classic [20 newsgroups] dataset. can still generate word intrusion word set intrusion tests.","code":"tokens(newsgroup5$text, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE, remove_url = TRUE, spilit_hyphens = TRUE) %>% tokens_wordstem %>% tokens_remove(stopwords(\"en\")) %>% dfm(tolower = TRUE) %>% dfm_trim(min_termfreq = 3, max_docfreq = 0.06, docfreq_type = \"prop\") -> newsgroup_dfm docvars(newsgroup_dfm, \"group\") <- newsgroup5$title newsgroup_nb <- textmodel_nb(newsgroup_dfm, docvars(newsgroup_dfm, \"group\"), distribution = \"Bernoulli\") wi(newsgroup_nb) #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✔ WI ✖ TI ✖ WSI #> ℹ WI: k = 20, 0 coded. #> #> ── Methods ── #> #> • <$do_word_intrusion_test()>: do word intrusion test #> • <$lock()>: finalize and see the results wsi(newsgroup_nb) #> #> ── oolong (topic model) ──────────────────────────────────────────────────────── #> ✖ WI ✖ TI ✔ WSI #> ℹ WSI: n = 20, 0 coded. #> #> ── Methods ── #> #> • <$do_word_set_intrusion_test()>: do word set intrusion test #> • <$lock()>: finalize and see the results"},{"path":[]},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"creating-gold-standard","dir":"Articles","previous_headings":"Validating Dictionary-based Methods","what":"Creating gold standard","title":"Overview","text":"trump2k dataset 2,000 tweets @realdonaldtrump. example, interested studying sentiment tweets. One can use tools AFINN automatically extract sentiment tweets. However, oolong recommends generate gold standard human coding first using subset. default, oolong selects 1% origin corpus test cases. parameter construct adjective, e.g. positive, liberal, populistic, etc. instructed, use method $do_gold_standard_test() start coding. coding, need first lock test $turn_gold() method available.","code":"tibble(text = trump2k) #> # A tibble: 2,000 × 1 #> text #> #> 1 \"In just out book, Secret Service Agent Gary Byrne doesn't believe that Croo… #> 2 \"Hillary Clinton has announced that she is letting her husband out to campai… #> 3 \"\\\"@TheBrodyFile: Always great to visit with @TheBrodyFile one-on-one with \\… #> 4 \"Explain to @brithume and @megynkelly, who know nothing, that I will beat Hi… #> 5 \"Nobody beats me on National Security. https://t.co/sCrj4Ha1I5\" #> 6 \"\\\"@realbill2016: @realDonaldTrump @Brainykid2010 @shl Trump leading LA Time… #> 7 \"\\\"@teapartynews: Trump Wins Tea Party Group's 'Nashville Straw Poll' - News… #> 8 \"Big Republican Dinner tonight at Mar-a-Lago in Palm Beach. I will be there!\" #> 9 \".@HillaryClinton loves to lie. America has had enough of the CLINTON'S! It … #> 10 \"\\\"@brianstoya: @realDonaldTrump For POTUS #2016\\\"\" #> # ℹ 1,990 more rows oolong_test <- gs(input_corpus = trump2k, construct = \"positive\", userid = \"Joe\") oolong_test #> #> ── oolong (gold standard generation) ─────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 0 coded. #> ℹ Construct: positive. #> #> ── Methods ── #> #> • <$do_gold_standard_test()>: generate gold standard #> • <$lock()>: finalize this object and see the results oolong_test$do_gold_standard_test() oolong_test$lock() oolong_test #> #> ── oolong (gold standard generation) ─────────────────────────────────────────── #> ☺ Joe #> ℹ GS: n = 20, 20 coded. #> ℹ Construct: positive. #> #> ── Methods ── #> #> • <$turn_gold()>: convert the test results into a quanteda corpus"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"example-validating-afinn-using-the-gold-standard","dir":"Articles","previous_headings":"Validating Dictionary-based Methods","what":"Example: Validating AFINN using the gold standard","title":"Overview","text":"locked oolong test can converted quanteda-compatible corpus analysis. corpus contains two docvars, ‘answer’. example, calculate AFINN score tweet using quanteda. dictionary afinn bundle package. Put back vector AFINN score respective docvars study correlation gold standard AFINN.","code":"oolong_test$turn_gold() #> Corpus consisting of 20 documents and 1 docvar. #> text1 : #> \"Thank you Eau Claire, Wisconsin. #VoteTrump on Tuesday, Apr...\" #> #> text2 : #> \"\"@bobby990r_1: @realDonaldTrump would lead polls the second ...\" #> #> text3 : #> \"\"@KdanielsK: @misstcassidy @AllAboutTheTea_ @realDonaldTrump...\" #> #> text4 : #> \"Thank you for a great afternoon Birmingham, Alabama! #Trump2...\" #> #> text5 : #> \"\"@THETAINTEDT: @foxandfriends @realDonaldTrump Trump 2016 ht...\" #> #> text6 : #> \"People believe CNN these days almost as little as they belie...\" #> #> [ reached max_ndoc ... 14 more documents ] #> ℹ Access the answer from the coding with quanteda::docvars(obj, 'answer') gold_standard <- oolong_test$turn_gold() gold_standard %>% tokens(remove_punct = TRUE) %>% dfm() %>% dfm_lookup(afinn) %>% quanteda::convert(to = \"data.frame\") %>% mutate(matching_word_valence = (neg5 * -5) + (neg4 * -4) + (neg3 * -3) + (neg2 * -2) + (neg1 * -1) + (zero * 0) + (pos1 * 1) + (pos2 * 2) + (pos3 * 3) + (pos4 * 4) + (pos5 * 5), base = ntoken(gold_standard, remove_punct = TRUE), afinn_score = matching_word_valence / base) %>% pull(afinn_score) -> all_afinn_score all_afinn_score #> text1 text2 text3 text4 text5 text6 #> 0.33333333 -0.09090909 -0.16666667 0.45454545 0.00000000 0.00000000 #> text7 text8 text9 text10 text11 text12 #> 0.16666667 0.38461538 0.00000000 0.38461538 -0.29166667 0.00000000 #> text13 text14 text15 text16 text17 text18 #> 0.50000000 0.07142857 0.00000000 -0.12000000 0.28571429 0.16000000 #> text19 text20 #> 0.36842105 0.38888889 summarize_oolong(oolong_test, target_value = all_afinn_score) #> New names: #> `geom_smooth()` using formula = 'y ~ x' #> `geom_smooth()` using formula = 'y ~ x' #> #> ── Summary (gold standard generation): #> ───────────────────────────────────────── #> ℹ Correlation: 0.718 (p = 4e-04) #> ℹ Effect of content length: -0.323 (p = 0.1643) #> • `` -> `...1`"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"suggested-workflow-1","dir":"Articles","previous_headings":"Validating Dictionary-based Methods","what":"Suggested workflow","title":"Overview","text":"Create oolong object, clone another coder. According Song et al. (2020), least draw 1% data. Instruct two coders code tweets lock objects. Calculate target value (case, AFINN score) turning one object corpus. Summarize oolong objects target value. Read results. diagnostic plot consists 4 subplots. good idea read Bland & Altman (1986) difference correlation agreement. Subplot (top left): Raw correlation human judgement target value. One want good correlation two. Subplot (top right): Bland-Altman plot. One want correlation. Also, dots randomly scattering around mean value. , two measurements (human judgement target value) good agreement. Subplot (bottom left): Raw correlation target value content length. One want correlation, indication good reliability influence content length. (See Chan et al. 2020) Subplot (bottom right): Cook’s distance data point. One want dot (least dots) threshold. indication raw correlation human judgement target value can influenced extreme values data. textual output contains Krippendorff’s alpha codings raters. order claim validity target value, must first establish reliability gold standard. Song et al. (2020) suggest Krippendorff’s Alpha > 0.7 acceptable cut-.","code":"trump <- gs(input_corpus = trump2k, exact_n = 40, userid = \"JJ\") trump2 <- clone_oolong(trump, userid = \"Winston\") trump$do_gold_standard_test() trump2$do_gold_standard_test() trump$lock() trump2$lock() gold_standard <- trump$turn_gold() gold_standard %>% tokens(remove_punct = TRUE) %>% dfm() %>% dfm_lookup(afinn) %>% quanteda::convert(to = \"data.frame\") %>% mutate(matching_word_valence = (neg5 * -5) + (neg4 * -4) + (neg3 * -3) + (neg2 * -2) + (neg1 * -1) + (zero * 0) + (pos1 * 1) + (pos2 * 2) + (pos3 * 3) + (pos4 * 4) + (pos5 * 5), base = ntoken(gold_standard, remove_punct = TRUE), afinn_score = matching_word_valence / base) %>% pull(afinn_score) -> target_value res <- summarize_oolong(trump, trump2, target_value = target_value) #> New names: #> `geom_smooth()` using formula = 'y ~ x' #> `geom_smooth()` using formula = 'y ~ x' #> • `` -> `...1` #> • `` -> `...2` res #> #> ── Summary (gold standard generation): ───────────────────────────────────────── #> ℹ Krippendorff's Alpha: 0.931 #> ℹ Correlation: 0.744 (p = 2e-04) #> ℹ Effect of content length: -0.323 (p = 0.1643) plot(res)"},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"backward-compatibility","dir":"Articles","previous_headings":"","what":"Backward compatibility","title":"Overview","text":"Historically, oolong test objects generated one function: create_oolong. longer case longer recommended anymore. still retained backward compatibility purposes. still need use create_oolong(), important parameters input_model input_corpus. Setting NULL generates different tests.","code":""},{"path":"https://gesistsa.github.io/oolong/articles/overview.html","id":"references","dir":"Articles","previous_headings":"","what":"References","title":"Overview","text":"Chang, J., Gerrish, S., Wang, C., Boyd-Graber, J. L., & Blei, D. M. (2009). Reading tea leaves: humans interpret topic models. Advances neural information processing systems (pp. 288-296). link Ying, L., Montgomery, J. M., & Stewart, B. M. (2021). Inferring concepts topics: Towards procedures validating topics measures. Political Analysis. link Song et al. (2020) validations trust? impact imperfect human annotations gold standard quality validation automated content analysis. Political Communication. link Bland, J. M., & Altman, D. (1986). Statistical methods assessing agreement two methods clinical measurement. lancet, 327(8476), 307-310. Chan et al. (2020) Four best practices measuring news sentiment using ‘--shelf’ dictionaries: large-scale p-hacking experiment. Computational Communication Research. link Nielsen, F. Å. (2011). new ANEW: Evaluation word list sentiment analysis microblogs. arXiv preprint arXiv:1103.2903. link","code":""},{"path":"https://gesistsa.github.io/oolong/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chung-hong Chan. Author, maintainer. Marius Sältzer. Author.","code":""},{"path":"https://gesistsa.github.io/oolong/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan C, Sältzer M (2020). “oolong: R package validating automated content analysis tools.” Journal Open Source Software, 5(55), 2461. doi:10.21105/joss.02461, https://github.com/gesistsa/oolong.","code":"@Article{, title = {oolong: An R package for validating automated content analysis tools}, journal = {Journal of Open Source Software}, author = {Chung-hong Chan and Marius Sältzer}, doi = {10.21105/joss.02461}, url = {https://github.com/gesistsa/oolong}, volume = {5}, number = {55}, pages = {2461}, year = {2020}, }"},{"path":"https://gesistsa.github.io/oolong/index.html","id":"oolong-","dir":"","previous_headings":"","what":"Create Validation Tests for Automated Content Analysis","title":"Create Validation Tests for Automated Content Analysis","text":"goal oolong [1] generate administrate validation tests easily typical automated content analysis tools topic models dictionary-based tools. Please refer overview introduction package. need deploy test online, please refer Deployment Vignette. use BTM, please refer BTM Vignette.","code":""},{"path":"https://gesistsa.github.io/oolong/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Create Validation Tests for Automated Content Analysis","text":"Please cite package : Chan C-h. & Sältzer M., (2020). oolong: R package validating automated content analysis tools. Journal Open Source Software, 5(55), 2461, https://doi.org/10.21105/joss.02461 BibTeX entry, use output citation(package = \"oolong\").","code":""},{"path":"https://gesistsa.github.io/oolong/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Create Validation Tests for Automated Content Analysis","text":"Contributions form feedback, comments, code, bug report welcome. Fork source code, modify, issue pull request. Issues, bug reports: File Github issue.","code":""},{"path":"https://gesistsa.github.io/oolong/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Create Validation Tests for Automated Content Analysis","text":"Please note oolong project released Contributor Code Conduct. contributing project, agree abide terms. /ˈuːlʊŋ/ 烏龍, literally means “Dark Dragon”, semi-oxidized tea Asia. popular Taiwan, Japan Hong Kong. Cantonese Taiwanese Mandarin, word can also mean “confused”. perfectly captures spirit human---loop validation.","code":""},{"path":[]},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"description","dir":"","previous_headings":"","what":"Description","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Intended create standard human---loop validity tests typical automated content analysis topic modeling dictionary-based methods. package offers standard workflow functions prepare, administer evaluate human---loop validity test. package provides functions validating topic models using word intrusion, topic intrusion (Chang et al. 2009, https://papers.nips.cc/paper/3700-reading-tea-leaves--humans-interpret-topic-models) word set intrusion (Ying et al. 2021) doi:10.1017/pan.2021.33 tests. package also provides functions generating gold-standard data useful validating dictionary-based methods. default settings generated tests match suggested Chang et al. (2009) Song et al. (2020) doi:10.1080/10584609.2020.1723752.","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"keywords","dir":"","previous_headings":"","what":"Keywords","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Validity Text Analysis Topic Model","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"science-usecases","dir":"","previous_headings":"","what":"Science Usecase(s)","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"package used literature valid topic models prediction models trained text data, e.g. Rauchfleisch et al. (2023), Rothut, et al. (2023), Eisele, et al. (2023).","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"repository-structure","dir":"","previous_headings":"","what":"Repository structure","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"repository follows standard structure R package.","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"environment-setup","dir":"","previous_headings":"","what":"Environment Setup","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"R installed:","code":"install.packages(\"oolong\")"},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"input-data","dir":"","previous_headings":"","what":"Input Data","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"input data topic model prediction model trained text data. example, one can train topic model text data (tweets Donald trump) included package :","code":"library(seededlda) library(quanteda) trump_corpus <- corpus(trump2k) tokens(trump_corpus, remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, split_hyphens = TRUE, remove_url = TRUE) %>% tokens_tolower() %>% tokens_remove(stopwords(\"en\")) %>% tokens_remove(\"@*\") -> trump_toks model <- textmodel_lda(x = dfm(trump_toks), k = 8, verbose = TRUE)"},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"sample-input-and-output-data","dir":"","previous_headings":"","what":"Sample Input and Output Data","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"sample input model trained text data, e.g. sample output oolong R6 object.","code":"library(oolong) library(seededlda) abstracts_seededlda Call: lda(x = x, k = k, label = label, max_iter = max_iter, alpha = alpha, beta = beta, seeds = seeds, words = NULL, verbose = verbose) 10 topics; 2,500 documents; 3,908 features."},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"how-to-use","dir":"","previous_headings":"","what":"How to Use","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Please refer overview package comprehensive introduction test types. Suppose topic model trained text data called abstracts_seededlda, included package. Suppose one like conduct word intrusion test (Chang et al. 2009) validate topic model. test can generated wi() function. One can conduct test following instruction displayed, .e. oolong_test$$do_word_intrusion_test(). One see graphic interface like following conduct test. test, one can finalize test locking test. obtain result test. example:","code":"library(oolong) abstracts_seededlda Call: lda(x = x, k = k, label = label, max_iter = max_iter, alpha = alpha, beta = beta, seeds = seeds, words = NULL, verbose = verbose) 10 topics; 2,500 documents; 3,908 features. oolong_test <- wi(abstracts_seededlda, userid = \"Hadley\") oolong_test ── oolong (topic model) ──────────────────────────────────────────────────────── ✔ WI ✖ TI ✖ WSI ☺ Hadley ℹ WI: k = 10, 0 coded. ── Methods ── • <$do_word_intrusion_test()>: do word intrusion test • <$lock()>: finalize and see the results oolong_test$do_word_intrusion_test() oolong_test$lock() oolong_test ── oolong (topic model) ──────────────────────────────────────────────────────── ✔ WI ✖ TI ✖ WSI ☺ Hadley ℹ WI: k = 10, 10 coded. ── Results: ── ℹ 90% precision"},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"contact-details","dir":"","previous_headings":"","what":"Contact Details","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Maintainer: Chung-hong Chan chainsawtiney@gmail.com Issue Tracker: https://github.com/gesistsa/oolong/issues","code":""},{"path":"https://gesistsa.github.io/oolong/methodshub.html","id":"publication","dir":"","previous_headings":"","what":"Publication","title":"oolong - Create Validation Tests for Automated Content Analysis","text":"Chan, C. H., & Sältzer, M. (2020). oolong: R package validating automated content analysis tools. Journal Open Source Software: JOSS, 5(55), 2461. https://doi.org/10.21105/joss.02461","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":null,"dir":"Reference","previous_headings":"","what":"Abstracts of communication journals dataset — abstracts","title":"Abstracts of communication journals dataset — abstracts","text":"random sample abstracts papers published high-impact communication journals 2000 2017. abstracts_dictionary list terms can used semisupervised techniques keyATM.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Abstracts of communication journals dataset — abstracts","text":"","code":"abstracts abstracts_dfm abstracts_dictionary"},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Abstracts of communication journals dataset — abstracts","text":"object class tbl_df (inherits tbl, data.frame) 2500 rows 1 columns. object class dfm 2500 rows 3998 columns. object class list length 10.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Abstracts of communication journals dataset — abstracts","text":"Chan, C-h, & Grill, C. (2020). [Highs Communication Research: Research Topics High Supply, High Popularity, High Prestige High-Impact Journals.](https://doi.org/10.1177/0093650220944790) Communication Research.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts_seededlda.html","id":null,"dir":"Reference","previous_headings":"","what":"Topic models trained with the abstracts dataset. — abstracts_seededlda","title":"Topic models trained with the abstracts dataset. — abstracts_seededlda","text":"topic models trained different topic model packages.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/abstracts_seededlda.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Topic models trained with the abstracts dataset. — abstracts_seededlda","text":"","code":"abstracts_seededlda abstracts_btm"},{"path":"https://gesistsa.github.io/oolong/reference/abstracts_seededlda.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Topic models trained with the abstracts dataset. — abstracts_seededlda","text":"object class textmodel_lda (inherits textmodel, list) length 10. object class BTM length 9.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":null,"dir":"Reference","previous_headings":"","what":"AFINN dictionary — afinn","title":"AFINN dictionary — afinn","text":"AFINN sentiment dictionary quanteda::dictionary format.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"AFINN dictionary — afinn","text":"","code":"afinn"},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"AFINN dictionary — afinn","text":"object class dictionary2 length 11.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/afinn.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"AFINN dictionary — afinn","text":"Nielsen, F. Å. (2011). new ANEW: Evaluation word list sentiment analysis microblogs. arXiv preprint arXiv:1103.2903.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Check whether the oolong needs to be updated — check_oolong","title":"Check whether the oolong needs to be updated — check_oolong","text":"function raises error input oolong object needs updated. Oolong objects generated old version oolong need updated use functionalities recent versions oolong.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Check whether the oolong needs to be updated — check_oolong","text":"","code":"check_oolong(oolong, verbose = FALSE)"},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Check whether the oolong needs to be updated — check_oolong","text":"oolong oolong object checked verbose, logical, display messages","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Check whether the oolong needs to be updated — check_oolong","text":"Nothing","code":""},{"path":"https://gesistsa.github.io/oolong/reference/check_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Check whether the oolong needs to be updated — check_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Clone an oolong object — clone_oolong","title":"Clone an oolong object — clone_oolong","text":"Clone new oolong object. oolong must locked ever coded.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Clone an oolong object — clone_oolong","text":"","code":"clone_oolong(oolong, userid = NA)"},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Clone an oolong object — clone_oolong","text":"oolong oolong object. userid character string denote name coder","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Clone an oolong object — clone_oolong","text":"oolong object","code":""},{"path":"https://gesistsa.github.io/oolong/reference/clone_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Clone an oolong object — clone_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Generate an oolong test — create_oolong","title":"Generate an oolong test — create_oolong","text":"create_oolong generates oolong test object can either used validating topic model creating ground truth (gold standard) text corpus. wi (word intrusion test), ti (topic intrusion test), witi (word topic intrusion tests), wsi (word set intrusion test) gs handy wrappers create_oolong. recommended use wrappers instead create_oolong.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate an oolong test — create_oolong","text":"","code":"create_oolong( input_model = NULL, input_corpus = NULL, n_top_terms = 5, bottom_terms_percentile = 0.6, exact_n = NULL, frac = 0.01, n_top_topics = 3, n_topiclabel_words = 8, use_frex_words = FALSE, frexweight = 0.5, input_dfm = NULL, construct = \"positive\", btm_dataframe = NULL, n_correct_ws = 3, wsi_n_top_terms = 20, userid = NA, type = \"witi\", lambda = 1, difficulty = NULL ) wi( input_model = NULL, userid = NA, n_top_terms = 5, bottom_terms_percentile = 0.6, frexweight = 0.5, use_frex_words = FALSE, lambda = 1, difficulty = NULL ) witi( input_model = NULL, input_corpus = NULL, userid = NA, n_top_terms = 5, bottom_terms_percentile = 0.6, exact_n = NULL, frac = 0.01, n_top_topics = 3, n_topiclabel_words = 8, frexweight = 0.5, use_frex_words = FALSE, input_dfm = NULL, btm_dataframe = NULL, lambda = 1, difficulty = NULL ) ti( input_model = NULL, input_corpus = NULL, userid = NA, exact_n = NULL, frac = 0.01, n_top_topics = 3, n_topiclabel_words = 8, frexweight = 0.5, use_frex_words = FALSE, input_dfm = NULL, btm_dataframe = NULL, lambda = 1, difficulty = NULL ) wsi( input_model = NULL, userid = NA, n_topiclabel_words = 4, n_correct_ws = 3, wsi_n_top_terms = 20, frexweight = 0.5, use_frex_words = FALSE, lambda = 1, difficulty = NULL ) gs( input_corpus = NULL, userid = NA, construct = \"positive\", exact_n = NULL, frac = 0.01 )"},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generate an oolong test — create_oolong","text":"input_model (wi, ti, witi, wsi) STM, WarpLDA, topicmodels, KeyATM, seededlda, textmodel_nb, BTM object; NULL, create_oolong assumes want create gold standard. input_corpus (wi, ti, witi, wsi, gs) input_model null, corpus (character vector quanteda::corpus object) generate model object. input_model input_corpus NULL, topic intrusion test cases generated. input_model BTM object, argument ignored. input_model null, generates gold standard test cases. n_top_terms (wi, witi) integer, number top topic words included candidates word intrusion test. bottom_terms_percentile (wi, witi) double, term considered word intruder theta less percentile theta, must within range 0 1 exact_n (ti, witi, gs) integer, number topic intrusion test cases generate, ignore frac NULL frac (ti, witi, gs) double, fraction test cases generated corpus n_top_topics (wi, witi) integer, number relevant topics shown alongside intruder topic n_topiclabel_words (witi, ti, wsi) integer, number topic words shown topic (\"ti\" \"witi\") / word set (\"wsi\") label use_frex_words (wi, witi, ti, wsi) logical, STM object, use FREX words TRUE, use PROB words FALSE frexweight (wi, witi, ti, wsi) double, adjust `frexweight` STM (see [stm::labelTopics()]), effect STM use_frex_words FALSE input_dfm (wi, witi, ti, wsi) dfm object used training input_model, input_model WarpLDA object construct (gs) string, adjective describe construct want coders code gold standard test cases btm_dataframe (witi, ti) dataframe used training input_model, input_model BTM object n_correct_ws (wsi) number word sets shown alongside intruder word set wsi_n_top_terms (wsi) number top topic words topic randomized selected word set label userid character string denote name coder. Default NA (userid); recommended type (create_oolong) character string denote want create. \"wi\": word intrusion test; \"ti\": topic intrusion test; \"witi\": word intrusion test topic intrusion test; \"gs\": gold standard generation lambda (wi, witi, ti, wsi) double, adjust `lambda` WarpLDA (see [text2vec::LatentDirichletAllocation()]) difficulty (wi, witi, ti, wsi) double, deprecated, backward compatibility","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generate an oolong test — create_oolong","text":"oolong test object.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate an oolong test — create_oolong","text":"Use wi, ti, witi, wsi gs generate oolong test choice. recommended supply also userid (current coder). names tests (word intrusion test topic intrusion test) follow Chang et al (2009). Ying et al. (2021), topic intrusion test named \"T8WSI\" (Top 8 Word Set Intrusion). Word set intrusion test package actually \"R4WSI\" (Random 4 Word Set Intrusion) Ying et al. default settings wi, witi, ti follow Chang et al (2009), e.g. n_top_terms = 5; instead n_top_terms = 4 Ying et al. default setting wsi follows Ying et al., e.g. n_topiclabel_words = 4. suggested Song et al. (2020), 1","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"about-create-oolong","dir":"Reference","previous_headings":"","what":"About create_oolong","title":"Generate an oolong test — create_oolong","text":"create_oolong intuitive use, longer recommended use create_oolong generate oolong test. create_oolong retained backward compatibility purposes. function generates oolong test object based input_model input_corpus. input_model NULL, generates oolong test topic model (tm). input_model NULL input_corpus NULL, generates oolong test generating gold standard (gs).","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Generate an oolong test — create_oolong","text":"oolong object, depends purpose, following methods: $do_word_intrusion_test() (tm) launch shiny-based word intrusion test. coder find intruder word related words. $do_topic_intrusion_test() (tm) launch shiny-based topic intrusion test. coder find intruder topic least likely topic document. $do_word_set_intrusion_test() (tm) launch shiny-based word set intrusion test. coder find intruder word set related word sets. $do_gold_standard_test() (gs) launch shiny-based test generating gold standard. coder determine level predetermined constructs 5-point Likert scale. $lock(force = FALSE) (gs/tm) lock object changed anymore. enables summarize_oolong following method. $turn_gold() (gs) convert oolong object quanteda compatible corpus. details, please see overview vignette: vignette(\"overview\", package = \"oolong\")","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Generate an oolong test — create_oolong","text":"Chang, J., Gerrish, S., Wang, C., Boyd-Graber, J. L., & Blei, D. M. (2009). Reading tea leaves: humans interpret topic models. Advances neural information processing systems (pp. 288-296). Song et al. (2020) validations trust? impact imperfect human annotations gold standard quality validation automated content analysis. Political Communication. Ying, L., Montgomery, J. M., & Stewart, B. M. (2021). Topics, Concepts, Measurement: Crowdsourced Procedure Validating Topics Measures. Political Analysis","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Generate an oolong test — create_oolong","text":"Chung-hong Chan, Marius Sältzer","code":""},{"path":"https://gesistsa.github.io/oolong/reference/create_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generate an oolong test — create_oolong","text":"","code":"## Creation of oolong test with only word intrusion test data(abstracts_seededlda) data(abstracts) oolong_test <- wi(input_model = abstracts_seededlda, userid = \"Hadley\") ## Creation of oolong test with both word intrusion test and topic intrusion test oolong_test <- witi(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"Julia\") ## Creation of oolong test with topic intrusion test oolong_test <- ti(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"Jenny\") ## Creation of oolong test with word set intrusion test oolong_test <- wsi(input_model = abstracts_seededlda, userid = \"Garrett\") ## Creation of gold standard oolong_test <- gs(input_corpus = trump2k, userid = \"Yihui\") ## Using create_oolong(); not recommended oolong_test <- create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"JJ\") oolong_test <- create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text, userid = \"Mara\", type = \"ti\") oolong_test <- create_oolong(input_corpus = abstracts$text, userid = \"Winston\", type = \"gs\")"},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Deploy an oolong test — deploy_oolong","title":"Deploy an oolong test — deploy_oolong","text":"time, use function. write deployable version app directory using export_oolong instead. Please refer vignette(\"deploy\", package = \"oolong\") details.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Deploy an oolong test — deploy_oolong","text":"","code":"deploy_oolong(oolong)"},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Deploy an oolong test — deploy_oolong","text":"oolong oolong object deployed. Please note \"witi\" type, .e. oolong object word topic intrusion tests, deployed. Also object must locked ever coded.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Deploy an oolong test — deploy_oolong","text":"Nothing, launches deployable version coding interface","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Deploy an oolong test — deploy_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/deploy_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Deploy an oolong test — deploy_oolong","text":"","code":"# Please try this example in interactive R sessions only. if (interactive()) { data(abstracts_stm) x <- wi(abstracts_stm) deploy_oolong(x) }"},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"function exports oolong test launched Shiny app ideal online deployment. Deploying Shiny app online allows coders conduct test online browser, rather install R computer. contrast testing interfaces launched methods $do_word_intrusion_test(), deployable version provides data download coder finished coding. Downloaded data can revert back locked oolong object using revert_oolong. version might provide solutions permanent storage. deployable Shiny app directory. Shiny app launchable shiny::runApp() deployable rsconnect::deployApp(). Please refer vignette(\"deploy\", package = \"oolong\") details.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"","code":"export_oolong( oolong, dir = base::tempdir(), verbose = TRUE, use_full_path = TRUE )"},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"oolong oolong object exported. Please note \"witi\" type, .e. oolong object word topic intrusion tests, exported. Also object must locked ever coded. dir character string, directory exported. Default temporary directory verbose logical, whether display information exporting use_full_path logical, whether expand dir full path","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"directory exported, invisible","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/export_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export a deployable Shiny app from an oolong object into a directory — export_oolong","text":"","code":"# Please try this example in interactive R sessions only. if (interactive()) { data(abstracts_stm) x <- wi(abstracts_stm) export_oolong(x) }"},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":null,"dir":"Reference","previous_headings":"","what":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"Naive Bayes model (class 'textmodel_nb') trained 20 newsgroups data.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"","code":"newsgroup_nb"},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"object class textmodel_nb (inherits textmodel, list) length 7.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/newsgroup_nb.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Naive Bayes model trained on 20 newsgroups data — newsgroup_nb","text":"Lang, K. (1995). Newsweeder: Learning filter netnews. Machine Learning Proceedings 1995 (pp. 331-339). Morgan Kaufmann.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":null,"dir":"Reference","previous_headings":"","what":"Print oolong gold standard object — print.oolong_gold_standard","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"function prints summary oolong gold standard object. oolong gold standard object result $turn_gold() method. quanteda::corpus compatible object.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"","code":"# S3 method for class 'oolong_gold_standard' print(x, ...)"},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"x oolong gold standard object ... parameters","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"None, summary quanteda::corpus displayed","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_gold_standard.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Print oolong gold standard object — print.oolong_gold_standard","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":null,"dir":"Reference","previous_headings":"","what":"Print and plot oolong summary — print.oolong_summary","title":"Print and plot oolong summary — print.oolong_summary","text":"functions print plot useful summary results summarize_oolong. details, please see overview vignette: vignette(\"overview\", package = \"oolong\")","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Print and plot oolong summary — print.oolong_summary","text":"","code":"# S3 method for class 'oolong_summary' print(x, ...) # S3 method for class 'oolong_summary' plot(x, ...)"},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Print and plot oolong summary — print.oolong_summary","text":"x oolong_summary ... parameters","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Print and plot oolong summary — print.oolong_summary","text":"None","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"summary","dir":"Reference","previous_headings":"","what":"Summary","title":"Print and plot oolong summary — print.oolong_summary","text":"Print function displays following information: Mean model precision (wi, wsi) Higher value indicates better topic interpretability Quantiles model precision (wi) Higher value indicates better topic interpretability P-value model precision (wi) Model precision's p-value calculated one-sample binomial test Fisher's Omnibus method. Krippendorff's alpha (wi, wsi, gs) Krippendorff's Alpha, one oolong object analyzed. K Precision (wi, wsi) Model precision topic. Mean TLO (ti) Mean topic log odds, higher value indicates better interpretability Median TLO (ti) Median topic log odds, higher value indicates better interpretability Quantiles TLO (ti) Quantiles topic log odds P-Value median TLO (ti) Median topic log odds's p-value calculated permutation test. Correlation (average answer) (gs) Pearson's correlation average answer target value Corrlation (content length) (gs) Pearson's correlation content length target value","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"diagnostic-plot","dir":"Reference","previous_headings":"","what":"Diagnostic plot","title":"Print and plot oolong summary — print.oolong_summary","text":"Plot function displays diagnostic plot following subplots (gs ). Top left Correlation answer coders target value check correlation two values. axes minmax transformed. Top right Bland-altman plot answer coders target value check agreement two values. Bottom left Correlation target value content length check influence content length. Bottom right Cook's distance check influential observations.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/print.oolong_summary.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Print and plot oolong summary — print.oolong_summary","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Obtain a locked oolong from a downloaded data file — revert_oolong","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"generate locked oolong object original oolong object RDS file. RDS file downloaded deployed Shiny app.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"","code":"revert_oolong(oolong, rds_file)"},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"oolong oolong object used deployment rds_file path downloaded RDS file","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"locked oolong object based data downloaded RDS file","code":""},{"path":"https://gesistsa.github.io/oolong/reference/revert_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Obtain a locked oolong from a downloaded data file — revert_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarize oolong objects — summarize_oolong","title":"Summarize oolong objects — summarize_oolong","text":"function summarizes one oolong objects. oolong objects must locked.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarize oolong objects — summarize_oolong","text":"","code":"summarize_oolong(..., target_value = NULL, n_iter = 1500) summarise_oolong(..., target_value = NULL, n_iter = 1500)"},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarize oolong objects — summarize_oolong","text":"... (tm/gs) one oolong objects summarized target_value (gs) vector numeric values, value want validate human-coded gold standard. One example target value sentiment score extracted automatically text n_iter (ti) number iterations calculate median test","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarize oolong objects — summarize_oolong","text":"oolong summary. Depends purpose, oolong summary object following values: $type (gs/tm) type analysis, either 'gs' 'tm' $kripp_alpha; $kripp_alpha_wsi (wi, wsi) Krippendorff's Alpha, one oolong object analyzed. $rater_precision; $rater_precision_wsi (wi, wsi) Model precision $res$rater_precision_p_value (wi) Model precision's p-value calculated one-sample binomial test Fisher's Omnibus method. $k_precision; $k_precision_wsi (wi, wsi) precision topic $tlo (ti) vector topic log odds $tlo_pvalue (ti) Median topic log odds's p-value calculated permutation test. $cor (gs) Pearson's correlation average answer target value $cor_length (gs) Pearson's correlation content length target value $diag_plot (gs) diagnostic plot. useful summary object can obtained either print.oolong_summary plot.oolong_summary. details, please see overview vignette: vignette(\"overview\", package = \"oolong\")","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Summarize oolong objects — summarize_oolong","text":"Chang, J., Gerrish, S., Wang, C., Boyd-Graber, J. L., & Blei, D. M. (2009). Reading tea leaves: humans interpret topic models. Advances neural information processing systems (pp. 288-296). Song et al. (2020) validations trust? impact imperfect human annotations gold standard quality validation automated content analysis. Political Communication. Ying, L., Montgomery, J. M., & Stewart, B. M. (2021). Topics, Concepts, Measurement: Crowdsourced Procedure Validating Topics Measures. Political Analysis.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Summarize oolong objects — summarize_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/reference/summarize_oolong.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarize oolong objects — summarize_oolong","text":"","code":"# Please try this example in interactive R sessions only. if (interactive()) { data(abstracts_stm) oolong_test1 <- create_oolong(abstracts_stm) oolong_test2 <- clone_oolong(oolong_test1) oolong_test1$do_word_intrusion_test() oolong_test2$do_word_intrusion_test() oolong_test1$lock() oolong_test2$lock() summarize_oolong(oolong_test1, oolong_test2) }"},{"path":"https://gesistsa.github.io/oolong/reference/trump2k.html","id":null,"dir":"Reference","previous_headings":"","what":"Trump's tweets dataset — trump2k","title":"Trump's tweets dataset — trump2k","text":"random sample 2000 tweets @realdonaldtrump account assumption duty president United States.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/trump2k.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trump's tweets dataset — trump2k","text":"","code":"trump2k"},{"path":"https://gesistsa.github.io/oolong/reference/trump2k.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Trump's tweets dataset — trump2k","text":"object class character length 2000.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":null,"dir":"Reference","previous_headings":"","what":"Update an oolong object to the latest version — update_oolong","title":"Update an oolong object to the latest version — update_oolong","text":"function update old oolong object latest version.","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Update an oolong object to the latest version — update_oolong","text":"","code":"update_oolong(oolong, verbose = TRUE)"},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Update an oolong object to the latest version — update_oolong","text":"oolong oolong object updated verbose, logical, display messages","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Update an oolong object to the latest version — update_oolong","text":"updated oolong object","code":""},{"path":"https://gesistsa.github.io/oolong/reference/update_oolong.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Update an oolong object to the latest version — update_oolong","text":"Chung-hong Chan","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-062-development","dir":"Changelog","previous_headings":"","what":"oolong 0.6.2 (development)","title":"oolong 0.6.2 (development)","text":"Add content MH Use icr calculation Krippendorff’s Alpha","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-061","dir":"Changelog","previous_headings":"","what":"oolong 0.6.1","title":"oolong 0.6.1","text":"CRAN release: 2024-04-15 Add fixes quanteda 4.0.0.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-060","dir":"Changelog","previous_headings":"","what":"oolong 0.6.0","title":"oolong 0.6.0","text":"CRAN release: 2024-02-10 Use seededlda instead keyATM demo, can reduce version requirement.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-051","dir":"Changelog","previous_headings":"","what":"oolong 0.5.1","title":"oolong 0.5.1","text":"Transfer ownership gesistsa. Add pkgdown website clean many documents.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-050","dir":"Changelog","previous_headings":"","what":"oolong 0.5.0","title":"oolong 0.5.0","text":"CRAN release: 2023-08-25 Potential breaking change: parameter difficulty deprecated. Instead, please use respective frewweight (STM) lambda (Warp LDA). legacy code explicitly using difficulty parameter, change break code. However, use following pattern, might need change legacy code accordingly. package-level documentation removed. Increase required R version 4.0 keyATM","code":"## This code is okay set.seed(123) wsi(abstracts_stm, use_frex_words = TRUE, difficulty = 0.8) set.seed(123) ## You will get different results with oolong 0.5.0 wsi(abstracts_stm, use_frex_words = TRUE) ## You need to explicitly use the old default, which is quite high set.seed(123) wsi(abstracts_stm, use_frex_words = TRUE, frexweight = 1)"},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-043","dir":"Changelog","previous_headings":"","what":"oolong 0.4.3","title":"oolong 0.4.3","text":"CRAN release: 2023-06-11 Upgrade Shiny test cases shinytest2 Clarify coding task can paused, saved, resumed Vignette Package maintenance","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-041","dir":"Changelog","previous_headings":"","what":"oolong 0.4.1","title":"oolong 0.4.1","text":"CRAN release: 2021-11-09 Eliminate miniUI dependency. Update documentation reflect newly published papers, e.g. Ying et al.","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-040","dir":"Changelog","previous_headings":"","what":"oolong 0.4.0","title":"oolong 0.4.0","text":"CRAN release: 2021-05-31 Add export_oolong deploy_oolong online deployment [thanks Marius Sältzer, Daniel Braby (friend Louis), Johannes Gruber Felicia Loecherbach testing feature; thanks SAGE Ocean concept grant support development feature] Support models seededlda [thanks Marius Sältzer] Support Naive Bayes models quanteda.textmodels [thanks Marius Sältzer] Support generation word set intrusion test (Ying et al. forthcoming) Support generation oolong object topic intrusion test Add new wrappers: wi, ti, witi, wsi, gs Add userid suggested parameter Total revamp object oolong tests; add meta data. Add update_oolong updating object created older versions oolong Update print method oolong tests; now based cli Various bug fixes; Shiny components now automatically tested","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-0311","dir":"Changelog","previous_headings":"","what":"oolong 0.3.11","title":"oolong 0.3.11","text":"CRAN release: 2020-11-13 Support BTM [thanks Marius Sältzer] Update Shiny UI (jump button) Various bug fixes","code":""},{"path":"https://gesistsa.github.io/oolong/news/index.html","id":"oolong-034","dir":"Changelog","previous_headings":"","what":"oolong 0.3.4","title":"oolong 0.3.4","text":"CRAN release: 2020-03-21 Initial CRAN version.","code":""}]