From 81db90928d3b7dd712e47f0b0b138b873d3e768a Mon Sep 17 00:00:00 2001 From: Kohei Watanabe Date: Fri, 12 Apr 2024 22:19:00 +0900 Subject: [PATCH] Update for quanteda v4.0 (#85) dfm.character() is deprecated in v4.0 --- vignettes/overview.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd index 34836b5..10a2fe6 100644 --- a/vignettes/overview.Rmd +++ b/vignettes/overview.Rmd @@ -162,7 +162,7 @@ The test makes more sense if more than one coder is involved. A suggested workfl Preprocess and create a document-feature matrix ```{r, eval = FALSE} -dfm(abstracts$text, tolower = TRUE, stem = TRUE, remove = stopwords('english'), remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE, remove_hyphens = TRUE) %>% dfm_trim(min_docfreq = 3, max_docfreq = 500) %>% dfm_select(min_nchar = 3, pattern = "^[a-zA-Z]+$", valuetype = "regex") -> abstracts_dfm +tokens(abstracts$text, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE, remove_url = TRUE, split_hyphens = TRUE) %>% tokens_wordstem() %>% tokens_remove(stopwords("en")) %>% dfm(tolower = TRUE) %>% dfm_trim(min_docfreq = 3, max_docfreq = 500) %>% dfm_select(min_nchar = 3, pattern = "^[a-zA-Z]+$", valuetype = "regex") -> abstracts_dfm ``` Train a topic model.