From 103c95b6a903a179621320ea294807d5987c7fdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20Drhl=C3=ADk?= Date: Thu, 8 Nov 2018 22:37:44 +0100 Subject: [PATCH] Close #21 - rename language files When adding a new language, it is no longer necessary to edit README.Rmd. We don't need to edit the language data frame any more because the language name is present in the file name. Example: en_English fr-CA_French (Canada) The only issue that came up is that running Ctrl+Shift+K in Rstudio (default Knit) doesn't work because of an unexported function load_langs. It works when using build_sweary() so it's okay. This function cannot be exported because it loads raw language files that are not present in an installed package. --- DESCRIPTION | 3 +- R/build_tools.R | 85 ++++++++++++++++++- R/sweary.R | 4 +- README.Rmd | 36 ++------ README.md | 14 +-- data-raw/swear-word-lists/{cs => cs_Czech} | 0 data-raw/swear-word-lists/{de => de_German} | 0 data-raw/swear-word-lists/{en => en_English} | 0 .../{fr-CA => fr-CA_French (Canada)} | 0 data-raw/swear-word-lists/{gr => gr_Greek} | 0 .../swear-word-lists/{mk => mk_Macedonian} | 0 data-raw/swear-word-lists/{pl => pl_Polish} | 0 data-raw/swear-word-lists/{ro => ro_Romanian} | 0 data-raw/swear-word-lists/{sk => sk_Slovak} | 0 data-raw/swear-words.R | 13 +-- man/file_lang_code.Rd | 18 ++++ man/file_lang_name.Rd | 18 ++++ man/load_lang_from_file.Rd | 17 ++++ man/load_langs.Rd | 15 ++++ man/split_lang_file.Rd | 20 +++++ 20 files changed, 190 insertions(+), 53 deletions(-) rename data-raw/swear-word-lists/{cs => cs_Czech} (100%) rename data-raw/swear-word-lists/{de => de_German} (100%) rename data-raw/swear-word-lists/{en => en_English} (100%) rename data-raw/swear-word-lists/{fr-CA => fr-CA_French (Canada)} (100%) rename data-raw/swear-word-lists/{gr => gr_Greek} (100%) rename data-raw/swear-word-lists/{mk => mk_Macedonian} (100%) rename data-raw/swear-word-lists/{pl => pl_Polish} (100%) rename data-raw/swear-word-lists/{ro => ro_Romanian} (100%) rename data-raw/swear-word-lists/{sk => sk_Slovak} (100%) create mode 100644 man/file_lang_code.Rd create mode 100644 man/file_lang_name.Rd create mode 100644 man/load_lang_from_file.Rd create mode 100644 man/load_langs.Rd create mode 100644 man/split_lang_file.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 394c937..9f6285d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,5 +21,6 @@ Suggests: purrr, rmarkdown, stringr, - usethis + usethis, + readr Imports: glue diff --git a/R/build_tools.R b/R/build_tools.R index af4bbd5..afce285 100644 --- a/R/build_tools.R +++ b/R/build_tools.R @@ -97,7 +97,7 @@ format.sweary_build_results <- function(x) { status$errors > 0 ~ "You need to fix some ERRORS!", status$warnings > 0 ~ "You should fix those WARNINGS!", status$notes > 0 ~ "Handle those NOTES and you're good to go!", - TRUE ~ paste0("Great job! Random swearword for you: ", rsw$word, " [", rsw$language, "] :-)") + TRUE ~ paste0("Great job! Random swear word for you: ", rsw$word, " [", rsw$language, "] :-)") ) glue::glue(" @@ -204,3 +204,86 @@ print_devtools_check_summary <- function(x) { warnings: {x$warnings} notes: {x$notes}") } + +#' Splits lang file name in language code and name +#' +#' @param lang_file Language file name, either absolute +#' or relative. +#' +#' @return Character vector of length 2. First +#' element is language code, second element +#' is language name. +split_lang_file <- function(lang_file) { + file_name <- stringr::str_split(lang_file, "/", simplify = TRUE) %>% + dplyr::last(.) + file_split <- stringr::str_split(file_name, "_", simplify = TRUE) + + return(file_split) +} + +#' Returns language code from file name +#' +#' @param lang_file Language file name, either absolute +#' or relative. +#' +#' @return Language code. +file_lang_code <- function(lang_file) { + file_split <- split_lang_file(lang_file) + + return(file_split[1]) +} + +#' Returns language name from file name +#' +#' @param lang_file Language file name, either absolute +#' or relative. +#' +#' @return Language name. +file_lang_name <- function(lang_file) { + file_split <- split_lang_file(lang_file) + + return(file_split[2]) +} + +#' Loads a single language data frame from file +#' +#' @param lang_file Language file name with full path. +#' @return Data frame of swear words in one language. +load_lang_from_file <- function(lang_file) { + suppressMessages( + words <- readr::read_csv(lang_file, col_names = c("word")) + ) + words$language <- file_lang_code(lang_file) + + return(words) +} + +#' Create a summary df with languages and their counts +#' +#' @return Data frame with language codes, language names, +#' word counts and a formatted markdown table row. +load_langs <- function() { + lang_files <- list.files("data-raw/swear-word-lists/", full.names = TRUE) + + langs <- purrr::map_df(lang_files, function(lang_file) { + file_split <- split_lang_file(lang_file) + dplyr::data_frame( + lang_code = file_split[1], + lang = file_split[2] + ) + }) + + counts <- sweary::swear_words %>% + dplyr::count(.data$language) + + lang_counts <- dplyr::inner_join( + langs, + counts, + by = c("lang_code" = "language") + ) %>% + dplyr::mutate( + label_row = glue::glue("| {lang} | {lang_code} | {n} |") + ) + + return(lang_counts) +} diff --git a/R/sweary.R b/R/sweary.R index c131d80..fedae83 100644 --- a/R/sweary.R +++ b/R/sweary.R @@ -17,5 +17,5 @@ #' @format A data frame with 96 rows and 2 variables. "swear_words" -## Deletes R CMD check NOTES for '.' and '%>%'. -utils::globalVariables(c(".", "%>%")) +## Deletes R CMD check NOTES for '.', '%>%' and '.data'. +utils::globalVariables(c(".", "%>%", ".data")) diff --git a/README.Rmd b/README.Rmd index 5537932..21bf531 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,30 +8,7 @@ output: github_document library(dplyr) library(glue) -# If adding a new language, add a new row to the following -# data frame. Make sure that the codes are alphabetically -# ordered and include their language equivalent. -langs <- data_frame( - lang_code = c("cs", "de", "en", "fr-CA", "gr", "mk", "pl", "ro", "sk"), - lang = c("Czech", "German", "English", "French (Canada)", "Greek", "Macedonian", "Polish", "Romanian", "Slovak") -) - -# Counts of swear words for each language are computed -# based on our swear_words data frame. -counts <- sweary::swear_words %>% - count(language) - -# This joined data frame includes language names, -# counts and labels that are used to create a row -# in a markdown table. -lang_counts <- inner_join( - langs, - counts, - by = c("lang_code" = "language") -) %>% - mutate( - label_row = glue("| {lang} | {lang_code} | {n} |") - ) +lang_counts <- load_langs() ``` [![Join the chat at https://gitter.im/pdrhlik/sweary](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/swearyr) @@ -80,10 +57,11 @@ If you are not comfortable with `git` and pull requests, you can just follow ste Find its two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes).\ If the language you are creating is a certain dialect (e.g. Canadian French), find its [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) in this [language code table](http://www.lingoes.net/en/translator/langcode.htm). 2. **Create a language file.**\ - Place the file in `data-raw/swear-word-lists/{LANG_CODE}`.\ + Place the file in `data-raw/swear-word-lists/{LANG_CODE}_{LANG_NAME}`.\ Examples:\ - + English: `data-raw/swear-word-lists/en` - + Canadian French: `data-raw/swear-word-lists/fr-CA` + + English: `data-raw/swear-word-lists/en_English` + + Canadian French: `data-raw/swear-word-lists/fr-CA_French (Canada)`\ + Note that spaces and parentheses in file names are allowed. 3. **Fill in the file with swear words.** Following rules must apply: + **One** swear word per line with no trailing whitespace. + All words must be **lowercase**. @@ -91,9 +69,7 @@ If you are not comfortable with `git` and pull requests, you can just follow ste + The list must be **sorted** alphabetically. 4. **Make sure all the tests pass.**\ You can do that using a development function called `build_sweary()`. It becomes available when you `git clone` the repository and call `devtools::load_all()`. Or pressing `Ctrl+Shift+L` in RStudio. Learn more about calling this function using `?build_sweary`. -5. **Update README.Rmd**.\ - Update the `langs` data frame in README.Rmd by adding a new row to it. More precise instructions are in the raw file itself. -6. **Create a pull request.** +5. **Create a pull request.** ## Origin diff --git a/README.md b/README.md index 517f44c..6f07567 100644 --- a/README.md +++ b/README.md @@ -86,10 +86,13 @@ approve of the changes. [language code table](http://www.lingoes.net/en/translator/langcode.htm). 2. **Create a language file.** - Place the file in `data-raw/swear-word-lists/{LANG_CODE}`. + Place the file in + `data-raw/swear-word-lists/{LANG_CODE}_{LANG_NAME}`. Examples: - - English: `data-raw/swear-word-lists/en` - - Canadian French: `data-raw/swear-word-lists/fr-CA` + - English: `data-raw/swear-word-lists/en_English` + - Canadian French: `data-raw/swear-word-lists/fr-CA_French + (Canada)` + Note that spaces and parentheses in file names are allowed. 3. **Fill in the file with swear words.** Following rules must apply: - **One** swear word per line with no trailing whitespace. - All words must be **lowercase**. @@ -101,10 +104,7 @@ approve of the changes. repository and call `devtools::load_all()`. Or pressing `Ctrl+Shift+L` in RStudio. Learn more about calling this function using `?build_sweary`. -5. **Update README.Rmd**. - Update the `langs` data frame in README.Rmd by adding a new row to - it. More precise instructions are in the raw file itself. -6. **Create a pull request.** +5. **Create a pull request.** ## Origin diff --git a/data-raw/swear-word-lists/cs b/data-raw/swear-word-lists/cs_Czech similarity index 100% rename from data-raw/swear-word-lists/cs rename to data-raw/swear-word-lists/cs_Czech diff --git a/data-raw/swear-word-lists/de b/data-raw/swear-word-lists/de_German similarity index 100% rename from data-raw/swear-word-lists/de rename to data-raw/swear-word-lists/de_German diff --git a/data-raw/swear-word-lists/en b/data-raw/swear-word-lists/en_English similarity index 100% rename from data-raw/swear-word-lists/en rename to data-raw/swear-word-lists/en_English diff --git a/data-raw/swear-word-lists/fr-CA b/data-raw/swear-word-lists/fr-CA_French (Canada) similarity index 100% rename from data-raw/swear-word-lists/fr-CA rename to data-raw/swear-word-lists/fr-CA_French (Canada) diff --git a/data-raw/swear-word-lists/gr b/data-raw/swear-word-lists/gr_Greek similarity index 100% rename from data-raw/swear-word-lists/gr rename to data-raw/swear-word-lists/gr_Greek diff --git a/data-raw/swear-word-lists/mk b/data-raw/swear-word-lists/mk_Macedonian similarity index 100% rename from data-raw/swear-word-lists/mk rename to data-raw/swear-word-lists/mk_Macedonian diff --git a/data-raw/swear-word-lists/pl b/data-raw/swear-word-lists/pl_Polish similarity index 100% rename from data-raw/swear-word-lists/pl rename to data-raw/swear-word-lists/pl_Polish diff --git a/data-raw/swear-word-lists/ro b/data-raw/swear-word-lists/ro_Romanian similarity index 100% rename from data-raw/swear-word-lists/ro rename to data-raw/swear-word-lists/ro_Romanian diff --git a/data-raw/swear-word-lists/sk b/data-raw/swear-word-lists/sk_Slovak similarity index 100% rename from data-raw/swear-word-lists/sk rename to data-raw/swear-word-lists/sk_Slovak diff --git a/data-raw/swear-words.R b/data-raw/swear-words.R index 1c26cec..95d22e1 100644 --- a/data-raw/swear-words.R +++ b/data-raw/swear-words.R @@ -3,18 +3,7 @@ library(readr) library(stringr) library(dplyr) -load_lang <- function(lang_file) { - suppressMessages( - words <- readr::read_csv(lang_file, col_names = c("word")) - ) - lang <- stringr::str_extract(lang_file, "[\\w-]+$") - - words$language <- lang - - return(words) -} - lang_files <- list.files("data-raw/swear-word-lists/", full.names = TRUE) -swear_words <- purrr::map_df(lang_files, load_lang) +swear_words <- purrr::map_df(lang_files, load_lang_from_file) usethis::use_data(swear_words, overwrite = TRUE) diff --git a/man/file_lang_code.Rd b/man/file_lang_code.Rd new file mode 100644 index 0000000..eb5ea02 --- /dev/null +++ b/man/file_lang_code.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_tools.R +\name{file_lang_code} +\alias{file_lang_code} +\title{Returns language code from file name} +\usage{ +file_lang_code(lang_file) +} +\arguments{ +\item{lang_file}{Language file name, either absolute +or relative.} +} +\value{ +Language code. +} +\description{ +Returns language code from file name +} diff --git a/man/file_lang_name.Rd b/man/file_lang_name.Rd new file mode 100644 index 0000000..9af7ebe --- /dev/null +++ b/man/file_lang_name.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_tools.R +\name{file_lang_name} +\alias{file_lang_name} +\title{Returns language name from file name} +\usage{ +file_lang_name(lang_file) +} +\arguments{ +\item{lang_file}{Language file name, either absolute +or relative.} +} +\value{ +Language name. +} +\description{ +Returns language name from file name +} diff --git a/man/load_lang_from_file.Rd b/man/load_lang_from_file.Rd new file mode 100644 index 0000000..b7ab35b --- /dev/null +++ b/man/load_lang_from_file.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_tools.R +\name{load_lang_from_file} +\alias{load_lang_from_file} +\title{Loads a single language data frame from file} +\usage{ +load_lang_from_file(lang_file) +} +\arguments{ +\item{lang_file}{Language file name with full path.} +} +\value{ +Data frame of swear words in one language. +} +\description{ +Loads a single language data frame from file +} diff --git a/man/load_langs.Rd b/man/load_langs.Rd new file mode 100644 index 0000000..3fe5aed --- /dev/null +++ b/man/load_langs.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_tools.R +\name{load_langs} +\alias{load_langs} +\title{Create a summary df with languages and their counts} +\usage{ +load_langs() +} +\value{ +Data frame with language codes, language names, + word counts and a formatted markdown table row. +} +\description{ +Create a summary df with languages and their counts +} diff --git a/man/split_lang_file.Rd b/man/split_lang_file.Rd new file mode 100644 index 0000000..c23f549 --- /dev/null +++ b/man/split_lang_file.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_tools.R +\name{split_lang_file} +\alias{split_lang_file} +\title{Splits lang file name in language code and name} +\usage{ +split_lang_file(lang_file) +} +\arguments{ +\item{lang_file}{Language file name, either absolute +or relative.} +} +\value{ +Character vector of length 2. First + element is language code, second element + is language name. +} +\description{ +Splits lang file name in language code and name +}