Add IUPAC standard atomic weights

Removed m(), now aw() exists Typos
abuseki · Dec 25, 2023 · 8965e9e · 8965e9e
1 parent 9115cb0
commit 8965e9e
Show file tree

Hide file tree

Showing 23 changed files with 358 additions and 101 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,3 +4,4 @@
 ^data-raw$
 ^README\.Rmd$
 ^\.github$
+^checks$
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@
 *.bak
 *.tmp
 *.bib.sav
+checks
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,18 +1,19 @@
 Package: georefdatar
-Title: Geosciences reference data sets in R
-Version: 0.4.3.9000
+Title: Geosciences Reference Data Sets in R
+Version: 0.6.1.9000
 Authors@R: 
     person("Gerald", "Schuberth-Hlavač", email= "abuseki@synapticgap.com",
       role = c("aut", "cre"))
-Description: The package provides some reference data sets that are regularly
-  used in geosciences. These are a periodic table of the elements, a list of
-  minerals and some reservoir reference data set (continental crust, mantle,
-  basalts, ...), some decay constants and isotopic ratios used frequently in
-  geochronology. Some functions for basic queries of the periodic table of the
-  elements and the list of minerals are also included. All data sets have full
+Description: The package includes reference data sets commonly used in 
+  geosciences, such as the standard atomic weights of elements, a periodic 
+  table, a mineral list, reservoir reference datasets (continental crust, 
+  mantle, basalts, etc.), decay constants, and isotopic ratios frequently used 
+  in geochronology. Additionally, the package provides functions for basic 
+  queries of atomic weights and mineral lists. All datasets have complete 
   references, making them citable.
 License: MIT + file LICENSE
 URL: https://github.com/abuseki/georefdatar
+BugReports: https://github.com/abuseki/georefdatar/issues
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
@@ -23,6 +24,9 @@ Imports: Rdpack
 RdMacros: Rdpack
 Suggests: 
     spelling,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    readxl,
+    dplyr,
+    tidyr
 Config/testthat/edition: 3
 Language: en-US
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+export(aw)
 export(icsColor)
-export(m)
 export(minSearch)
 export(minsForChemistry)
 importFrom(Rdpack,reprompt)
diff --git a/R/IUPAC__Standard_atomic_weights_of_the_elements.R b/R/IUPAC__Standard_atomic_weights_of_the_elements.R
@@ -0,0 +1,43 @@
+#' IUPAC Standard atomic weights of the elements
+#'
+#' A data set containing the standard atomic weights of the elements as
+#' recommended by the [International Union of Pure and Applied Chemistry
+#' (IUPAC)](https://www.iupac.org) and [Commission on Isotopic Abundances and
+#' Atomic Weights (CIAAW)](https://www.ciaaw.org).
+#'
+#' This is table 1 of \insertCite{Prohaska2022}{georefdatar}. The (foot)notes in
+#' the table have been collected in a new column (Notes) and their abbreviations
+#' resolved into sentences.
+#'
+#'
+#' @format A data frame with `r nrow(IUPAC_StdAW)` rows and the following
+#' `r ncol(IUPAC_StdAW)` columns:\cr
+#' \describe{
+#' \item{Element}{Element's name}
+#' \item{Symbol}{Element's symbol}
+#' \item{Atomic number}{Element's atomic number. Elements are listed in
+#' increasing atomic number}
+#' \item{stdAW::Value}{Values of standard atomic weights are given as single
+#' values with uncertainties (column stdAW::Uncertainty) or as intervals.}
+#' \item{stdAW::Uncertainty}{Uncertainty of the standard atomic weight value}
+#' \item{abrStdAW::Value}{Abridged atomic weights quoted to five significant
+#' figures, unless such precision cannot be attained due to the variability of
+#' isotopic composition in normal materials or due to the limitations of the
+#' measurement capability.}
+#' \item{abrStdAW::±}{A +/− value as a simplified measure of the reliability of
+#' the abridged values.}
+#' \item{Note}{The collected footnotes of the table. Notes are resolved to the
+#' sentences associated with them. If there is more than one note, the notes are
+#' separated by a newline (`\n`).}
+#' }
+#'
+#' @seealso [aw()] for a function to get the standard atomic weights of the elements found in this table by their symbols
+#' @seealso [IUPAC periodic table of elements](https://iupac.org/what-we-do/periodic-table-of-elements/) online
+#' @seealso [CIAAW](https://www.ciaaw.org/) for another periodic table of elements online
+#' @seealso [CIAAW standard atomic weights](https://www.ciaaw.org/atomic-weights.htm) online
+#'
+#' @references \insertCite{Prohaska2022}{georefdatar}
+#'
+#'
+#' @importFrom Rdpack reprompt
+"IUPAC_StdAW"
diff --git a/R/PeriodicTable.R b/R/PeriodicTable.R
@@ -6,6 +6,8 @@
 #' For each element the following attributes are reported:
 #' `r paste(names(pte), collapse= ', ')`
 #'
+#' @seealso [IUPAC_StdAW] for the standard atomic weights of the elements
+#' recommended by [IUPAC](https://www.iupac.org)
 #'
 #' @references{
 #'   \insertRef{pubChemPTable}{georefdatar}
@@ -16,23 +18,3 @@
 #' @importFrom Rdpack reprompt
 #'
 "pte"
-
-
-
-#' Get the atomic mass of an element
-#'
-#' @param sym symbol of the element as a string
-#'
-#' @return Atomic mass of element with the given symbol
-#'
-#' @seealso [periodic table of elements][pte]
-#'
-#' @export
-#'
-#' @examples
-#' m('H')
-#' m('H')*2+m('O')
-#'
-m <- function(sym) {
-  georefdatar::pte[georefdatar::pte$Symbol==sym,]$AtomicMass
-}
diff --git a/R/atomicWeight.R b/R/atomicWeight.R
@@ -0,0 +1,29 @@
+#' Get the atomic weight of an element
+#'
+#' @param sym symbol of the element as a string
+#' @param dataSource the data source for the atomic weight, either `IUPAC`
+#'   (default) or `PubChem`. This is case-insensitive ("IUPAC" is the same as
+#'   e.g. "IuPaC")
+#'
+#' @return Atomic weight of the element with the given symbol (for `IUPAC`, the abridged value)
+#'
+#' @seealso [IUPAC_StdAW] for the table of standard atomic weights
+#'   by IUPAC and [pte] for a full periodic table of elements
+#'
+#' @export
+#'
+#' @examples
+#' aw('H')
+#' aw('H')*2+aw('O')
+#'
+#' aw('Li', dataSource= "pubchem")
+#'
+aw <- function(sym, dataSource="IUPAC") {
+  dataSource <-toupper(dataSource)
+  stopifnot("Unknown data source!"= dataSource %in% c("IUPAC", "PUBCHEM"))
+
+  if (dataSource == "PUBCHEM")
+    return(georefdatar::pte[georefdatar::pte$Symbol==sym, ]$AtomicMass)
+
+  georefdatar::IUPAC_StdAW[georefdatar::IUPAC_StdAW$Symbol== sym, ]$`abrStdAW::Value`
+}
diff --git a/R/georefdatar-package.R b/R/georefdatar-package.R
@@ -1,17 +1,14 @@
 #' Geosciences reference data sets in R
 #'
-#' @description
-#' The package provides some reference data sets that are regularly used in
-#' geosciences. These are a periodic table of the elements, a list of minerals
-#' and some reservoir reference data set (continental crust, mantle, basalts,
-#' ...). Some functions for basic queries of the periodic table of the elements
-#' and the list of minerals are also included.\cr
-#' All records have full references, making them citable.
+#' @description The package includes reference data sets commonly used in
+#' geosciences, such as the standard atomic weights of elements, a periodic
+#' table, a mineral list, reservoir reference datasets (continental crust,
+#' mantle, basalts, etc.), decay constants, and isotopic ratios frequently used
+#' in geochronology. Additionally, the package provides functions for basic
+#' queries of atomic weights and mineral lists.\cr
+#' All datasets have complete references, making them citable.
 #'
-#' @references {
-#'   \insertNoCite{*}{georefdatar}
-#'   \insertAllCited{}
-#' }
+#' @references { \insertNoCite{*}{georefdatar} \insertAllCited{} }
 #' @importFrom Rdpack reprompt
 #'
 #' @author  Gerald Schuberth-Hlavač

diff --git a/README.Rmd b/README.Rmd
@@ -17,6 +17,7 @@ geoscience for use with _R_.
 Up to now these are:
 
 * periodic table of the elements
+* standard atomic weights of the elements, as recommended by [IUPAC](https://iupac.org)
 * table of minerals -- with name, symbol (abbreviation) and chemistry
 * well known and widely used geochemical data of reservoirs like the primitive
   mantle, the continental crust, basalts, ...
@@ -74,10 +75,10 @@ devtools::install_github("abuseki/georefdatar")
 
 * Query atomic mass from the periodic table of the elements:
   ```{r}
-  m('H')
-  m('H')*2 + m('O')
+  aw('H')
+  aw('H')*2 + aw('O')
 
   # Si wt% in muscovite
-  round(3*m('Si') / (m('K')+3*m('Al')+3*m('Si')+12*m('O')+2*m('H')) * 100, 2)
+  round(3*aw('Si') / (aw('K')+3*aw('Al')+3*aw('Si')+12*aw('O')+2*aw('H')) * 100, 2)
   ```
 
diff --git a/README.md b/README.md
@@ -12,6 +12,8 @@ for geoscience for use with *R*.
 Up to now these are:
 
 - periodic table of the elements
+- standard atomic weights of the elements, as recommended by
+  [IUPAC](https://iupac.org)
 - table of minerals – with name, symbol (abbreviation) and chemistry
 - well known and widely used geochemical data of reservoirs like the
   primitive mantle, the continental crust, basalts, …
@@ -125,20 +127,20 @@ devtools::install_github("abuseki/georefdatar")
 - Query atomic mass from the periodic table of the elements:
 
   ``` r
-  m('H')
+  aw('H')
   ```
 
       ## [1] 1.008
 
   ``` r
-  m('H')*2 + m('O')
+  aw('H')*2 + aw('O')
   ```
 
       ## [1] 18.015
 
   ``` r
   # Si wt% in muscovite
-  round(3*m('Si') / (m('K')+3*m('Al')+3*m('Si')+12*m('O')+2*m('H')) * 100, 2)
+  round(3*aw('Si') / (aw('K')+3*aw('Al')+3*aw('Si')+12*aw('O')+2*aw('H')) * 100, 2)
   ```
 
       ## [1] 21.15
diff --git a/data-raw/IUPAC__2021__Standard_atomic_weights_of_the_elements.xlsx b/data-raw/IUPAC__2021__Standard_atomic_weights_of_the_elements.xlsx
diff --git a/data-raw/IUPAC__Standard_atomic_weights_of_the_elements.R b/data-raw/IUPAC__Standard_atomic_weights_of_the_elements.R
@@ -0,0 +1,110 @@
+## code to prepare the `IUPAC_StdAW` dataset
+
+t <-  readxl::read_excel('data-raw/IUPAC__2021__Standard_atomic_weights_of_the_elements.xlsx', sheet = 'Table 1')
+
+# Rename columns
+## There are two header rows that will be squeezed into one
+## Standard Atomic Weight => prefix: "stdAW::"
+## Abridged Standard Atomic Weight => prefix "abrStdAW::"
+t[1,4] <- "stdAW::Value"
+t[1,5] <- 'stdAW::Uncertainty'
+t[1,7] <- "abrStdAW::Value"
+t[1,8] <- "abrStdAW::±"
+
+# Remove row two; it is no longer needed after renaming the columns
+t <- t[-2,]
+
+# Write the table out and read it back in, skipping the first row, so that the
+# values edited above become the column names
+t <- readxl::read_excel(writexl::write_xlsx(t), skip = 1)
+
+
+
+# convert to numeric
+
+## Standard atomic weight
+## there are non-breaking spaces in here
+## tools::showNonASCII(t$`stdAW::Uncertainty`)
+## U+00A0	 	c2 a0	NO-BREAK SPACE
+
+### stdAW::Value: We can't convert this vector fully since there are ranges
+### but we can clean it from the non-breaking spaces
+t$`stdAW::Value` <- gsub("(\\s|\u00A0)", "", t$`stdAW::Value`)
+
+### stdAW::Uncertainty: This vector can be cleaned fully
+t$`stdAW::Uncertainty` <- as.numeric(gsub("(\\s|\u00A0)", "", t$`stdAW::Uncertainty`))
+
+## abridged standard atomic weight
+## Convert this fully to numeric values
+t$`abrStdAW::Value` <- as.numeric(t$`abrStdAW::Value`)
+t$`abrStdAW::±` <- as.numeric(t$`abrStdAW::±`)
+
+## check for non numeric
+stopifnot(hularr::which.nonnumeric(t$`abrStdAW::Value`) == 0)
+stopifnot(hularr::which.nonnumeric(t$`abrStdAW::±`) == 0 )
+stopifnot(hularr::which.nonnumeric(t$`stdAW::Uncertainty`) == 0)
+
+# notes column
+# Will be the summary of the (foot) notes
+t$Note <- NA
+
+
+# Elements whose names end with an asterisk are those carrying footnote a) in
+# table 1. They are marked with an asterisk in the Excel file so that the
+# element name does not end with an 'a'.
+# There should be 38 such elements
+fna <- which(grepl("\\*$", t$Element))
+stopifnot(length(fna) == 38)
+# Remove the * and leave a note
+t$Element[fna] <- gsub("\\*$", "", t$Element[fna])
+t$Note[fna] <- 'a'
+
+# Capitalize elements
+# t$Element <- unlist(lapply(t$Element, function(x) {
+#   substr(x,1,1) <- toupper(substr(x, 1, 1))
+#   x
+# }))
+
+
+
+# check elements
+## Al and Cs will show up, since their names are spelled differently in `pte`
+t[which(!(toupper(t$Element) %in% toupper(pte$Name))), "Element"]
+
+## Check symbols
+t[-which(t$Symbol %in% pte$Symbol), "Symbol"]
+
+
+# Combine Foot-note and Note
+combineLists <- function(l1, l2) {
+  unlist(
+    lapply(seq_along(l1), function(i) {
+      if (is.na(l1[i]) & is.na(l2[i]))
+        return(NA)
+
+      if (is.na(l1[i]) & !is.na(l2[i]))
+        return(l2[i])
+
+      if (!is.na(l1[i]) & is.na(l2[i]))
+        return(l1[i])
+
+      return(paste(l1[i], l2[i]))
+    })
+  )
+}
+xx <- combineLists(t$Note, t$`Foot-note`)
+
+# resolve footnotes
+## import foot note texts
+fnotes <- readxl::read_excel('data-raw/IUPAC__2021__Standard_atomic_weights_of_the_elements.xlsx', sheet = 'Tab1_footnotes')
+# Turn this into a named vector
+fnotes <- with(fnotes, setNames(Text, Footnote))
+# replace notes by their texts
+t$Note <- unlist(lapply(strsplit(xx, "\\s"), function(y) paste(fnotes[y], collapse = "\n")))
+t$Note <- gsub("NA", NA, t$Note)
+# remove the Foot-note column, now resolved in Note
+t$`Foot-note` <- NULL
+
+# we finished
+IUPAC_StdAW <- as.data.frame(t)
+usethis::use_data(IUPAC_StdAW, overwrite = TRUE)
diff --git a/data-raw/RRUFF_IMA_MinData.R b/data-raw/RRUFF_IMA_MinData.R
@@ -5,8 +5,6 @@
 # Retrieved February 28, 2022 from
 #   https://rruff.info/ima/
 
-library(dplyr)
-
 mins <- readr::read_csv('data-raw/RRUFF_Export_20220228_102415.csv', show_col_types = FALSE)
 mins <- dplyr::select(mins, `Mineral Name (plain)`, `IMA Chemistry (plain)`, `IMA Mineral Symbol`)
 mins <- dplyr::rename(mins,

diff --git a/data-raw/Sun_McDonough__1989.R b/data-raw/Sun_McDonough__1989.R
@@ -12,11 +12,11 @@
 SM <- read.csv('data-raw/Sun_McDonough__1989.csv', comment.char = '#')
 
 # separate reservoirs and make them wider
-C1__Sun_McDounough__1989 = tidyr::pivot_wider(data = SM[,c('Element', 'C1.chondrite')], names_from = Element, values_from = 'C1.chondrite')
-PM__Sun_McDounough__1989 = tidyr::pivot_wider(data = SM[,c('Element', 'Primitive.mantle')], names_from = Element, values_from = 'Primitive.mantle')
-NMORB__Sun_McDounough__1989 = tidyr::pivot_wider(data = SM[,c('Element', 'N.type.MORB')], names_from = Element, values_from = 'N.type.MORB')
-EMORB__Sun_McDounough__1989 = tidyr::pivot_wider(data = SM[,c('Element', 'E.type.MORB')], names_from = Element, values_from = 'E.type.MORB')
-OIB__Sun_McDounough__1989 = tidyr::pivot_wider(data = SM[,c('Element', 'OIB')], names_from = Element, values_from = 'OIB')
+C1__Sun_McDounough__1989 <- tidyr::pivot_wider(data = SM[,c('Element', 'C1.chondrite')], names_from = Element, values_from = 'C1.chondrite')
+PM__Sun_McDounough__1989 <- tidyr::pivot_wider(data = SM[,c('Element', 'Primitive.mantle')], names_from = Element, values_from = 'Primitive.mantle')
+NMORB__Sun_McDounough__1989 <- tidyr::pivot_wider(data = SM[,c('Element', 'N.type.MORB')], names_from = Element, values_from = 'N.type.MORB')
+EMORB__Sun_McDounough__1989 <- tidyr::pivot_wider(data = SM[,c('Element', 'E.type.MORB')], names_from = Element, values_from = 'E.type.MORB')
+OIB__Sun_McDounough__1989 <- tidyr::pivot_wider(data = SM[,c('Element', 'OIB')], names_from = Element, values_from = 'OIB')
 
 # save data
 ## We do not use the chondrite, we use the CI by McDonough_Sun__1995
@@ -25,5 +25,3 @@ usethis::use_data(PM__Sun_McDounough__1989, overwrite = TRUE)
 usethis::use_data(NMORB__Sun_McDounough__1989, overwrite = TRUE)
 usethis::use_data(EMORB__Sun_McDounough__1989, overwrite = TRUE)
 usethis::use_data(OIB__Sun_McDounough__1989, overwrite = TRUE)
-
-
diff --git a/data/IUPAC_StdAW.rda b/data/IUPAC_StdAW.rda
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,4 @@ @@
     *.bak
     *.tmp
     *.bib.sav
+    checks