rename package to SME (#1)

lcrawlab · Jan 6, 2025 · 4ccab82 · 4ccab82
1 parent 22a081e
commit 4ccab82
Show file tree

Hide file tree

Showing 59 changed files with 423 additions and 423 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,4 +1,4 @@
-^mmer\.Rproj$
+^sme\.Rproj$
 ^\.Rproj\.user$
 ^LICENSE\.md$
 ^Makefile$

diff --git a/.gitignore b/.gitignore
@@ -48,7 +48,7 @@ po/*~
 # RStudio Connect folder
 rsconnect/
 .Rproj.user
-mmer.Rproj
+sme.Rproj
 
 
 .idea/

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,14 @@
-Package: mmer
-Title: The Multimodal Marginal Epistasis Test
+Package: sme
+Title: The Sparse Marginal Epistasis Test
 Version: 0.0.1
-URL: https://github.com/lcrawlab/mmer, https://lcrawlab.github.io/mmer/
-BugReports: https://github.com/lcrawlab/mmer/issues
+URL: https://github.com/lcrawlab/sme, https://lcrawlab.github.io/sme/
+BugReports: https://github.com/lcrawlab/sme/issues
 Authors@R: c(
     person("Julian", "Stamp", email = "julian.d.stamp@gmail.com",
     role = c("cre", "aut"), comment = c(ORCID = "0000-0003-3014-6249")),
     person("Lorin", "Crawford", email = "lcrawford@microsoft.com",
     role = "aut", comment = c(ORCID = "0000-0003-0178-8242")))
-Description: A computationally efficient genetics method which detects 
+Description: A computationally efficient genetics method which detects
     statistical epistasis in complex traits.
 License: MIT + file LICENSE
 Encoding: UTF-8
@@ -20,15 +20,15 @@ LinkingTo:
     RcppEigen,
     Rhdf5lib,
     testthat
-Imports: 
+Imports:
     dplyr,
     genio,
     logging,
     mvMAPIT,
     Rcpp,
     RcppEigen,
     tidyr
-Suggests: 
+Suggests:
     GenomicRanges,
     ggplot2,
     knitr,
@@ -38,10 +38,10 @@ Suggests:
 Config/testthat/edition: 3
 SystemRequirements: GNU make
 VignetteBuilder: knitr
-Depends: 
+Depends:
     R (>= 4.4.0)
 LazyData: true
-biocViews:     
+biocViews:
     GenomeWideAssociation,
     Epistasis,
     Genetics,

diff --git a/LICENSE.md b/LICENSE.md
@@ -1,6 +1,6 @@
 # MIT License
 
-Copyright (c) 2024 mmer authors
+Copyright (c) 2024 sme authors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,9 +2,9 @@
 
 export(approximate_memory_requirements)
 export(create_hdf5_file)
-export(mme)
 export(read_hdf5_dataset)
 export(simulate_traits)
+export(sme)
 export(write_hdf5_dataset)
 import(Rcpp)
 import(RcppEigen)
@@ -15,4 +15,4 @@ importFrom(stats,pnorm)
 importFrom(tidyr,pivot_longer)
 importFrom(utils,read.delim)
 importFrom(utils,write.table)
-useDynLib(mmer)
+useDynLib(sme)
diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,4 @@
-# mmer 0.0.1
+# sme 0.0.1
 
-* Version that was used in the publication of MME.
+* Version that was used in the publication of SME.
 
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -2,38 +2,38 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 check_openmp <- function() {
-    .Call('_mmer_check_openmp', PACKAGE = 'mmer')
+    .Call('_sme_check_openmp', PACKAGE = 'sme')
 }
 
 count_samples <- function(filename) {
-    .Call('_mmer_count_samples', PACKAGE = 'mmer', filename)
+    .Call('_sme_count_samples', PACKAGE = 'sme', filename)
 }
 
 count_fam <- function(filename) {
-    .Call('_mmer_count_fam', PACKAGE = 'mmer', filename)
+    .Call('_sme_count_fam', PACKAGE = 'sme', filename)
 }
 
 count_snps_bim <- function(filename) {
-    .Call('_mmer_count_snps_bim', PACKAGE = 'mmer', filename)
+    .Call('_sme_count_snps_bim', PACKAGE = 'sme', filename)
 }
 
 createH5File <- function(filename) {
-    invisible(.Call('_mmer_createH5File', PACKAGE = 'mmer', filename))
+    invisible(.Call('_sme_createH5File', PACKAGE = 'sme', filename))
 }
 
 readH5File <- function(filename, datasetName) {
-    .Call('_mmer_readH5File', PACKAGE = 'mmer', filename, datasetName)
+    .Call('_sme_readH5File', PACKAGE = 'sme', filename, datasetName)
 }
 
 replaceH5Dataset <- function(filename, datasetName, newData) {
-    invisible(.Call('_mmer_replaceH5Dataset', PACKAGE = 'mmer', filename, datasetName, newData))
+    invisible(.Call('_sme_replaceH5Dataset', PACKAGE = 'sme', filename, datasetName, newData))
 }
 
-mme_cpp <- function(plink_file, pheno_file, genotype_mask_file, n_randvecs, n_blocks, rand_seed, gxg_indices, n_threads, gxg_h5_dataset, ld_h5_dataset) {
-    .Call('_mmer_mme_cpp', PACKAGE = 'mmer', plink_file, pheno_file, genotype_mask_file, n_randvecs, n_blocks, rand_seed, gxg_indices, n_threads, gxg_h5_dataset, ld_h5_dataset)
+simulate_traits_cpp <- function(plink_file, additive_heritability, gxg_heritability, additive_snps, gxg_group_1, gxg_group_2) {
+    .Call('_sme_simulate_traits_cpp', PACKAGE = 'sme', plink_file, additive_heritability, gxg_heritability, additive_snps, gxg_group_1, gxg_group_2)
 }
 
-simulate_traits_cpp <- function(plink_file, additive_heritability, gxg_heritability, additive_snps, gxg_group_1, gxg_group_2) {
-    .Call('_mmer_simulate_traits_cpp', PACKAGE = 'mmer', plink_file, additive_heritability, gxg_heritability, additive_snps, gxg_group_1, gxg_group_2)
+sme_cpp <- function(plink_file, pheno_file, genotype_mask_file, n_randvecs, n_blocks, rand_seed, gxg_indices, n_threads, gxg_h5_dataset, ld_h5_dataset) {
+    .Call('_sme_sme_cpp', PACKAGE = 'sme', plink_file, pheno_file, genotype_mask_file, n_randvecs, n_blocks, rand_seed, gxg_indices, n_threads, gxg_h5_dataset, ld_h5_dataset)
 }
 
diff --git a/R/approximate_memory_requirements.R b/R/approximate_memory_requirements.R
@@ -1,31 +1,31 @@
-#' Estimate Memory Requirements for MME Routine
+#' Estimate Memory Requirements for SME Routine
 #'
-#' This function provides an approximate estimate of the memory requirements 
-#' (in gigabytes) for running the Multimodal Marginal Epistasis (MME) routine 
+#' This function provides an approximate estimate of the memory requirements
+#' (in gigabytes) for running the Sparse Marginal Epistasis (SME) routine
 #' based on input parameters such as the number of samples, SNPs, and other configurations.
 #'
 #' @param n_samples Integer. The number of samples in the dataset.
 #' @param n_snps Integer. The total number of SNPs in the dataset.
-#' @param n_blocks Integer. The number of genotype blocks used to partition SNPs. 
+#' @param n_blocks Integer. The number of genotype blocks used to partition SNPs.
 #'   Affects the size of encoded genotype segments.
-#' @param n_randvecs Integer. The number of random vectors used for stochastic 
+#' @param n_randvecs Integer. The number of random vectors used for stochastic
 #'   trace estimation. Affects memory for operations involving random vectors.
 #' @param chunksize Integer. The number of focal SNPs processed per chunk.
 #'
 #' @return Numeric. The approximate memory requirement (in gigabytes) for the
-#' MME routine.
+#' SME routine.
 #'
 #' @details
-#' The function calculates memory usage by summing the contributions from 
-#' various components used in the MME routine, including:
+#' The function calculates memory usage by summing the contributions from
+#' various components used in the SME routine, including:
 #' - Variance component estimates (`vc_estimates`)
 #' - Phenotype-related matrices
 #' - Random vector-based computations
 #' - Genotype objects and block statistics
 #' - Gene-by-gene interaction masks
 #'
-#' The estimated memory requirement is derived from the data dimensions 
-#' and operational needs, and it provides a guideline for configuring resources 
+#' The estimated memory requirement is derived from the data dimensions
+#' and operational needs, and it provides a guideline for configuring resources
 #' for the analysis.
 #'
 #' @examples
@@ -34,10 +34,10 @@
 #' n_blocks <- 100
 #' n_randvecs <- 100
 #' chunksize <- 10
-#' approximate_memory_requirements(n_samples, 
-#'                                 n_snps, 
-#'                                 n_blocks, 
-#'                                 n_randvecs, 
+#' approximate_memory_requirements(n_samples,
+#'                                 n_snps,
+#'                                 n_blocks,
+#'                                 n_randvecs,
 #'                                 chunksize)
 #'
 #' @export
@@ -60,7 +60,7 @@ approximate_memory_requirements <- function(n_samples,
   # collect_XXy - Matrix: (n_samples, 1)
   # collect_Gy - Matrix: (n_samples, n_gxg_idx)
   # focal_snps_matrix - Matrix: (n_samples, n_gxg_idx)
-  # collect_XXUy - Matrix: (n_samples, 
+  # collect_XXUy - Matrix: (n_samples,
   #       (n_variance_components + 1) * (n_variance_components + 1) * n_gxg_idx)
   phenotype_like <- n_samples * (6 + 2 * chunksize + 9 * chunksize)
 
@@ -97,11 +97,11 @@ approximate_memory_requirements <- function(n_samples,
   #
   # these are so small they can be neglected
 
-  total <-  vc_estimates + 
-            phenotype_like + 
-            randomvec_like + 
-            gt_objects + 
-            block_stats + 
+  total <-  vc_estimates +
+            phenotype_like +
+            randomvec_like +
+            gt_objects +
+            block_stats +
             mask
   return(total * 8 / 1024 / 1024 / 1024)
 }
diff --git a/R/catch-routine-registration.R b/R/catch-routine-registration.R
@@ -2,5 +2,5 @@
 # 'tools::package_native_routine_registration_skeleton()' generates the required
 # registration info for the 'run_testthat_tests' symbol.
 (function() {
-  .Call("run_testthat_tests", FALSE, PACKAGE = "mmer")
+  .Call("run_testthat_tests", FALSE, PACKAGE = "sme")
 })
diff --git a/R/getting_started.R b/R/getting_started.R
@@ -1,35 +1,35 @@
 #' @title Simulated Dataset for Genome-Wide Interaction Analysis
 #' @description
-#' `getting_started` is a simulated dataset created to demonstrate the use of 
-#' the `mme()` function for genome-wide interaction analyses. It contains 
-#' results from a simulated analysis involving additive genetic effects and 
+#' `getting_started` is a simulated dataset created to demonstrate the use of
+#' the `sme()` function for genome-wide interaction analyses. It contains
+#' results from a simulated analysis involving additive genetic effects and
 #' gene-by-gene (GxG) interactions.
 #'
 #' @details
 #' The dataset was generated as follows:
-#' 
-#' - **Genotype Simulation**:  
+#'
+#' - **Genotype Simulation**:
 #'   Genotype data for 5000 individuals and 6,000 SNPs was simulated with
 #'   synthetic allele counts.
-#' 
-#' - **Phenotype Simulation**:  
+#'
+#' - **Phenotype Simulation**:
 #'   Phenotypic values were simulated with an additive heritability of 0.3 and a
 #'   GxG interaction heritability of 0.25. A set of 100 SNPs were selected for
 #'   additive effects, and two groups of 5 SNPs each were used for GxG
 #'   interactions.
-#' 
-#' - **PLINK-Compatible Files**:  
+#'
+#' - **PLINK-Compatible Files**:
 #'   The simulated data was saved in PLINK-compatible `.bed`, `.fam`,
 #'   and `.bim` files.
-#' 
-#' - **Interaction Analysis**:  
-#'   The `mme()` function was used to perform genome-wide interaction analyses
+#'
+#' - **Interaction Analysis**:
+#'   The `sme()` function was used to perform genome-wide interaction analyses
 #'   on a subset of SNP indices, including the GxG SNP groups and 100 additional
-#'   additive SNPs. Memory-efficient computation parameters 
+#'   additive SNPs. Memory-efficient computation parameters
 #'   (e.g., `chunk_size`, `n_randvecs`, and `n_blocks`) were applied.
 #'
 #' @format
-#' A list with results from `mme()`, including the following components:
+#' A list with results from `sme()`, including the following components:
 #' \describe{
 #'   \item{`summary`}{A data frame summarizing the analysis results, including
 #'   p-values for SNP associations (`p`).}
@@ -42,24 +42,24 @@
 #' }
 #'
 #' @section Key Parameters:
-#' - **Additive Heritability**: 0.3  
-#' - **GxG Heritability**: 0.25  
-#' - **Number of Samples**: 5000  
-#' - **Number of SNPs**: 6,000  
-#' - **Selected Additive SNPs**: 100  
-#' - **Selected GxG SNP Groups**:  
-#'   - Group 1: 5 SNPs  
-#'   - Group 2: 5 SNPs  
+#' - **Additive Heritability**: 0.3
+#' - **GxG Heritability**: 0.25
+#' - **Number of Samples**: 5000
+#' - **Number of SNPs**: 6,000
+#' - **Selected Additive SNPs**: 100
+#' - **Selected GxG SNP Groups**:
+#'   - Group 1: 5 SNPs
+#'   - Group 2: 5 SNPs
 #'
-#' @usage 
+#' @usage
 #' data("getting_started")
 #'
 #' @examples
 #' data("getting_started")
 #' head(getting_started$summary)
 #'
 #' @seealso
-#' \link[mme]{mme}
+#' \link[sme]{sme}
 #'
 #' @keywords datasets
 #' @source data-raw/getting_started.R

diff --git a/R/simulate_traits.R b/R/simulate_traits.R
@@ -1,7 +1,7 @@
 #' Simulate Quantitative Traits from PLINK Genotypes
 #'
-#' This function simulates a quantitative trait based on additive and epistatic 
-#' genetic effects using genotype data from a PLINK dataset. The simulated trait 
+#' This function simulates a quantitative trait based on additive and epistatic
+#' genetic effects using genotype data from a PLINK dataset. The simulated trait
 #' is saved to a specified output file in a phenotype format compatible with
 #' PLINK.
 #'
@@ -34,12 +34,12 @@
 #'   and `gxg_indices_2`, contributing to the `gxg_heritability`.
 #' - Environmental effects: Any remaining variance not explained by genetic
 #'   effects is assigned to random environmental noise.
-#' 
-#' The output file is in PLINK-compatible phenotype format with three columns: 
+#'
+#' The output file is in PLINK-compatible phenotype format with three columns:
 #' Family ID (`FID`), Individual ID (`IID`), and the simulated trait (`TRAIT`).
 #'
 #' @examples
-#' plink_file <- gsub("\\.bed", "", system.file("testdata", "test.bed", package = "mmer"))
+#' plink_file <- gsub("\\.bed", "", system.file("testdata", "test.bed", package = "sme"))
 #' out_file <- tempfile()
 #' additive_heritability <- 0.3
 #' gxg_heritability <- 0.1
@@ -59,7 +59,7 @@
 #' from_file <- read.table(out_file, header = TRUE)
 #' head(from_file)
 #'
-#' @useDynLib mmer
+#' @useDynLib sme
 #' @import genio
 #' @import dplyr
 #' @importFrom utils write.table
@@ -75,7 +75,7 @@ simulate_traits <- function(plink_file,
 
 
   logging::basicConfig(level = log_level)
-  log <- logging::getLogger("mmer::simulate_traits")
+  log <- logging::getLogger("sme::simulate_traits")
 
   if (additive_heritability + gxg_heritability > 1) {
     stop("Additive heritability and gxg heritability should sum to less than 1")