diff --git a/NAMESPACE b/NAMESPACE index 54b846b..dc79256 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(parafac_core_als) export(parafac_fun) export(plotPARAFACmodel) export(processDataCube) +export(reinflateFac) export(reinflateTensor) export(sumsqr) export(transformPARAFACloadings) diff --git a/R/initializePARAFAC.R b/R/initializePARAFAC.R index fe2d070..5e6363a 100644 --- a/R/initializePARAFAC.R +++ b/R/initializePARAFAC.R @@ -33,6 +33,5 @@ initializePARAFAC = function(Tensor, nfac, initialization="random"){ init[[i]] = svd(df, nfac)$u } } - return(init) } diff --git a/R/parafac_core_als.R b/R/parafac_core_als.R index a648227..9309214 100644 --- a/R/parafac_core_als.R +++ b/R/parafac_core_als.R @@ -38,13 +38,13 @@ parafac_core_als = function(Tensor, nfac, init, maxit=500, ctol=1e-4){ for (m in 1:numModes) { V = rTensor::hadamard_list(lapply(Fac[-m], function(x) {t(x) %*% x})) - V_inv = pracma::pinv(V) #solve(V) throws errors when very near convergence + V_inv = pracma::pinv(V) #solve(V) throws errors when very near convergence, pseudoinverse is suggested by Kolda and Bader 2009 tmp = rTensor::k_unfold(Tensor, m)@data %*% rTensor::khatri_rao_list(Fac[-m], reverse = TRUE) %*% V_inv lambdas = apply(tmp, 2, function(x){norm(as.matrix(x))}) Fac[[m]] = sweep(tmp, 2, lambdas, "/") } - fs[iteration] = parafac_fun(Tensor, Fac) + fs[iteration] = parafac_fun(Tensor, Fac, lambdas) rel_f = abs(fs[iteration]-fs[iteration-1])/tnsr_norm iteration = iteration + 1 } @@ -54,6 +54,7 @@ parafac_core_als = function(Tensor, nfac, init, maxit=500, ctol=1e-4){ for(n in 1:nfac){ Fac[[1]][,n] = Fac[[1]][,n] * lambdas[n] } + Fac[[numModes+1]] = NULL # remove lambdas afterwards model = list("Fac" = Fac, "fs" = fs[1:(iteration-1)]) return(model) diff --git a/R/parafac_fun.R b/R/parafac_fun.R index a641ba4..6da0b22 100644 --- a/R/parafac_fun.R +++ b/R/parafac_fun.R @@ -2,6 +2,7 @@ #' #' @param X Input data #' @param Fac Fac object generated by the PARAFAC algorithm +#' @param lambdas If lambdas (from the kruskal tensor case) are generated to make the Fac norm 1, they can be supplied. #' #' @return Scalar value of the loss function #' @export @@ -13,8 +14,12 @@ #' X = reinflateTensor(A, B, C) #' model = parafac(X, 2) #' f = parafac_fun(X, model$Fac) -parafac_fun = function(X, Fac){ - Xhat = reinflateTensor(Fac[[1]], Fac[[2]], Fac[[3]], returnAsTensor=TRUE) +parafac_fun = function(X, Fac, lambdas=NULL){ + if(!is.null(lambdas)){ + Fac[[length(Fac)+1]] = as.matrix(lambdas) + } + + Xhat = reinflateFac(Fac, X, returnAsTensor=TRUE) f = rTensor::fnorm(Xhat - X) return(f) } diff --git a/R/utils.R b/R/utils.R index 5c8e1cc..72e094d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -259,6 +259,43 @@ reinflateTensor = function(A, B, C, returnAsTensor=FALSE){ return(reinflatedTensor) } +#' Calculate Xhat from a model Fac object +#' +#' @param Fac Fac object from parafac +#' @param X Input data X +#' @param returnAsTensor Boolean to return Xhat as rTensor tensor (TRUE) or matrix (default, FALSE). +#' +#' @return Xhat +#' @export +#' +#' @examples +#' processedFujita = processDataCube(Fujita2023, sparsityThreshold=0.99, centerMode=1, scaleMode=2) +#' model = parafac(processedFujita$data, nfac=1, nstart=1, verbose=FALSE) +#' Xhat = reinflateFac(model$Fac, processedFujita$data) +reinflateFac = function(Fac, X, returnAsTensor=FALSE){ + Fac = lapply(Fac, as.matrix) # Cast to matrix for correct indexation in the one-component case. + numModes = length(dim(X)) + numComponents = ncol(Fac[[1]]) + + # Check for lambdas i.e. kruskal tensors + kruskal = FALSE + if(length(Fac) > numModes){ + kruskal = TRUE + } + + # Calculate reinflated data + Xhat = array(0L, dim(X)) + for(i in 1:numComponents){ + lambda = ifelse(kruskal, Fac[[numModes+1]][i], 1) # Check for ACMTF model lambdas, otherwise lambda=1 + Xhat = Xhat + lambda * reinflateTensor(Fac[[1]][,i], Fac[[2]][,i], Fac[[3]][,i]) + } + + if(returnAsTensor == TRUE){ + Xhat = rTensor::as.tensor(Xhat) + } + return(Xhat) +} + #' Sum-of-squares calculation #' #' @param X Either a list containing matrices, or a matrix of values. diff --git a/man/parafac_fun.Rd b/man/parafac_fun.Rd index eca978b..9336071 100644 --- a/man/parafac_fun.Rd +++ b/man/parafac_fun.Rd @@ -4,12 +4,14 @@ \alias{parafac_fun} \title{PARAFAC loss function calculation} \usage{ -parafac_fun(X, Fac) +parafac_fun(X, Fac, lambdas = NULL) } \arguments{ \item{X}{Input data} \item{Fac}{Fac object generated by the PARAFAC algorithm} + +\item{lambdas}{If lambdas (from the kruskal tensor case) are generated to make the Fac norm 1, they can be supplied.} } \value{ Scalar value of the loss function diff --git a/man/reinflateFac.Rd b/man/reinflateFac.Rd new file mode 100644 index 0000000..69af117 --- /dev/null +++ b/man/reinflateFac.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{reinflateFac} +\alias{reinflateFac} +\title{Calculate Xhat from a model Fac object} +\usage{ +reinflateFac(Fac, X, returnAsTensor = FALSE) +} +\arguments{ +\item{Fac}{Fac object from parafac} + +\item{X}{Input data X} + +\item{returnAsTensor}{Boolean to return Xhat as rTensor tensor (TRUE) or matrix (default, FALSE).} +} +\value{ +Xhat +} +\description{ +Calculate Xhat from a model Fac object +} +\examples{ +processedFujita = processDataCube(Fujita2023, sparsityThreshold=0.99, centerMode=1, scaleMode=2) +model = parafac(processedFujita$data, nfac=1, nstart=1, verbose=FALSE) +Xhat = reinflateFac(model$Fac, processedFujita$data) +} diff --git a/tests/testthat/test-parafac.R b/tests/testthat/test-parafac.R index a4ae3cd..e494c95 100644 --- a/tests/testthat/test-parafac.R +++ b/tests/testthat/test-parafac.R @@ -68,7 +68,7 @@ test_that("nstart larger than 1 returns nstart models if output is all", { expect_equal(length(models), 10) }) -test_that("ALS and multiway yield similar models", { +test_that("ALS and multiway yield similar models in the easy case", { A = array(rnorm(108,2), c(108,2)) B = array(rnorm(100,2), c(100,2)) C = array(rnorm(10,2), c(10,2)) @@ -76,7 +76,42 @@ test_that("ALS and multiway yield similar models", { model_als = parafac(X, 2, nstart=10) model_mw = multiway::parafac(X, 2, nstart=10, verbose=FALSE) - Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]]) - Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C) - expect_equal(Xhat_als, Xhat_mw) + Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE) + Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE) + expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw)) +}) + +test_that("ALS and multiway yield similar models in the fujita case", { + processedFujita = processDataCube(Fujita2023, sparsityThreshold=0.99, CLR=TRUE, centerMode=1, scaleMode=2) + X = processedFujita$data + model_als = parafac(X, 3, nstart=10) + model_mw = multiway::parafac(X, 3, nstart=10, verbose=FALSE) + + Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE) + Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE) + expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw), tolerance=0.1) +}) + +test_that("ALS and multiway yield similar models in the shao case", { + processedShao = processDataCube(Shao2019, sparsityThreshold=0.9, considerGroups=TRUE, groupVariable="Delivery_mode", CLR=TRUE, centerMode=1, scaleMode=2) + X = processedShao$data + X[is.na(X)] = 0 # force NAs to zero otherwise discrepancies occur + model_als = parafac(X, 3, nstart=10) + model_mw = multiway::parafac(X, 3, nstart=10, verbose=FALSE) + + Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE) + Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE) + expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw), tolerance=0.1) +}) + +test_that("ALS and multiway yield similar models in the ploeg case", { + processedPloeg = processDataCube(vanderPloeg2024, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2) + X = processedPloeg$data + X[is.na(X)] = 0 # force NAs to zero otherwise discrepancies occur + model_als = parafac(X, 3, nstart=10) + model_mw = multiway::parafac(X, 3, nstart=10, verbose=FALSE) + + Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE) + Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE) + expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw), tolerance=0.1) })