Skip to content

Commit

Permalink
repair parafac loss function
Browse files Browse the repository at this point in the history
PARAFAC ALS now follows Kolda & Bader (2009) more closely; tests were added to double-check that the solutions for the example datasets remain the same.
  • Loading branch information
GRvanderPloeg committed Sep 4, 2024
1 parent 45e0830 commit 1a62a19
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 10 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(parafac_core_als)
export(parafac_fun)
export(plotPARAFACmodel)
export(processDataCube)
export(reinflateFac)
export(reinflateTensor)
export(sumsqr)
export(transformPARAFACloadings)
Expand Down
1 change: 0 additions & 1 deletion R/initializePARAFAC.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,5 @@ initializePARAFAC = function(Tensor, nfac, initialization="random"){
init[[i]] = svd(df, nfac)$u
}
}

return(init)
}
5 changes: 3 additions & 2 deletions R/parafac_core_als.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ parafac_core_als = function(Tensor, nfac, init, maxit=500, ctol=1e-4){

for (m in 1:numModes) {
V = rTensor::hadamard_list(lapply(Fac[-m], function(x) {t(x) %*% x}))
V_inv = pracma::pinv(V) #solve(V) throws errors when very near convergence
V_inv = pracma::pinv(V) #solve(V) throws errors when very near convergence, pseudoinverse is suggested by Kolda and Bader 2009
tmp = rTensor::k_unfold(Tensor, m)@data %*% rTensor::khatri_rao_list(Fac[-m], reverse = TRUE) %*% V_inv
lambdas = apply(tmp, 2, function(x){norm(as.matrix(x))})
Fac[[m]] = sweep(tmp, 2, lambdas, "/")
}

fs[iteration] = parafac_fun(Tensor, Fac)
fs[iteration] = parafac_fun(Tensor, Fac, lambdas)
rel_f = abs(fs[iteration]-fs[iteration-1])/tnsr_norm
iteration = iteration + 1
}
Expand All @@ -54,6 +54,7 @@ parafac_core_als = function(Tensor, nfac, init, maxit=500, ctol=1e-4){
for(n in 1:nfac){
Fac[[1]][,n] = Fac[[1]][,n] * lambdas[n]
}
Fac[[numModes+1]] = NULL # remove lambdas afterwards

model = list("Fac" = Fac, "fs" = fs[1:(iteration-1)])
return(model)
Expand Down
9 changes: 7 additions & 2 deletions R/parafac_fun.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#'
#' @param X Input data
#' @param Fac Fac object generated by the PARAFAC algorithm
#' @param lambdas Optional vector of component weights (lambdas) from the Kruskal-tensor formulation, produced when the Fac columns are normalized to norm 1.
#'
#' @return Scalar value of the loss function
#' @export
Expand All @@ -13,8 +14,12 @@
#' X = reinflateTensor(A, B, C)
#' model = parafac(X, 2)
#' f = parafac_fun(X, model$Fac)
parafac_fun = function(X, Fac){
Xhat = reinflateTensor(Fac[[1]], Fac[[2]], Fac[[3]], returnAsTensor=TRUE)
# Loss function for PARAFAC: the Frobenius norm of the residual between the
# input data X and the reconstruction implied by the Fac loadings.
parafac_fun = function(X, Fac, lambdas=NULL){
  # When component weights are supplied, append them as an extra Fac element
  # so that reinflateFac treats the input as a (normalized) kruskal tensor.
  if(!is.null(lambdas)){
    Fac = c(Fac, list(as.matrix(lambdas)))
  }

  residual = reinflateFac(Fac, X, returnAsTensor=TRUE) - X
  return(rTensor::fnorm(residual))
}
37 changes: 37 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,43 @@ reinflateTensor = function(A, B, C, returnAsTensor=FALSE){
return(reinflatedTensor)
}

#' Calculate Xhat from a model Fac object
#'
#' Reconstructs the modelled data as a sum of rank-one tensors, one per
#' component, optionally scaled by per-component weights (lambdas) when the
#' Fac describes a normalized kruskal tensor.
#'
#' @param Fac Fac object from parafac: a list of loading matrices, one per
#'   mode, optionally followed by one extra element holding the lambdas.
#' @param X Input data X; a three-way array (used for its dimensions).
#' @param returnAsTensor Boolean to return Xhat as rTensor tensor (TRUE) or matrix (default, FALSE).
#'
#' @return Xhat, the reconstructed data, with the same dimensions as X.
#' @export
#'
#' @examples
#' processedFujita = processDataCube(Fujita2023, sparsityThreshold=0.99, centerMode=1, scaleMode=2)
#' model = parafac(processedFujita$data, nfac=1, nstart=1, verbose=FALSE)
#' Xhat = reinflateFac(model$Fac, processedFujita$data)
reinflateFac = function(Fac, X, returnAsTensor=FALSE){
  Fac = lapply(Fac, as.matrix) # Cast to matrix for correct indexation in the one-component case.
  numModes = length(dim(X))
  numComponents = ncol(Fac[[1]])

  # The reconstruction below indexes Fac[[1]], Fac[[2]], Fac[[3]] explicitly,
  # so only three-way data is supported; fail early with a clear message.
  if(numModes != 3){
    stop("reinflateFac expects a three-way array X, but got ", numModes, " modes.")
  }

  # Check for lambdas i.e. kruskal tensors: an extra element beyond the
  # number of modes holds the per-component weights.
  kruskal = length(Fac) > numModes

  # Calculate reinflated data as the weighted sum of rank-one tensors.
  Xhat = array(0L, dim(X))
  for(i in seq_len(numComponents)){
    # Scalar condition, so use if/else rather than vectorized ifelse().
    lambda = if(kruskal) Fac[[numModes+1]][i] else 1 # ACMTF-style lambda, otherwise lambda=1
    Xhat = Xhat + lambda * reinflateTensor(Fac[[1]][,i], Fac[[2]][,i], Fac[[3]][,i])
  }

  if(returnAsTensor == TRUE){
    Xhat = rTensor::as.tensor(Xhat)
  }
  return(Xhat)
}

#' Sum-of-squares calculation
#'
#' @param X Either a list containing matrices, or a matrix of values.
Expand Down
4 changes: 3 additions & 1 deletion man/parafac_fun.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions man/reinflateFac.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 39 additions & 4 deletions tests/testthat/test-parafac.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,50 @@ test_that("nstart larger than 1 returns nstart models if output is all", {
expect_equal(length(models), 10)
})

test_that("ALS and multiway yield similar models", {
# Build a rank-2 three-way tensor from random loadings, fit it with both this
# package's ALS and multiway's PARAFAC, and compare the reconstructions.
test_that("ALS and multiway yield similar models in the easy case", {
A = array(rnorm(108,2), c(108,2))
B = array(rnorm(100,2), c(100,2))
C = array(rnorm(10,2), c(10,2))
X = reinflateTensor(A, B, C, returnAsTensor=FALSE)
model_als = parafac(X, 2, nstart=10)
model_mw = multiway::parafac(X, 2, nstart=10, verbose=FALSE)

# NOTE(review): the assertions below appear twice — this looks like a diff
# rendering that shows both the old elementwise comparison and the new
# fnorm-based comparison. Confirm only the fnorm check is intended in the
# applied file; presumably fnorm is compared to sidestep sign/permutation
# indeterminacy of the loadings — verify.
Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]])
Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C)
expect_equal(Xhat_als, Xhat_mw)
Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE)
Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE)
expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw))
})

# Fit the processed Fujita2023 dataset with both implementations and compare
# the Frobenius norms of the reconstructions, within a tolerance (ALS runs
# from random starts, so only approximate agreement is expected).
test_that("ALS and multiway yield similar models in the fujita case", {
processedFujita = processDataCube(Fujita2023, sparsityThreshold=0.99, CLR=TRUE, centerMode=1, scaleMode=2)
X = processedFujita$data
model_als = parafac(X, 3, nstart=10)
model_mw = multiway::parafac(X, 3, nstart=10, verbose=FALSE)

Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE)
Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE)
expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw), tolerance=0.1)
})

# Fit the processed Shao2019 dataset with both implementations and compare
# reconstruction norms within a tolerance. NAs are zeroed first so both
# implementations see identical input.
test_that("ALS and multiway yield similar models in the shao case", {
processedShao = processDataCube(Shao2019, sparsityThreshold=0.9, considerGroups=TRUE, groupVariable="Delivery_mode", CLR=TRUE, centerMode=1, scaleMode=2)
X = processedShao$data
X[is.na(X)] = 0 # force NAs to zero otherwise discrepancies occur
model_als = parafac(X, 3, nstart=10)
model_mw = multiway::parafac(X, 3, nstart=10, verbose=FALSE)

Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE)
Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE)
expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw), tolerance=0.1)
})

# Fit the processed vanderPloeg2024 dataset with both implementations and
# compare reconstruction norms within a tolerance. NAs are zeroed first so
# both implementations see identical input.
test_that("ALS and multiway yield similar models in the ploeg case", {
processedPloeg = processDataCube(vanderPloeg2024, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
X = processedPloeg$data
X[is.na(X)] = 0 # force NAs to zero otherwise discrepancies occur
model_als = parafac(X, 3, nstart=10)
model_mw = multiway::parafac(X, 3, nstart=10, verbose=FALSE)

Xhat_als = reinflateTensor(model_als$Fac[[1]], model_als$Fac[[2]], model_als$Fac[[3]], returnAsTensor=TRUE)
Xhat_mw = reinflateTensor(model_mw$A, model_mw$B, model_mw$C, returnAsTensor=TRUE)
expect_equal(rTensor::fnorm(Xhat_als), rTensor::fnorm(Xhat_mw), tolerance=0.1)
})

0 comments on commit 1a62a19

Please sign in to comment.