Skip to content

Commit

Permalink
Add sparse+low rank and other models (#134)
Browse files Browse the repository at this point in the history
* fix pr and mvdistr

* add pulsar + glmnets

* added cpp admm

* updated wrappers and src

* update exports

* added slr to spiec.easi

* added npn option

* added lambda.max option

* init ebic methods

* many changes

* fixed imports

* update README

* update some docs

* rebase and cleanup docs

* update docs and roxygen comments

* fix readme

Co-authored-by: Zachary Kurtz <zkurtz@lodotherapeutics.com>
  • Loading branch information
zdk123 and zdk123 committed Jul 13, 2020
1 parent e4d7c0a commit 24f1075
Show file tree
Hide file tree
Showing 45 changed files with 2,184 additions and 436 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: SpiecEasi
Title: Sparse Inverse Covariance for Ecological Statistical Inference
Version: 1.0.7
Version: 1.1.0
Authors@R: c(
person("Zachary", "Kurtz", role = c("aut", "cre"), email="zdkurtz@gmail.com"),
person("Christian", "Mueller", role = "aut"),
Expand All @@ -10,7 +10,7 @@ Authors@R: c(
Description: Estimate networks from the precision matrix of compositional microbial abundance data.
Encoding: UTF-8
Depends:
R (>= 3.3.0),
R (>= 3.6.0),
Imports:
stats,
methods,
Expand All @@ -20,7 +20,8 @@ Imports:
pulsar (>= 0.3.4),
MASS,
VGAM,
Matrix
Matrix,
glmnet
Suggests:
parallel,
boot,
Expand All @@ -30,5 +31,5 @@ Suggests:
testthat
biocViews:
License: GPL (>= 2)
LazyData: true
RoxygenNote: 6.1.1
LinkingTo: Rcpp, RcppArmadillo
RoxygenNote: 7.1.0
16 changes: 16 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method('$',Matrix)
S3method('[[',Matrix)
S3method(alr,data.frame)
S3method(alr,default)
S3method(alr,matrix)
Expand All @@ -8,15 +10,19 @@ S3method(as.matrix,graph)
S3method(clr,data.frame)
S3method(clr,default)
S3method(clr,matrix)
S3method(getOptX,pulsar.refit)
S3method(spiec.easi,default)
S3method(spiec.easi,list)
S3method(spiec.easi,otu_table)
S3method(spiec.easi,phyloseq)
export(adj2igraph)
export(alr)
export(clr)
export(coat)
export(cor2cov)
export(cov2prec)
export(ebic)
export(edge.diss)
export(fitdistr)
export(getOptBeta)
export(getOptCov)
Expand All @@ -33,6 +39,7 @@ export(make_graph)
export(multi.spiec.easi)
export(neff)
export(norm_pseudo)
export(norm_rdiric)
export(norm_to_total)
export(prec2cov)
export(pval.sparccboot)
Expand All @@ -42,6 +49,7 @@ export(rmvnorm)
export(rmvpois)
export(rmvzinegbin)
export(rmvzipois)
export(robustPCA)
export(rzipois)
export(shannon)
export(sparcc)
Expand All @@ -52,12 +60,17 @@ export(stars.pr)
export(stars.roc)
export(symBeta)
export(synth_comm_from_counts)
export(tril)
export(triu)
export(triu2diag)
importFrom(MASS,ginv)
importFrom(Matrix,t)
importFrom(VGAM,dzinegbin)
importFrom(VGAM,dzipois)
importFrom(VGAM,qzinegbin)
importFrom(VGAM,qzipois)
importFrom(VGAM,rdiric)
importFrom(glmnet,glmnet)
importFrom(grDevices,dev.off)
importFrom(grDevices,png)
importFrom(graphics,abline)
Expand All @@ -68,6 +81,8 @@ importFrom(huge,huge)
importFrom(huge,huge.npn)
importFrom(methods,as)
importFrom(pulsar,batch.pulsar)
importFrom(pulsar,getLamPath)
importFrom(pulsar,getMaxCov)
importFrom(pulsar,pulsar)
importFrom(stats,cor)
importFrom(stats,cov)
Expand All @@ -90,3 +105,4 @@ importFrom(stats,rpois)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,var)
useDynLib(SpiecEasi)
8 changes: 8 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @noRd
ADMM <- function(SigmaO, lambda, I, Lambda, Y, beta = 0, r = 0L, mu = 0, eta = 4/5, muf = 1e-6, maxiter = 500L, newtol = 1e-5, tol = 1e-5, over_relax_par = 1.6, shrinkDiag = TRUE) {
.Call('_SpiecEasi_ADMM', PACKAGE = 'SpiecEasi', SigmaO, lambda, I, Lambda, Y, beta, r, mu, eta, muf, maxiter, newtol, tol, over_relax_par, shrinkDiag)
}

113 changes: 107 additions & 6 deletions R/SparseICov.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,9 @@ sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE
if (npn) data <- huge::huge.npn(data, verbose=verbose)

args <- list(...)
method <- switch(method, glasso = "glasso", mb = "mb", stop("Method not supported"))

method <- switch(method, glasso = "glasso", mb = "mb",
stop("Method not supported"))

if (is.null(args$lambda.min.ratio))
args$lambda.min.ratio <- 1e-3

if (is.null(args$lambda.min.ratio)) args$lambda.min.ratio <- 1e-3
est <- do.call(huge::huge, c(args, list(x=data,
method=method,
verbose=verbose,
Expand All @@ -69,3 +65,108 @@ sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE
# }
return(est)
}


#' Neighborhood net estimates
#'
#' Select a sparse inverse covariance matrix using neighborhood selection and glmnet from various exponential models.
#' @param data n x p input (pre-transformed) data
#' @param lambda the lambda path
#' @param method ising and poisson models currently supported.
#' @param ncores number of cores for distributing the model fitting
#' @param sym symmetrize the neighborhood using the 'or' (default)/'and' rule
#' @param ... further arguments to glmnet
#' @importFrom Matrix t
neighborhood.net <- function(data, lambda, method="ising", ncores=1, sym='or', ...) {
p <- ncol(data)
l <- length(lambda)
args <- list(...)
match.fun(switch(method,
ising ={ nbFun <- glm.neighborhood ; args$link <- 'binomial' },
poisson ={ nbFun <- glm.neighborhood ; args$link <- 'poisson' }
# loglin ={ nbFun <- llgm.neighborhood}
))

### Remove 0/1 binomials/ zero variance features
if (method=='ising') {
mintab <- function(x) {
xtab <- table(x)
length(xtab) == 1 || min(xtab) == 1
}
zvind <- which(apply(data, 2, mintab))
} else {
zvind <- which(apply(data, 2, var)==0)
}

estFun <- function(i) {
betamat <- matrix(0, p, l)
if (!(i %in% zvind)) {
suppressWarnings(
out <- do.call(nbFun,
c(list(data[,-c(i, zvind)], data[,i,drop=FALSE], lambda), args)))
lsub <- ncol(out)
if (lsub<l) {
## extend missing lambdas value if glmnet
## returns only larger solution ##
out <- cbind(out, out[,rep(lsub, l-lsub)])
}
betamat[-c(i, zvind),] <- out
}
betamat
}

est <- parallel::mcmapply(estFun, 1:p,
mc.cores=ncores, SIMPLIFY='array')
beta <- vector('list', length(lambda))
path <- vector('list', length(lambda))
for (i in 1:dim(est)[2]) {
tmp <- as(est[,i,], 'dgCMatrix')
beta[[i]] <- tmp
tmp <- as(tmp, 'lgCMatrix')
path[[i]] <- if (sym == "or") sign(tmp | t(tmp)) else sign(tmp & t(tmp))
}
list(beta=beta, path=path)
}


#' @importFrom glmnet glmnet
#' @noRd
glm.neighborhood <- function(X, Y, lambda, link='binomial', ...) {
return(as.matrix(Bmat <- glmnet::glmnet(X, Y, family=link, lambda=lambda, ...)$beta))
}

# #' @useDynLib SpiecEasi LPGM_neighborhood
# llgm.neighborhood <- function(X, Y, lambda, startb=0, th=1e-6, intercept=FALSE) {
# n = nrow(X); p = ncol(X);
# p_new = p
# nlams <- length(lambda)
# X <- scale(X)
# # NOTE: here check if intercept, change X and
# if(intercept){
# Xorig = X;
# X = cbind(t(t(rep(1,n))),Xorig);
# p_new = ncol(X);
# }
#
# if(length(startb) == 1 & startb == 0){startb = rep(0, p_new)}
#
# alphasin = rep(0, nlams)
# Bmatin = matrix(0,p,nlams);
#
# out <- .C("LPGM_neighborhood",
# X=as.double(t(X)), Y=as.double(Y), startb=as.double(startb),
# lambda=as.double(lambda), n=as.integer(n), p=as.integer(p_new), nlams=as.integer(length(lambda)),
# alphas=as.double(alphasin), Bmat=as.double(Bmatin), PACKAGE="SpiecEasi")
#
# alphas = out$alphas
# if(is.null(out$Bmat)) {
# Bmat = NULL
# } else {
# Bmat = matrix(out$Bmat, nrow=nrow(Bmatin), byrow=TRUE)
# Bmat <- Bmat*(abs(Bmat)>th)
# }
# return(Bmat)
# }

dclr <- function(x) t(clr(apply(x, 1, norm_diric),2))
dclrNPN <- function(x) huge::huge.npn(t(clr(apply(x, 1, norm_diric),2)), verbose=FALSE)
Loading

0 comments on commit 24f1075

Please sign in to comment.