Skip to content

Commit

Permalink
Merge pull request #37 from zdk123/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
zdk123 committed Feb 11, 2018
2 parents e35b73e + 3ed5cb4 commit dea8763
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 76 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: SpiecEasi
Title: Sparse Inverse Covariance for Ecological Statistical Inference
Version: 0.1.3
Version: 0.1.4
Authors.R: c(person("Zachary", "Kurtz", role = c("aut", "cre"),
email="zachary.kurtz@med.nyu.edu"), person("Christian",
"Mueller", role = c("aut")), person("Emily", "Miraldi",
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ S3method(clr,data.frame)
S3method(clr,default)
S3method(clr,matrix)
S3method(spiec.easi,default)
S3method(spiec.easi,otu_table)
S3method(spiec.easi,phyloseq)
export(adj2igraph)
export(alr)
Expand Down Expand Up @@ -49,11 +50,12 @@ export(tril)
export(triu)
export(triu2diag)
importFrom(MASS,ginv)
importFrom(Matrix,forceSymmetric)
importFrom(Matrix,t)
importFrom(VGAM,dzinegbin)
importFrom(VGAM,dzipois)
importFrom(VGAM,qzinegbin)
importFrom(VGAM,qzipois)
importFrom(boot,boot)
importFrom(huge,huge)
importFrom(huge,huge.npn)
importFrom(parallel,mclapply)
Expand Down
70 changes: 47 additions & 23 deletions R/SparseICov.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,50 @@
#' Spiec-Easi pipeline
#'
#' Run the whole analysis, from data transformation, iCov estimation and model selection.
#' Inputs are a non-normalized OTU table and pipeline options.
#' @export
spiec.easi <- function(obj, ...) {
UseMethod('spiec.easi', obj)
}

#' Spiec-Easi pipeline
#' @param obj phyloseq object or just the otu_table
#' @method spiec.easi phyloseq
#' @rdname spiec.easi
#' @export
spiec.easi.phyloseq <- function(obj, ...) {
if (!require('foo')) {
stop('\'Phyloseq\' package is not installed')
if (!require('phyloseq')) {
stop('\'Phyloseq\' package is not installed. See doi.org/doi:10.18129/B9.bioc.phyloseq')
}
OTU <- otu_table(obj)@.Data
if (otu_table(obj)@taxa_are_rows) OTU <- t(OTU)
spiec.easi.default(OTU, ...)
spiec.easi.otu_table(otu_table(obj), ...)
}

#' @method spiec.easi otu_table
#' @rdname spiec.easi
#' @export
spiec.easi.otu_table <- function(obj, ...) {
if (!require('phyloseq')) {
stop('\'Phyloseq\' package is not installed. See doi.org/doi:10.18129/B9.bioc.phyloseq')
}
OTU <- obj@.Data
if (obj@taxa_are_rows) OTU <- t(OTU)
spiec.easi.default(OTU, ...)
}

#' Spiec-Easi pipeline
#' @param data non-normalized count OTU/data table with samples on rows and features/OTUs in columns
#' @param method estimation method to use as a character string. Currently either 'glasso' or 'mb' (meinshausen-buhlmann)
#' @param sel.criterion character string specifying criterion/method for model selection accepts 'stars' [default], 'ric', 'ebic'
#' @param icov.select.params list of further arguments to icov.select
#' @param ... further arguments to sparseiCov
#' @param method estimation method to use as a character string. Currently either 'glasso' or 'mb' (meinshausen-buhlmann's neighborhood selection)
#' @param sel.criterion character string specifying criterion/method for model selection. Accepts 'stars' [default], 'ric', 'ebic' (not recommended for high dimensional data)
#' @param verbose flag to show progress messages
#' @param icov.select.params list of further arguments to \code{\link{icov.select}}
#' @param ... further arguments to \code{\link{sparseiCov}} / \code{huge}
#' @method spiec.easi default
#' @rdname spiec.easi
#' @export
spiec.easi.default <- function(data, method='glasso', sel.criterion='stars', verbose=TRUE,
icov.select=TRUE, icov.select.params=list(), ...) {
spiec.easi.default <- function(data, method='glasso', sel.criterion='stars',
verbose=TRUE, icov.select=TRUE,
icov.select.params=list(), ...) {

args <- list(...)
## TODO: check icov.select.params names before running any code
if (verbose) message("Normalizing/clr transformation of data with pseudocount ...")
data.clr <- t(clr(data+1, 1))
if (verbose) message(paste("Inverse Covariance Estimation with", method, "...", sep=" "))
Expand Down Expand Up @@ -67,6 +81,7 @@ spiec.easi.default <- function(data, method='glasso', sel.criterion='stars', ver
#'
#' The argument \code{nlambda} determines the number of penalties - somewhere between 10-100 is usually good, depending on how the values of empirical correlation are distributed.
#' @importFrom huge huge huge.npn
#' @importFrom Matrix t
#' @export
#' @examples
#' # simulate data with 1 negative correlation
Expand Down Expand Up @@ -102,16 +117,16 @@ sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE
if (is.null(args$lambda.min.ratio)) args$lambda.min.ratio <- 1e-3

if (method %in% c("glasso")) {
do.call(huge::huge, c(args, list(x=data, method=method, verbose=verbose,
cov.output = cov.output)))

est <- do.call(huge::huge, c(args,
list(x=data, method=method, verbose=verbose,
cov.output = cov.output)))
} else if (method %in% c('mb')) {
est <- do.call(huge::huge.mb, c(args, list(x=data, verbose=verbose)))
est$method <- 'mb'
est$data <- data
est$sym <- ifelse(!is.null(args$sym), args$sym, 'or')
return(est)
}
return(est)
}


Expand All @@ -125,11 +140,14 @@ sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE
#' @param stars.subsample.ratio The default value 'is 10*sqrt(n)/n' when 'n>144' and '0.8' when 'n<=144', where 'n' is the sample size.
#' @param rep.num number of subsamplings when \code{criterion} = stars.
#' @param ncores number of cores to use. Need multiple processers if \code{ncores > 1}
#' @param normfun normalize internally if data should be renormalized
#' @param normfun normalize internally if data should be renormalized (experimental feature)
#' @importFrom parallel mclapply
#' @importFrom Matrix forceSymmetric
#' @export
icov.select <- function(est, criterion = 'stars', stars.thresh = 0.05, ebic.gamma = 0.5,
stars.subsample.ratio = NULL, rep.num = 20, ncores=1, normfun=function(x) x, verbose=FALSE) {
icov.select <- function(est, criterion = 'stars', stars.thresh = 0.05,
ebic.gamma = 0.5, stars.subsample.ratio = NULL,
rep.num = 20, ncores=1,
normfun=function(x) x, verbose=FALSE) {
gcinfo(FALSE)
if (est$cov.input) {
message("Model selection is not available when using the covariance matrix as input.")
Expand Down Expand Up @@ -283,9 +301,15 @@ icov.select <- function(est, criterion = 'stars', stars.thresh = 0.05, ebic.gamm
}
est$opt.index = max(which.max(est$variability >=
stars.thresh)[1] - 1, 1)
est$refit = est$path[[est$opt.index]]
est$opt.lambda = est$lambda[est$opt.index]
est$opt.sparsity = est$sparsity[est$opt.index]
refit = est$path[[est$opt.index]]
## correct for numerical issue in huge that results in
# non-symmetric matrix in glasso method
## accept the edge set of the denser half of the matrix
est$refit <- forceSymmetric(refit,
ifelse(sum(Matrix::tril(refit))>sum(Matrix::triu(refit)), 'L', 'U'))

est$opt.lambda <- est$lambda[est$opt.index]
est$opt.sparsity <- sum(est$refit)/(d*(d-1))
if (est$method == "glasso") {
est$opt.icov = est$icov[[est$opt.index]]
if (!is.null(est$cov))
Expand Down
24 changes: 13 additions & 11 deletions R/spaRcc.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
#' @param inner_iter number of iterations for the inner loop
#' @export
sparcc <- function(data, iter=20, inner_iter=10, th=.1) {
##
##
# without all the 'frills'
sparccs <-
lapply(1:iter, function(i)
sparccs <-
lapply(1:iter, function(i)
sparccinner(t(apply(data, 1, norm_diric)), iter=inner_iter, th=th))
# collect
cors <- array(unlist(lapply(sparccs, function(x) x$Cor)),
Expand All @@ -25,7 +25,7 @@ sparcc <- function(data, iter=20, inner_iter=10, th=.1) {
}

#' SparCC bootstraps
#'
#'
#' Bootstrapped Sparcc to get empirical and null distributions for correlations
#'
#' @param data count data matrix
Expand All @@ -34,8 +34,11 @@ sparcc <- function(data, iter=20, inner_iter=10, th=.1) {
#' @param statisticperm optional argument to override default permute function
#' @return sparccboot object. Pass to \code{pval.sparccboot} to get empirical p-values
#' @export
#' @importFrom boot boot
sparccboot <- function(data, sparcc.params=list(), statisticboot, statisticperm, R, ncpus=1, ...) {
if (!require('boot')) {
stop('\'boot\' package is not installed')
}

if (missing(statisticboot)) {
statisticboot <- function(data, indices)
triu(do.call("sparcc", c(list(data[indices,,drop=FALSE]), sparcc.params))$Cor)
Expand Down Expand Up @@ -64,8 +67,8 @@ pval.sparccboot <- function(x, sided='both') {
if (sided != "both") stop("only two-sided currently supported")
nparams <- ncol(x$t)
tmeans <- colMeans(x$null_av$t)
# check to see whether Aitchison variance is unstable -- confirm
# that sample Aitchison variance is in 95% confidence interval of
# check to see whether Aitchison variance is unstable -- confirm
# that sample Aitchison variance is in 95% confidence interval of
# bootstrapped samples
niters <- nrow(x$t)
ind95 <- max(1,round(.025*niters)):round(.975*niters)
Expand All @@ -77,7 +80,7 @@ pval.sparccboot <- function(x, sided='both') {
range[1] > aitvar || range[2] < aitvar
}))
# calc whether center of mass is above or below the mean
bs_above <- unlist(lapply(1:nparams, function(i)
bs_above <- unlist(lapply(1:nparams, function(i)
length(which(x$t[, i] > tmeans[i]))))
is_above <- bs_above > x$R/2
cors <- x$t0
Expand Down Expand Up @@ -153,8 +156,8 @@ basis_cov <- function(data.f) {
}

#' @keywords internal
basis_var <- function(T, CovMat = matrix(0, nrow(T), ncol(T)),
M = matrix(1, nrow(T), ncol(T)) + (diag(ncol(T))*(ncol(T)-2)),
basis_var <- function(T, CovMat = matrix(0, nrow(T), ncol(T)),
M = matrix(1, nrow(T), ncol(T)) + (diag(ncol(T))*(ncol(T)-2)),
excluded = NULL, Vmin=1e-4) {

if (!is.null(excluded)) {
Expand Down Expand Up @@ -196,4 +199,3 @@ norm_diric <- function(x, rep=1) {
dmat <- VGAM::rdiric(rep, x+1)
norm_to_total(colMeans(dmat))
}

2 changes: 1 addition & 1 deletion man/icov.select.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 27 additions & 4 deletions man/spiec.easi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 0 additions & 24 deletions man/spiec.easi.default.Rd

This file was deleted.

11 changes: 0 additions & 11 deletions man/spiec.easi.phyloseq.Rd

This file was deleted.

0 comments on commit dea8763

Please sign in to comment.