diff --git a/DESCRIPTION b/DESCRIPTION index 2ba92aa..e390bfc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: DAtest Title: Comparing Differential Abundance methods -Version: 0.0.0.9000 +Version: 1.0.0 Authors@R: person("Jakob", "Russel", email = "russel2620@gmail.com", role = c("aut", "cre")) Description: What the title says. Depends: R (>= 3.2.5) -Imports: foreach, doSNOW, snow, MASS, glmnet, pROC, ggplot2, cowplot, lme4 +Imports: foreach, doSNOW, snow, MASS, pROC, ggplot2, cowplot, lme4, nlme License: GPL (>= 3) Encoding: UTF-8 LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 2d08ce0..e8ba488 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,13 +8,14 @@ export(DA.anc) export(DA.aov) export(DA.bay) export(DA.ds2) -export(DA.enn) export(DA.ere) export(DA.erq) export(DA.kru) export(DA.lao) export(DA.lao2) export(DA.lim) +export(DA.lli) +export(DA.lli2) export(DA.llm) export(DA.llm2) export(DA.lrm) @@ -23,6 +24,8 @@ export(DA.ltt2) export(DA.msf) export(DA.neb) export(DA.per) +export(DA.rai) +export(DA.spe) export(DA.ttt) export(DA.wil) export(DA.zig) @@ -34,7 +37,6 @@ import(cowplot) import(doSNOW) import(foreach) import(ggplot2) -import(glmnet) import(lme4) import(nlme) import(pROC) diff --git a/R/.Rhistory b/R/.Rhistory new file mode 100644 index 0000000..e69de29 diff --git a/R/DA.adx.R b/R/DA.adx.R index df45038..49e6580 100644 --- a/R/DA.adx.R +++ b/R/DA.adx.R @@ -1,12 +1,15 @@ #' Aldex t.test and wilcox - +#' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param ... Additional arguments for the aldex function #' @export -DA.adx <- function(count_table, outcome, mc.samples = 128, p.adj){ +DA.adx <- function(count_table, outcome, ...){ library(ALDEx2, quietly = TRUE) - x <- aldex(data.frame(count_table), outcome, mc.samples = mc.samples, verbose = FALSE) + x <- aldex(data.frame(count_table), outcome, verbose = FALSE, ...) x$Feature <- rownames(x) return(x) diff --git a/R/DA.anc.R b/R/DA.anc.R index 683f335..be1635b 100644 --- a/R/DA.anc.R +++ b/R/DA.anc.R @@ -1,15 +1,18 @@ #' ANCOM #' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param ... Additional arguments for the ANCOM function #' @export -DA.anc <- function(count_table, outcome, sig = 0.05, multcorr = 3, tau = 0.02, theta = 0.1, repeated = FALSE){ +DA.anc <- function(count_table, outcome, ...){ library(ancom.R, quietly = TRUE) input <- as.data.frame(t(count_table)) input$Group <- outcome - res <- ANCOM(input, sig, multcorr, tau, theta, repeated) + res <- ANCOM(input, ...) df <- data.frame(Feature = rownames(count_table), W = res[[1]], diff --git a/R/DA.aov.R b/R/DA.aov.R index 08bce5e..5c2902c 100644 --- a/R/DA.aov.R +++ b/R/DA.aov.R @@ -1,11 +1,16 @@ #' ANOVA - +#' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the aov function #' @export -DA.aov <- function(count_table, outcome, p.adj, relative){ +DA.aov <- function(count_table, outcome, relative = TRUE, p.adj = "fdr", ...){ ao <- function(x){ - tryCatch(as.numeric(summary(aov(x ~ outcome))[[1]][1,5]), error = function(e){NA}) + tryCatch(as.numeric(summary(aov(x ~ outcome, ...))[[1]][1,5]), error = function(e){NA}) } if(relative){ diff --git a/R/DA.bay.R b/R/DA.bay.R index 3c39db6..3a5bce2 100644 --- a/R/DA.bay.R +++ b/R/DA.bay.R @@ -1,11 +1,20 @@ #' baySeq -#' -#' From +#' +#' Implemented as in: #' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 - +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param samplesize How large a sample should be taken in estimating the priors? Default 1e5 +#' @param samplingSubset If given, the priors will be sampled only from the subset specified. Default NULL +#' @param equalDispersions Should we assume equal dispersions of data across all groups in the 'cD' object? Defaults to TRUE +#' @param estimation Defaults to "QL", indicating quasi-likelihood estimation of priors. Currently, the only other possibilities are "ML", a maximum-likelihood method, and "edgeR", the moderated dispersion estimates produced by the 'edgeR' package +#' @param zeroML Should parameters from zero data (rows that within a group are all zeros) be estimated using maximum likelihood methods (which will result in zeros in the parameters? +#' @param consensus If TRUE, creates a consensus distribution rather than a separate distribution for each member of the groups structure in the 'cD' object +#' @param ... Additional arguments to the getLikelihoods function #' @export -DA.bay <- function(count_table, outcome, p.adj){ +DA.bay <- function(count_table, outcome, p.adj = "fdr", samplesize = 1e5, samplingSubset = NULL, equalDispersions = TRUE, estimation = "QL", zeroML = FALSE, consensus = FALSE, ...){ library(baySeq, quietly = TRUE) @@ -14,8 +23,8 @@ DA.bay <- function(count_table, outcome, p.adj){ libsizes(CD) <- getLibsizes(CD) CD@annotation <- data.frame(name=rownames(count_table)) - CD <- getPriors.NB(CD, cl= NULL) - CD <- getLikelihoods(CD, cl = NULL) + CD <- getPriors.NB(CD, cl= NULL, samplesize=samplesize, samplingSubset=samplingSubset, equalDispersions=equalDispersions, estimation=estimation, zeroML=zeroML, consensus=consensus) + CD <- getLikelihoods(CD, cl = NULL, ...) tc <- topCounts(CD, group = "DE", number=nrow(count_table)) tc <- tc[,c(1,rev(ncol(tc)-0:4))] diff --git a/R/DA.ds2.R b/R/DA.ds2.R index bae7e0e..ac1eb04 100644 --- a/R/DA.ds2.R +++ b/R/DA.ds2.R @@ -1,12 +1,16 @@ #' DESeq2 #' -#' From: -#' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 +#' Implemented as in: +#' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8. #' Manual geometric means calculated to avoid errors, see https://github.com/joey711/phyloseq/issues/387 - +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the DESeq function #' @export -DA.ds2 <- function(count_table, outcome, paired = NULL, p.adj){ +DA.ds2 <- function(count_table, outcome, paired = NULL, p.adj = "fdr", ...){ library(DESeq2, quietly = TRUE) if(is.null(paired)){ @@ -25,7 +29,7 @@ DA.ds2 <- function(count_table, outcome, paired = NULL, p.adj){ } geoMeans = apply(counts(x), 1, gm_mean) x = estimateSizeFactors(x, geoMeans = geoMeans) - x <- DESeq(x) + x <- DESeq(x, ...) res <- as.data.frame(results(x)@listData) colnames(res)[5] <- "pval" res$pval.adj <- p.adjust(res$pval, method = p.adj) diff --git a/R/DA.enn.R b/R/DA.enn.R deleted file mode 100644 index 0b9d971..0000000 --- a/R/DA.enn.R +++ /dev/null @@ -1,200 +0,0 @@ -#' ENNB from https://cals.arizona.edu/~anling/ -#' @import glmnet MASS -#' @export - -DA.enn <- function(count_table, outcome, TMM.option = 1, p.adj){ - - ################################################################################# - # # - # ENNB: A two-stage statistical procedure for feature selection and comparison - # in fuctional analysis of metagenomes # - # # - # input: # - # X: Matrix of count data # - # Y: Phenotype info of samples # - # fileoutput: the outout file name # - # TMM.option: 1 or 2. option of "1" is for an approach using the mean of the # - # effective library sizes as a reference library size in TMM normalization; # - # option of "2" represents an approach to regenerating counts # - # with a common dispersion. # - # # - # output: # - # summary table of the final results after two stages. # - ################################################################################# - - TwoStage_Package <- function(X,Y, TMM.option){ - require(glmnet) # Elastic-net - require(MASS) # GLMs-NB - require(edgeR) # TMM normalization - - threshold = 0.05 # FDR to control the type I error at significance level of 0.05 - - ##################################################### - ### Trimmed Mean (TMM) (Based on edgeR Paper) ### - ##################################################### - TMMNorm = function(X, Y, TMM.option){ - factors = calcNormFactors(X,method="TMM") #Calculate normaization factors - if(TMM.option==1){ - eff.lib.size = colSums(X)*factors; - ref.lib.size = mean(eff.lib.size); #Use the mean of the effective library sizes as a reference library size - X.output = sweep(X,MARGIN=2,eff.lib.size,"/")*ref.lib.size; #Normalized read counts - } else if(TMM.option==2){ #Regenerate counts with a common dispersion. - group = as.factor(Y); - X.tmp = DGEList(counts=X, group=group); - X.tmp = calcNormFactors(X.tmp); - X.tmp = estimateCommonDisp(X.tmp); - X.output = round(as.matrix(X.tmp$pseudo.counts)); - } else { - stop("TMM.option must be either 1 or 2!"); - } - return(X.output); - } - - ############################################################################ - #### Negative binomial generalized linear model for two group comparison ### - ############################################################################ - get.glmNB <- function(count,category){ - p.val = rep(NA,ncol(count)) #Declare initial pvalue - - for (i in 1:ncol(count)) { - dat = data.frame(testing=count[,i], groups=as.factor(category)) - if (sum(dat$testing) != 0) { #in case that all counts are zero - fit = glm.nb(testing ~ groups, data=dat) - p.val[i] = summary(fit)$coefficients[2,4] - } - } - p.val[is.na(p.val)] = 1 - p.adj = p.adjust(p.val, method="fdr") #Adjusted p-value - res = cbind(p.val,p.adj) - rownames(res) = colnames(count) - return(res) - } - - ############################################################################## - ### Two-Stage Procedure ### - ############################################################################## - ################################# - ### Two group comparison #### - ################################# - Twostage <- function(X, Y){ - ### A matrix contains cvm error information - cv.mat = matrix(NA, nrow = length(alpha), ncol = 3) - colnames(cv.mat) = c("Alpha", "Lambda", "CVM") - cv.mat[,1] = alpha - - #### Cross-validation to determine the optimal alpha and lambda giving minimum cross-validation error - npop1 = table(Y)[1] - npop2 = table(Y)[2] - N = npop1 + npop2 - if (N <= 15) nfold = 3 - if (N > 15 & N < 50) nfold = 5 - if (N >= 50) nfold = 10 - - ### Fit a GLM with elastic net regularization - for (iter in 1:3){ - set.seed(iter) - - #Select a random number of subjects - newdata.ind = sample(1:nrow(X), floor(0.9*nrow(X))) - - #Subset the data - X.new = X[newdata.ind,] - Y.new = Y[newdata.ind] - - # Cross-varidation to determine lambda - for (j in 1:length(alpha)){ - cv = cv.glmnet(X, Y, family = "binomial", alpha = alpha[j], nfolds=nfold) - ind = match(cv$lambda.min, cv$lambda) - cv.mat[j,2] = cv$lambda[ind] - cv.mat[j,3] = cv$cvm[ind] - } - - alpha.opt = cv.mat[cv.mat[,"CVM"] == min(cv.mat[,"CVM"]),1] - lambda.opt = cv.mat[cv.mat[,"CVM"] == min(cv.mat[,"CVM"]),2] - - # Fit a GLM with elastic net regularization - fit = glmnet(X.new, Y.new, family = "binomial", alpha = alpha.opt, lambda = lambda.opt) - - # Get model coefficients for glmnet - coef = coef(fit) - - if (iter == 1) { - elast = as.matrix(coef) - } else{ - elast = cbind(elast, as.matrix(coef)) - } - } #end (iter) - - ### Get features for which coefficients are not 0 - elast = elast[-1, ] #get rid of intercept - feature = rownames(elast) - df = data.frame(elast, rowsum = rowSums(elast)) - ind.elast = which(df$rowsum !=0 ) #index of coefficients that are not zero - allFeatSel = as.character(feature[ind.elast]) - - if (length(allFeatSel) != 0){ - ind = which(colnames(X) %in% allFeatSel) - X.elast = as.matrix(X[,ind]) - colnames(X.elast) = allFeatSel - # GLMs-NB Model - pvalue.mat = get.glmNB(X.elast,Y) - } else{ - print("No significantly differentially abundant features!!!") - } - - ### Summary of significantly differentially abundant features - sig.mat = pvalue.mat[pvalue.mat[,"p.adj"] < threshold,] # Significantly differentially abundant features - sig = rownames(sig.mat) - ind.sig = which(colnames(X) %in% sig) - X.sig = t(as.matrix(X[,ind.sig])) - X.sig.pop1 = X.sig[,1:npop1] - X.sig.pop2 = X.sig[,(npop1+1):(npop1+npop2)] - mean.pop1 = apply(X.sig.pop1, 1, mean) - mean.pop2 = apply(X.sig.pop2, 1, mean) - sd.pop1 = apply(X.sig.pop1, 1, sd) - sd.pop2 = apply(X.sig.pop2, 1, sd) - - sig.df = data.frame(Annotation=as.character(rownames(X.sig)), mean.pop1,sd.pop1,mean.pop2, sd.pop2, - sig.mat[,"p.val"], sig.mat[,"p.adj"], stringsAsFactors=F) - names(sig.df)=c("Annotation", paste("mean_", names(table(Y))[1], sep=""), paste("sd_", names(table(Y))[1], sep=""), paste("mean_", names(table(Y))[2], sep=""), paste("sd_", names(table(Y))[2], sep=""), "p.val", "p.adj") - - rownames(sig.df) = NULL - sigsort.df = sig.df[order(sig.df$p.adj),] - - } - - alpha = seq(0.01,0.1,by=0.01) - - ### Data preprocessing and TMM normalization ### - X = as.matrix(X) - Y = factor(Y) - X.tmm = TMMNorm(X,Y,TMM.option) # TMM normalization (edgeR) - X = t(X.tmm) - - ngroups = nlevels(Y) - - if (ngroups == 2) Twostage(X, Y) # Two group comparison - - } - - res <- tryCatch(TwoStage_Package(count_table,sample(outcome),TMM.option = 1),error = function(e) NULL) - - features <- data.frame(Feature = rownames(count_table)) - - if(is.null(res)){ - output <- features - output$pval <- 1 - output$pval.adj <- 1 - } else { - colnames(res)[c(1,6:7)] <- c("Feature","pval","pval.adj") - output <- merge(res,features, by="Feature", all = TRUE) - output[is.na(output$pval),"pval"] <- 1 - output$pval.adj <- p.adjust(output$pval, method = p.adj) - } - output$Method <- "ENNB" - return(output) - -} - - - diff --git a/R/DA.ere.R b/R/DA.ere.R index 5f56d61..947a63f 100644 --- a/R/DA.ere.R +++ b/R/DA.ere.R @@ -1,18 +1,21 @@ -#' EdgeR +#' EdgeR exact test #' -#' From: +#' Implemented as in: #' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 - +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the exactTest function #' @export -DA.ere <- function(count_table, outcome, p.adj){ +DA.ere <- function(count_table, outcome, p.adj = "fdr", ...){ library(edgeR, quietly = TRUE) otu_table <- as.data.frame(count_table) x <- DGEList(counts = count_table, group = outcome, genes = data.frame(Feature = row.names(count_table))) x <- edgeR::calcNormFactors(x) x <- estimateTagwiseDisp(estimateCommonDisp(x)) - ta <- exactTest(x)[[1]] + ta <- exactTest(x, ...)[[1]] colnames(ta)[3] <- "pval" ta$pval.adj <- p.adjust(ta$pval, method = p.adj) ta$Feature <- rownames(ta) diff --git a/R/DA.erq.R b/R/DA.erq.R index e6c33e7..d6f49eb 100644 --- a/R/DA.erq.R +++ b/R/DA.erq.R @@ -1,8 +1,13 @@ #' EdgeR quasi-likelihood - +#' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the glmQLFit function #' @export -DA.erq <- function(count_table, outcome, paired = NULL, p.adj){ +DA.erq <- function(count_table, outcome, paired = NULL, p.adj = "fdr", ...){ library(edgeR, quietly = TRUE) otu_table <- as.data.frame(count_table) @@ -14,7 +19,7 @@ DA.erq <- function(count_table, outcome, paired = NULL, p.adj){ design <- model.matrix(~outcome + paired) } y <- estimateDisp(y,design) - fit <- glmQLFit(y,design) + fit <- glmQLFit(y,design, ...) if(is.numeric(outcome)){ qlf <- glmQLFTest(fit,coef=2) ta <- qlf$table diff --git a/R/DA.kru.R b/R/DA.kru.R index 860670c..31edcce 100644 --- a/R/DA.kru.R +++ b/R/DA.kru.R @@ -1,11 +1,16 @@ #' Kruskal-Wallis test - +#' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the kruskal.test function #' @export -DA.kru <- function(count_table, outcome, p.adj, relative = TRUE){ +DA.kru <- function(count_table, outcome, relative = TRUE, p.adj = "fdr", ...){ kru <- function(x){ - tryCatch(kruskal.test(as.numeric(x) ~ as.factor(outcome))$p.value, error = function(e){NA}) + tryCatch(kruskal.test(as.numeric(x) ~ as.factor(outcome), ...)$p.value, error = function(e){NA}) } if(relative){ diff --git a/R/DA.lao.R b/R/DA.lao.R index 680d883..8b9b253 100644 --- a/R/DA.lao.R +++ b/R/DA.lao.R @@ -1,17 +1,24 @@ -#' Log ANOVA - +#' ANOVA +#' +#' With log transformation of counts before normalization. +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for the log transformation. Default 1 +#' @param ... Additional arguments for the aov functions #' @export -DA.lao <- function(count_table, outcome, delta = 1, p.adj){ +DA.lao <- function(count_table, outcome,relative = TRUE, p.adj = "fdr", delta = 1, ...){ ao <- function(x){ - tryCatch(as.numeric(summary(aov(x ~ outcome))[[1]][1,5]), error = function(e){NA}) + tryCatch(as.numeric(summary(aov(x ~ outcome, ...))[[1]][1,5]), error = function(e){NA}) } count_table <- log(count_table+delta) - count.rel <- apply(count_table,2,function(x) x/sum(x)) + if(relative) count_table <- apply(count_table,2,function(x) x/sum(x)) - res <- data.frame(pval = apply(count.rel,1,ao)) + res <- data.frame(pval = apply(count_table,1,ao)) res$pval.adj <- p.adjust(res$pval, method = p.adj) res$Feature <- rownames(res) diff --git a/R/DA.lao2.R b/R/DA.lao2.R index b9155ce..8f9d277 100644 --- a/R/DA.lao2.R +++ b/R/DA.lao2.R @@ -1,11 +1,17 @@ -#' Log ANOVA 2 - +#' ANOVA +#' +#' With log transformation of relative abundances. +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for the log transformation. Default 0.001 +#' @param ... Additional arguments for the aov functions #' @export -DA.lao2 <- function(count_table, outcome, delta = 0.001, p.adj){ +DA.lao2 <- function(count_table, outcome, p.adj = "fdr", delta = 0.001, ...){ ao <- function(x){ - tryCatch(as.numeric(summary(aov(x ~ outcome))[[1]][1,5]), error = function(e){NA}) + tryCatch(as.numeric(summary(aov(x ~ outcome, ...))[[1]][1,5]), error = function(e){NA}) } count.rel <- apply(count_table,2,function(x) x/sum(x)) diff --git a/R/DA.lim.R b/R/DA.lim.R index 4534e6d..830031a 100644 --- a/R/DA.lim.R +++ b/R/DA.lim.R @@ -1,16 +1,18 @@ #' LIMMA #' -#' Some is borrowed from: +#' Some implementation is borrowed from: #' http://www.biostat.jhsph.edu/~kkammers/software/eupa/R_guide.html - +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details #' @export -DA.lim <- function(count_table, outcome, p.adj, relative, paired = NULL, log = FALSE, delta = 1){ +DA.lim <- function(count_table, outcome, paired = NULL, relative = TRUE, p.adj = "fdr", ...){ library(limma, quietly = TRUE) - if(log) count_table <- log(count_table + delta) - if(relative){ count.rel <- apply(count_table,2,function(x) x/sum(x)) } else { @@ -20,7 +22,7 @@ DA.lim <- function(count_table, outcome, p.adj, relative, paired = NULL, log = F if(is.null(paired)) design <- model.matrix(~outcome) else design <- model.matrix(~as.factor(paired)+outcome) n <- dim(count.rel)[1] fit <- lmFit(count.rel, design) - fit.eb <- eBayes(fit) + fit.eb <- eBayes(fit, ...) if(is.null(paired)) Estimate <- fit.eb$coefficients else Estimate <- fit.eb$coefficients[,c(1,(length(levels(as.factor(paired)))+1):ncol(fit.eb$coefficients))] df.residual <- fit.eb$df.residual df.prior <- rep(fit.eb$df.prior, n) diff --git a/R/DA.lli.R b/R/DA.lli.R new file mode 100644 index 0000000..ef3cf28 --- /dev/null +++ b/R/DA.lli.R @@ -0,0 +1,41 @@ +#' LIMMA +#' +#' With log transformation for counts before normalization. +#' Some implementation is borrowed from: +#' http://www.biostat.jhsph.edu/~kkammers/software/eupa/R_guide.html +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for log transformation. Default 1 +#' @export + +DA.lli <- function(count_table, outcome, paired = NULL, relative = TRUE, p.adj = "fdr", delta = 1, ...){ + + library(limma, quietly = TRUE) + + count_table <- log(count_table + delta) + + if(relative) count_table <- apply(count_table,2,function(x) x/sum(x)) + + if(is.null(paired)) design <- model.matrix(~outcome) else design <- model.matrix(~as.factor(paired)+outcome) + n <- dim(count_table)[1] + fit <- lmFit(count_table, design) + fit.eb <- eBayes(fit, ...) + if(is.null(paired)) Estimate <- fit.eb$coefficients else Estimate <- fit.eb$coefficients[,c(1,(length(levels(as.factor(paired)))+1):ncol(fit.eb$coefficients))] + df.residual <- fit.eb$df.residual + df.prior <- rep(fit.eb$df.prior, n) + s2.prior <- rep(fit.eb$s2.prior, n) + s2 <- (fit.eb$sigma)^2 + s2.post <- fit.eb$s2.post + t.stat <- fit.eb$t[, 2] + pval <- fit.eb$p.value[, 2] + pval.adj <- p.adjust(pval, method = p.adj) + res <- data.frame(Estimate, t.stat, pval, pval.adj, df.residual, df.prior, s2.prior, s2, s2.post) + res$Feature <- rownames(res) + res$Method <- "Log LIMMA" + + return(res) +} + diff --git a/R/DA.lli2.R b/R/DA.lli2.R new file mode 100644 index 0000000..eff762a --- /dev/null +++ b/R/DA.lli2.R @@ -0,0 +1,41 @@ +#' LIMMA +#' +#' With log transformation of relative abundances. +#' Some implementation is borrowed from: +#' http://www.biostat.jhsph.edu/~kkammers/software/eupa/R_guide.html +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for log transformation. Default 0.001 +#' @export + +DA.lli2 <- function(count_table, outcome, paired = NULL, p.adj = "fdr", delta = 0.001, ...){ + + library(limma, quietly = TRUE) + + count.rel <- apply(count_table,2,function(x) x/sum(x)) + + count.rel <- log(count.rel + delta) + + if(is.null(paired)) design <- model.matrix(~outcome) else design <- model.matrix(~as.factor(paired)+outcome) + n <- dim(count.rel)[1] + fit <- lmFit(count.rel, design) + fit.eb <- eBayes(fit, ...) + if(is.null(paired)) Estimate <- fit.eb$coefficients else Estimate <- fit.eb$coefficients[,c(1,(length(levels(as.factor(paired)))+1):ncol(fit.eb$coefficients))] + df.residual <- fit.eb$df.residual + df.prior <- rep(fit.eb$df.prior, n) + s2.prior <- rep(fit.eb$s2.prior, n) + s2 <- (fit.eb$sigma)^2 + s2.post <- fit.eb$s2.post + t.stat <- fit.eb$t[, 2] + pval <- fit.eb$p.value[, 2] + pval.adj <- p.adjust(pval, method = p.adj) + res <- data.frame(Estimate, t.stat, pval, pval.adj, df.residual, df.prior, s2.prior, s2, s2.post) + res$Feature <- rownames(res) + res$Method <- "Log LIMMA 2" + + return(res) +} + diff --git a/R/DA.llm.R b/R/DA.llm.R index c9144eb..c82c657 100644 --- a/R/DA.llm.R +++ b/R/DA.llm.R @@ -1,18 +1,27 @@ -#' Linear regression - log -#' +#' Linear regression +#' +#' With log transformation of counts before normalization. +#' Mixed-effect model is used when a paired argument is included, with the paired variable as a random intercept. +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for the log transformation. Default 1 +#' @param ... Additional arguments for the lm/lme functions #' @export -DA.llm <- function(count_table, outcome, paired = NULL, p.adj, delta = 1){ +DA.llm <- function(count_table, outcome, paired = NULL,relative = TRUE, p.adj = "fdr", delta = 1, ...){ count_table <- as.data.frame.matrix(count_table) count_table <- log(count_table + delta) - count.rel <- apply(count_table,2,function(x) x/sum(x)) + if(relative) count_table <- apply(count_table,2,function(x) x/sum(x)) if(is.null(paired)){ lmr <- function(x){ fit <- NULL tryCatch( - fit <- lm(x ~ outcome), + fit <- lm(x ~ outcome, ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { @@ -24,7 +33,7 @@ DA.llm <- function(count_table, outcome, paired = NULL, p.adj, delta = 1){ lmr <- function(x){ fit <- NULL tryCatch( - fit <- lme(x ~ outcome, random = ~1|paired), + fit <- lme(x ~ outcome, random = ~1|paired, ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { @@ -34,11 +43,11 @@ DA.llm <- function(count_table, outcome, paired = NULL, p.adj, delta = 1){ } } - res <- as.data.frame(t(as.data.frame(apply(count.rel,1,lmr)))) + res <- as.data.frame(t(as.data.frame(apply(count_table,1,lmr)))) colnames(res) <- c("Estimate","Std.Error","t-value","pval") res$pval.adj <- p.adjust(res$pval, method = p.adj) res$Feature <- rownames(res) - res$Method <- "Linear regression - Log" + res$Method <- "Log Linear regression" return(res) } diff --git a/R/DA.llm2.R b/R/DA.llm2.R index 04df972..366a0ed 100644 --- a/R/DA.llm2.R +++ b/R/DA.llm2.R @@ -1,8 +1,17 @@ -#' Linear regression - log #2 +#' Linear regression #' +#' With log transformation of relative abundances. +#' Mixed-effect model is used when a paired argument is included, with the paired variable as a random intercept. +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for the log transformation. Default 0.001 +#' @param ... Additional arguments for the lm/lme functions #' @export -DA.llm2 <- function(count_table, outcome, paired = NULL, p.adj, delta = 0.001){ +DA.llm2 <- function(count_table, outcome, paired = NULL, p.adj = "fdr", delta = 0.001, ...){ count_table <- as.data.frame.matrix(count_table) count.rel <- apply(count_table,2,function(x) x/sum(x)) @@ -12,7 +21,7 @@ DA.llm2 <- function(count_table, outcome, paired = NULL, p.adj, delta = 0.001){ lmr <- function(x){ fit <- NULL tryCatch( - fit <- lm(x ~ outcome), + fit <- lm(x ~ outcome, ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { @@ -24,7 +33,7 @@ DA.llm2 <- function(count_table, outcome, paired = NULL, p.adj, delta = 0.001){ lmr <- function(x){ fit <- NULL tryCatch( - fit <- lme(x ~ outcome, random = ~1|paired), + fit <- lme(x ~ outcome, random = ~1|paired, ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { @@ -38,7 +47,7 @@ DA.llm2 <- function(count_table, outcome, paired = NULL, p.adj, delta = 0.001){ colnames(res) <- c("Estimate","Std.Error","t-value","pval") res$pval.adj <- p.adjust(res$pval, method = p.adj) res$Feature <- rownames(res) - res$Method <- "Linear regression - Log 2" + res$Method <- "Log Linear regression 2" return(res) } diff --git a/R/DA.lrm.R b/R/DA.lrm.R index d708068..f17f927 100644 --- a/R/DA.lrm.R +++ b/R/DA.lrm.R @@ -1,20 +1,27 @@ #' Linear regression -#' +#' +#' Mixed-effect model is a paired argument is present, with the paired variable as random intercept +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the lm/lme functions #' @import nlme #' @export -DA.lrm <- function(count_table, outcome, paired = NULL, p.adj){ +DA.lrm <- function(count_table, outcome, paired = NULL, relative = TRUE, p.adj = "fdr", ...){ library(nlme) count_table <- as.data.frame.matrix(count_table) - count.rel <- apply(count_table,2,function(x) x/sum(x)) + if(relative) count_table <- apply(count_table,2,function(x) x/sum(x)) if(is.null(paired)){ lmr <- function(x){ fit <- NULL tryCatch( - fit <- lm(x ~ outcome), + fit <- lm(x ~ outcome, ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { @@ -26,7 +33,7 @@ DA.lrm <- function(count_table, outcome, paired = NULL, p.adj){ lmr <- function(x){ fit <- NULL tryCatch( - fit <- lme(x ~ outcome, random = ~1|paired), + fit <- lme(x ~ outcome, random = ~1|paired, ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { @@ -36,7 +43,7 @@ DA.lrm <- function(count_table, outcome, paired = NULL, p.adj){ } } - res <- as.data.frame(t(as.data.frame(apply(count.rel,1,lmr)))) + res <- as.data.frame(t(as.data.frame(apply(count_table,1,lmr)))) colnames(res) <- c("Estimate","Std.Error","t-value","pval") res$pval.adj <- p.adjust(res$pval, method = p.adj) res$Feature <- rownames(res) diff --git a/R/DA.ltt.R b/R/DA.ltt.R index e084a47..53760ec 100644 --- a/R/DA.ltt.R +++ b/R/DA.ltt.R @@ -1,11 +1,21 @@ -#' Log t-test - +#' Welch t-test +# +# With log transformaiton of counts before normalization +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for log transformation. +#' @param testStat Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1)) +#' @param testStat.pair Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1))) +#' @param ... Additional arguments for the t.test function #' @export -DA.ltt <- function(count_table, outcome, delta = 1, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, paired = NULL, p.adj){ +DA.ltt <- function(count_table, outcome, paired = NULL, relative = TRUE, p.adj = "fdr", delta = 1, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, ...){ tt <- function(x){ - tryCatch(t.test(x ~ outcome)$p.value, error = function(e){NA}) + tryCatch(t.test(x ~ outcome, ...)$p.value, error = function(e){NA}) } if(!is.null(paired)){ @@ -13,14 +23,14 @@ DA.ltt <- function(count_table, outcome, delta = 1, testStat = function(case,con outcome <- outcome[order(paired)] testStat <- testStat.pair tt <- function(x){ - tryCatch(t.test(x ~ outcome, paired = TRUE)$p.value, error = function(e){NA}) + tryCatch(t.test(x ~ outcome, paired = TRUE, ...)$p.value, error = function(e){NA}) } } count_table <- log(count_table+delta) - count.rel <- apply(count_table,2,function(x) x/sum(x)) + if(relative) count_table <- apply(count_table,2,function(x) x/sum(x)) - res <- data.frame(pval = apply(count.rel,1,tt)) + res <- data.frame(pval = apply(count_table,1,tt)) res$pval.adj <- p.adjust(res$pval, method = p.adj) # Teststat outcome.num <- as.numeric(as.factor(outcome))-1 @@ -29,7 +39,7 @@ DA.ltt <- function(count_table, outcome, delta = 1, testStat = function(case,con control <- x[outcome.num==0] testStat(case,control) } - res$FC <- apply(count.rel,1,testfun) + res$FC <- apply(count_table,1,testfun) res$Feature <- rownames(res) res$Method <- "Log t-test" diff --git a/R/DA.ltt2.R b/R/DA.ltt2.R index 371c6b7..d4f03d8 100644 --- a/R/DA.ltt2.R +++ b/R/DA.ltt2.R @@ -1,18 +1,28 @@ -#' Log t-test2 - +#' Welch t-test +#' +#' With log transformed relative abundances +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param delta Numeric. Pseudocount for log transformation. Default 0.001 +#' @param testStat Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1)) +#' @param testStat.pair Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1))) +#' @param ... Additional arguments for the t.test function #' @export -DA.ltt2 <- function(count_table, outcome, delta = 0.001, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, paired = NULL, p.adj){ +DA.ltt2 <- function(count_table, outcome,paired = NULL,p.adj = "fdr", delta = 0.001, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, ...){ tt <- function(x){ - tryCatch(t.test(x ~ outcome)$p.value, error = function(e){NA}) + tryCatch(t.test(x ~ outcome, ...)$p.value, error = function(e){NA}) } if(!is.null(paired)){ otu_table <- count_table[,order(paired)] outcome <- outcome[order(paired)] testStat <- testStat.pair tt <- function(x){ - tryCatch(t.test(x ~ outcome, paired = TRUE)$p.value, error = function(e){NA}) + tryCatch(t.test(x ~ outcome, paired = TRUE, ...)$p.value, error = function(e){NA}) } } diff --git a/R/DA.msf.R b/R/DA.msf.R index f028f15..9e760e4 100644 --- a/R/DA.msf.R +++ b/R/DA.msf.R @@ -1,19 +1,23 @@ #' MetagenomeSeq Feature model #' -#' From: +#' Implemented as in: #' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the fitFeatureModel function #' @export -DA.msf <- function(count_table, outcome, p.adj){ +DA.msf <- function(count_table, outcome, p.adj = "fdr", ...){ library(metagenomeSeq, quietly = TRUE) - otu_table <- as.data.frame.matrix(count_table) + count_table <- as.data.frame.matrix(count_table) mgsdata <- newMRexperiment(counts = count_table) mgsp <- cumNormStat(mgsdata) mgsdata <- cumNorm(mgsdata, mgsp) mod <- model.matrix(~outcome) - mgsfit <- metagenomeSeq::fitFeatureModel(obj=mgsdata,mod=mod) + mgsfit <- metagenomeSeq::fitFeatureModel(obj=mgsdata,mod=mod,...) temp_table <- MRtable(mgsfit, number=nrow(count_table)) temp_table <- temp_table[!is.na(row.names(temp_table)),] temp_table$Feature <- rownames(temp_table) diff --git a/R/DA.neb.R b/R/DA.neb.R index 20ae471..7552b33 100644 --- a/R/DA.neb.R +++ b/R/DA.neb.R @@ -1,9 +1,16 @@ #' Negative binomial glm #' +#' With log(librarySize) as offset. +#' Mixed-effect model is used when a paired argument is included, with the paired variable as a random intercept. +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the glm.nb/glmer.nb functions #' @import MASS lme4 #' @export -DA.neb <- function(count_table, outcome, paired = NULL, p.adj){ +DA.neb <- function(count_table, outcome, paired = NULL, p.adj = "fdr", ...){ library(MASS, quietly = TRUE) library(lme4, quietly = TRUE) @@ -15,7 +22,7 @@ DA.neb <- function(count_table, outcome, paired = NULL, p.adj){ negbin <- function(x){ fit <- NULL tryCatch( - fit <- glm.nb(x ~ outcome + offset(log(libSize))), + fit <- glm.nb(x ~ outcome + offset(log(libSize)), ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { @@ -27,7 +34,7 @@ DA.neb <- function(count_table, outcome, paired = NULL, p.adj){ negbin <- function(x){ fit <- NULL tryCatch( - fit <- glmer.nb(x ~ outcome + offset(log(libSize)) + (1|paired)), + fit <- glmer.nb(x ~ outcome + offset(log(libSize)) + (1|paired), ...), error = function(x) fit <- NULL) if(!is.null(fit)) { if(nrow(coef(summary(fit))) > 1) { diff --git a/R/DA.per.R b/R/DA.per.R index 4bcd57e..d0efdca 100644 --- a/R/DA.per.R +++ b/R/DA.per.R @@ -1,14 +1,23 @@ -#' Permutation test +#' Permutation test of user-defined test statistic #' #' Modified version of the one from: #' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8. #' P-values are now two-sided, and test statistic is a simple log fold change #' #' A paired permutation test is implemented specifically for this package. The test is similar to the original, but with a different test statistic and permutation scheme. The permutations are constrained in the paired version such that the outcome is only permuted within each level of the paired argument (e.g. subjects). The test statistic first finds the log-ratio between the two outcome levels (e.g. case and control) for each level of the paired argument and the final statistic is the mean of these log-ratios. - +#' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param testStat Function. Function for the test statistic. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1)) +#' @param testStat.pair Function. Function for test statistc for paired analysis. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1))) +#' @param noOfIterations Integer. Iterations for permutations. Default 10000 +#' @param margin Numeric. Margin for when to stop iterations if p-value is high and unlikely to become low #' @export -DA.per <- function(count_table, outcome, paired = NULL, noOfIterations = 10000, seed = as.numeric(Sys.time()), margin = 50, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, p.adj, relative = TRUE){ +DA.per <- function(count_table, outcome, paired = NULL, relative = TRUE, p.adj = "fdr", testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, noOfIterations = 10000, margin = 50){ if(!is.null(paired)){ count_table <- count_table[,order(paired)] @@ -23,7 +32,6 @@ DA.per <- function(count_table, outcome, paired = NULL, noOfIterations = 10000, count.rel <- count_table } - set.seed(seed) nullStatList <- list() # Create shuffled outcomes diff --git a/R/DA.rai.R b/R/DA.rai.R new file mode 100644 index 0000000..adef283 --- /dev/null +++ b/R/DA.rai.R @@ -0,0 +1,26 @@ +#' RAIDA +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param predictor Factor. The outcome of interest. E.g. case and control +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the raida function +#' @export + +DA.rai <- function(count_table, outcome, p.adj = "fdr", ...){ + + library(RAIDA, quietly = TRUE) + + count_table.o <- as.data.frame(count_table[,order(outcome)]) + + res <- raida(count_table.o, n.lib = as.numeric(table(outcome)), mtcm = p.adj, ...) + res$Feature <- rownames(res) + colnames(res)[1] <- "pval" + colnames(res)[2] <- "pval.adj" + res$Method <- "RAIDA" + + return(res) + +} + + + + diff --git a/R/DA.spe.R b/R/DA.spe.R new file mode 100644 index 0000000..b58aabe --- /dev/null +++ b/R/DA.spe.R @@ -0,0 +1,33 @@ +#' Spearman's Rank Correlation +#' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Numeric. The outcome of interest. E.g. case and control +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @export + +DA.spe <- function(count_table, outcome, relative = TRUE, p.adj = "fdr", ...){ + + if(relative){ + count.rel <- apply(count_table,2,function(x) x/sum(x)) + } else { + count.rel <- count_table + } + + spe <- function(x){ + tryCatch(cor.test(x,outcome, method = "spearman", ...)$p.value, error = function(e){NA}) + } + + spe.cor <- function(x){ + tryCatch(cor(x,outcome, method = "spearman"), error = function(e){NA}) + } + + res <- data.frame(pval = apply(count.rel,1,spe)) + res$pval.adj <- p.adjust(res$pval, method = p.adj) + res$rho <- apply(count.rel,1,spe.cor) + res$Feature <- rownames(res) + res$Method <- "Spearman" + + return(res) +} + diff --git a/R/DA.ttt.R b/R/DA.ttt.R index 9eed287..967c17a 100644 --- a/R/DA.ttt.R +++ b/R/DA.ttt.R @@ -1,11 +1,19 @@ -#' t-test +#' Welch t-test +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param testStat Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1)) +#' @param testStat.pair Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1))) +#' @param ... Additional arguments for the t.test function #' @export -DA.ttt <- function(count_table, outcome, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, paired = NULL, p.adj, relative = TRUE){ +DA.ttt <- function(count_table, outcome,paired = NULL, relative = TRUE, p.adj = "fdr", testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, ...){ tt <- function(x){ - tryCatch(t.test(x ~ outcome)$p.value, error = function(e){NA}) + tryCatch(t.test(x ~ outcome, ...)$p.value, error = function(e){NA}) } if(!is.null(paired)){ @@ -13,7 +21,7 @@ DA.ttt <- function(count_table, outcome, testStat = function(case,control){log(( outcome <- outcome[order(paired)] testStat <- testStat.pair tt <- function(x){ - tryCatch(t.test(x ~ outcome, paired = TRUE)$p.value, error = function(e){NA}) + tryCatch(t.test(x ~ outcome, paired = TRUE, ...)$p.value, error = function(e){NA}) } } diff --git a/R/DA.wil.R b/R/DA.wil.R index 40fd702..498030b 100644 --- a/R/DA.wil.R +++ b/R/DA.wil.R @@ -1,11 +1,19 @@ -#' Wilcox test +#' Wilcoxon Rank Sum and Signed Rank Test +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor. The outcome of interest. E.g. case and control +#' @param paired Factor. Subject ID for running paired analysis +#' @param relative Logical. Should count_table be normalized to relative abundances. Default TRUE +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param testStat Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1)) +#' @param testStat.pair Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1))) +#' @param ... Additional arguments for the wilcox.test function #' @export -DA.wil <- function(count_table, outcome, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, paired = NULL, p.adj, relative = TRUE){ +DA.wil <- function(count_table, outcome, paired = NULL, relative = TRUE, p.adj = "fdr", testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, ...){ wil <- function(x){ - tryCatch(wilcox.test(x ~ outcome)$p.value, error = function(e){NA}) + tryCatch(wilcox.test(x ~ outcome, ...)$p.value, error = function(e){NA}) } if(!is.null(paired)){ @@ -13,7 +21,7 @@ DA.wil <- function(count_table, outcome, testStat = function(case,control){log(( outcome <- outcome[order(paired)] testStat <- testStat.pair wil <- function(x){ - tryCatch(wilcox.test(x ~ outcome, paired = TRUE)$p.value, error = function(e){NA}) + tryCatch(wilcox.test(x ~ outcome, paired = TRUE, ...)$p.value, error = function(e){NA}) } } diff --git a/R/DA.zig.R b/R/DA.zig.R index 2f01e28..d482a74 100644 --- a/R/DA.zig.R +++ b/R/DA.zig.R @@ -1,11 +1,14 @@ #' MetageonomeSeq ZIG #' -#' From +#' Implemented as in: #' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 - +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param predictor Factor. The outcome of interest. E.g. case and control +#' @param p.adj Character. P-value adjustment. Default "fdr". See p.adjust for details +#' @param ... Additional arguments for the fitZig function #' @export -DA.zig <- function(count_table, outcome, p.adj){ +DA.zig <- function(count_table, outcome, p.adj = "fdr", ...){ library(metagenomeSeq, quietly = TRUE) @@ -14,7 +17,7 @@ DA.zig <- function(count_table, outcome, p.adj){ mgsp <- cumNormStat(mgsdata) mgsdata <- cumNorm(mgsdata, mgsp) mod <- model.matrix(~outcome) - mgsfit <- fitZig(obj=mgsdata,mod=mod) + mgsfit <- fitZig(obj=mgsdata,mod=mod, ...) temp_table <- MRtable(mgsfit, number=nrow(count_table)) temp_table <- temp_table[!is.na(row.names(temp_table)),] # Pvalue have different naming depending on package version diff --git a/R/allDA.R b/R/allDA.R index 752a578..870f303 100644 --- a/R/allDA.R +++ b/R/allDA.R @@ -3,27 +3,14 @@ #' Run many differential abundance tests at a time #' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns #' @param predictor Factor or Numeric. The outcome of interest. E.g. case and control. If the predictor has more than two levels, only the 2. level will be spiked. If the predictor is numeric it will be treated as such in the analyses -#' @param paired Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm" and "llm2" -#' @param relative Logical. Should abundances be made relative? Only has effect for "ttt", "wil", "per", "aov", "kru" and "lim". Default TRUE +#' @param paired Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm", "llm2", "lim", "lli" and "lli2" #' @param tests Character. Which tests to include. Default all (See below for details) +#' @param relative Logical. Should abundances be made relative? Only has effect for "ttt", "ltt", "wil", "per", "aov", "lao", "kru", "lim", "lli", "lrm", "llm" and "spe". Default TRUE #' @param cores Integer. Number of cores to use for parallel computing. Default one less than available #' @param rng.seed Numeric. Seed for reproducibility. Default 123 #' @param p.adj Character. Method for pvalue adjustment. Default "fdr" -#' @param delta1 Numeric. The pseudocount for the log t.test/log ANOVA/llm method. Default 1 -#' @param delta2 Numeric. The pseudocount for the log t.test2/log ANOVA2/llm2 method. Default 0.001 -#' @param noOfIterations Integer. How many iterations should be run for the permutation tests. Default 10000 -#' @param margin Integer. The margin of when to stop iterating for non-significant Features for the permutation tests. Default 50 -#' @param testStat Function. The test statistic function for the permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1)) -#' @param testStat.pair Function. The test statistic function for the paired permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1))) -#' @param mc.samples Integer. Monte Carlo samples for ALDEx2. Default 64 -#' @param sig Numeric. Alpha used in ANCOM. Default 0.05 -#' @param multcorr Integer. Correction used in ANCOM. Default 3 (no correction) -#' @param tau Numeric. Tuning parameter for ANCOM. Default 0.02 -#' @param theta Numeric. Tuning parameter for ANCOM. Default 0.1 -#' @param repeated Logical. Are there repeated measures? Only for ANCOM. Default FALSE -#' @param TMM.option 1 or 2. For "enn". Option "1" is for an approach using the mean of the effective library sizes as a reference library size in TMM normalization; option "2" represents an approach to regenerating counts with a common dispersion. Default 1 -#' @param log.lim Logical. For "lim". Should abundances be log transformed (before relative abundances if relative = TRUE). Default FALSE -#' @param delta.lim Pseudocount for "lim" log transformation. Default 1 +#' @param args List. A list with lists of arguments passed to the different methods. See details for more. +#' @param verbose Logical. Print information during run #' @details Currently implemented methods: #' \itemize{ #' \item per - Permutation test with user defined test statistic @@ -39,7 +26,6 @@ #' \item msf - MetagenomeSeq feature model #' \item zig - MetagenomeSeq zero-inflated gaussian #' \item ds2 - DESeq2 -#' \item enn - ENNB: Two-stage procedure from https://cals.arizona.edu/~anling/software.htm #' \item anc - ANCOM. This test does not output pvalues; for comparison with the other methods, detected Features are set to a pvalue of 0, all else are set to 1. #' \item lim - LIMMA. Moderated linear models based on emperical bayes #' \item kru - Kruskal-Wallis on relative abundances @@ -49,10 +35,45 @@ #' \item lrm - Linear regression on relative abundances #' \item llm - Linear regression, but reads are first transformed with log(abundance + delta1) then turned into relative abundances #' \item llm2 - Linear regression, but with relative abundances transformed with log(relative abundance + delta2) +#' \item rai - RAIDA +#' \item spe - Spearman correlation #' } -#' Is it too slow? Remove "anc" from test argument. #' -#' "per" is also somewhat slow, but is usually one of the methods performing well with large sample sizes. +#' Additional arguments can be passed to the internal functions with the "args" argument. +#' It should be structured as a list with elements named by the tests: +#' E.g. passing to the DA.per function that it should only run 1000 iterations: args = list(per=list(noOfIterations=1000)). +#' Include that the log t.test should use a pseudocount of 0.1: args = list(per=list(noOfIterations=1000), ltt=list(delta=0.1)). +#' Additional arguments are simply seperated by commas. +#' +#' Below is an overview of which functions get the arguments that are passed to a specific test +#' \itemize{ +#' \item per - Passed to DA.per +#' \item bay - Passed to getPriors and getLikelihoods +#' \item adx - Passed to aldex +#' \item wil - Passed to wilcox.test and DA.wil +#' \item ttt - Passed to t.test and DA.ttt +#' \item ltt - Passed to t.test and DA.ltt +#' \item ltt2 - Passed to t.test and DA.ltt2 +#' \item neb - Passed to glm.nb and glmer.nb +#' \item erq - Passed to exactTest +#' \item ere - Passed to glmQLFit +#' \item msf - Passed to fitFeatureModel +#' \item zig - Passed to fitZig +#' \item ds2 - Passed to DESeq +#' \item anc - Passed to ANCOM +#' \item lim - Passed to eBayes +#' \item lli - Passed to eBayes +#' \item lli2 - Passed to eBayes +#' \item kru - Passed to kruskal.test +#' \item aov - Passed to aov +#' \item lao - Passed to aov +#' \item lao2 - Passed to aov +#' \item lrm - Passed to lm and lme +#' \item llm - Passed to lm and lme +#' \item llm2 - Passed to lm and lme +#' \item rai - Passed to raida +#' \item spe - Passed to cor.test +#' } #' @return A list of results: #' \itemize{ #' \item table - Summary of results @@ -63,8 +84,10 @@ #' @importFrom parallel detectCores #' @export -allDA <- function(count_table, predictor, paired = NULL, relative = TRUE, tests = c("anc","per","bay","adx","enn","wil","ttt","ltt","ltt2","neb","erq","ere","msf","zig","ds2","lim","aov","lao","lao2","kru","lrm","llm","llm2"), cores = (detectCores()-1), rng.seed = 123, p.adj = "fdr", delta1 = 1, delta2 = 0.001, noOfIterations = 10000, margin = 50, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, mc.samples = 64, sig = 0.05, multcorr = 3, tau = 0.02, theta = 0.1, repeated = FALSE, TMM.option = 1, log.lim = FALSE, delta.lim = 1){ +allDA <- function(count_table, predictor, paired = NULL, tests = c("spe","anc","per","bay","adx","wil","ttt","ltt","ltt2","neb","erq","ere","msf","zig","ds2","lim","aov","lao","lao2","kru","lrm","llm","llm2","rai"), relative = TRUE, cores = (detectCores()-1), rng.seed = 123, p.adj = "fdr", args = list(), verbose = TRUE){ + # Checks + if(min(count_table) < 0 & is.numeric(predictor)) stop("Numeric predictor and negative values in count_table is currently not supported") if(sum(colSums(count_table) == 0) > 0) stop("Some samples are empty!") if(ncol(count_table) != length(predictor)) stop("Number of samples in count_table does not match length of predictor") if(length(levels(as.factor(predictor))) < 2) stop("Predictor should have at least two levels") @@ -74,36 +97,54 @@ allDA <- function(count_table, predictor, paired = NULL, relative = TRUE, tests library(foreach, quietly = TRUE) library(pROC, quietly = TRUE) - # Prune test argument + # Prune test argument if packages are not installed if(!"baySeq" %in% rownames(installed.packages())) tests <- tests[tests != "bay"] if(!"ALDEx2" %in% rownames(installed.packages())) tests <- tests[tests != "adx"] - if(!"MASS" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("neb","enn")] + if(!"MASS" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("neb")] if(!"lme4" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("neb")] - if(!"edgeR" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("ere","erq","enn")] + if(!"edgeR" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("ere","erq")] if(!"metagenomeSeq" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("msf","zig")] if(!"DESeq2" %in% rownames(installed.packages())) tests <- tests[tests != "ds2"] if(!"ancom.R" %in% rownames(installed.packages())) tests <- tests[tests != "anc"] - if(!"glmnet" %in% rownames(installed.packages())) tests <- tests[tests != "enn"] - if(!"limma" %in% rownames(installed.packages())) tests <- tests[tests != "lim"] + if(!"limma" %in% rownames(installed.packages())) tests <- tests[tests != "lim"] + if(!"RAIDA" %in% rownames(installed.packages())) tests <- tests[tests != "rai"] + # Excluded tests that do not work with a paired argument if(!is.null(paired)){ - tests <- tests[!tests %in% c("bay","adx","anc","enn","ere","msf","zig","aov","lao","lao2","kru")] + tests <- tests[!tests %in% c("bay","adx","anc","ere","msf","zig","aov","lao","lao2","kru","rai","spe")] } + # Only include some tests if there are more than two levels in predictor if(length(levels(as.factor(predictor))) > 2){ - tests <- tests[tests %in% c("neb","erq","ds2","lim","aov","lao","lao2","kru","lrm","llm","llm2")] + tests <- tests[tests %in% c("neb","erq","ds2","lim","lli2","aov","lao","lao2","kru","lrm","llm","llm2","spe")] } else { - tests <- tests[!tests %in% c("aov","lao","lao2","kru","lrm","llm","llm2")] + # Excluded tests if levels in predictor is exactly 2 + tests <- tests[!tests %in% c("aov","lao","lao2","kru","lrm","llm","llm2","spe")] } + # Only include specific tests if predictor is numeric if(is.numeric(predictor)){ - tests <- tests[tests %in% c("neb","erq","ds2","lim","lrm","llm","llm2")] + tests <- tests[tests %in% c("neb","erq","ds2","lim","lrm","llm","llm2","spe")] + } else { + # Exclude if not numeric + tests <- tests[!tests %in% c("spe")] } + # Exclude if relative is false + if(relative == FALSE){ + tests <- tests[!tests %in% c("ltt2","neb","erq","ere","msf","zig","bay","ds2","adx","anc","lli2","lao2","llm2","rai")] + } + + if(verbose){ + message(paste("Tests are run in the following order:")) + print(as.data.frame(tests)) + } + set.seed(rng.seed) - message(paste("Seed is set to",rng.seed)) + if(verbose) message(paste("Seed is set to",rng.seed)) # Remove Features not present in any samples + if(verbose) message(paste(sum(rowSums(count_table) == 0),"empty features removed")) count_table <- count_table[rowSums(count_table) > 0,] ### Run tests @@ -116,36 +157,50 @@ allDA <- function(count_table, predictor, paired = NULL, relative = TRUE, tests if(cores == 1) { registerDoSEQ() } else { - cl <- makeCluster(cores) + cl <- makeCluster(cores, outfile = "") registerDoSNOW(cl) } results <- foreach(i = tests, .export = noquote(paste0("DA.",tests)), .options.snow = opts) %dopar% { + # Extract test arguments + if(!all(names(args) %in% tests)) stop("One or more names in list with additional arguments does not match names of tests") + for(j in seq_along(args)){ + assign(paste0(names(args)[j],".args"),args[[j]],pos=1) + } + test.args <- paste0(tests,".args") + test.boo <- lapply(test.args,exists) + for(l in seq_along(test.args)){ + if(test.boo[l] == FALSE) assign(test.args[l], list(),pos=1) + } + res.sub <- switch(i, - wil = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,testStat,testStat.pair,paired, p.adj, relative)), - ttt = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,testStat,testStat.pair,paired, p.adj, relative)), - ltt = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,delta1,testStat,testStat.pair,paired, p.adj)), - ltt2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,delta2,testStat,testStat.pair,paired, p.adj)), - neb = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,paired, p.adj)), - erq = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,paired, p.adj)), - ere = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor, p.adj)), - msf = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor, p.adj)), - zig = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor, p.adj)), - ds2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,paired, p.adj)), - per = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,paired,noOfIterations,rng.seed,margin,testStat,testStat.pair, p.adj, relative)), - bay = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor, p.adj)), - adx = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,mc.samples, p.adj)), - enn = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,TMM.option,p.adj)), - anc = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,sig,multcorr, tau, theta, repeated)), - lim = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,p.adj,relative,paired,log.lim,delta.lim)), - kru = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor, p.adj, relative)), - aov = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor, p.adj, relative)), - lao = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,delta1, p.adj)), - lao2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,delta2, p.adj)), - lrm = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,paired, p.adj)), - llm = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,paired, p.adj, delta1)), - llm2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,predictor,paired, p.adj, delta2))) + wil = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),wil.args)), + ttt = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),ttt.args)), + ltt = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative, p.adj),ltt.args)), + ltt2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),ltt2.args)), + neb = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),neb.args)), + erq = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),erq.args)), + ere = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),ere.args)), + msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),msf.args)), + zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),zig.args)), + ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),ds2.args)), + per = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),per.args)), + bay = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),bay.args)), + adx = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand),adx.args)), + anc = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand),anc.args)), + lim = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative,p.adj),lim.args)), + lli = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative,p.adj),lli.args)), + lli2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,p.adj),lli2.args)), + kru = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, relative, p.adj),kru.args)), + aov = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, relative, p.adj),aov.args)), + lao = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,relative, p.adj),lao.args)), + lao2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),lao2.args)), + lrm = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),lrm.args)), + llm = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative, p.adj),llm.args)), + llm2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),llm2.args)), + rai = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,p.adj),rai.args)), + spe = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,relative,p.adj),spe.args))) res.sub[is.na(res.sub$pval),"pval"] <- 1 res.sub[is.na(res.sub$pval.adj),"pval.adj"] <- 1 @@ -179,8 +234,8 @@ allDA <- function(count_table, predictor, paired = NULL, relative = TRUE, tests Pos.adj <- sapply(results,function(x) x[x$pval.adj < 0.05,"Feature"]) features <- row.names(count_table) - counts <- sapply(features, function(y) sum(unlist(sapply(Pos.raw, function(x) x %in% y)))) / length(tests) - counts.adj <- sapply(features, function(y) sum(unlist(sapply(Pos.adj, function(x) x %in% y)))) / length(tests) + counts <- sapply(features, function(y) sum(unlist(sapply(Pos.raw, function(x) x %in% y)))) / length(results) + counts.adj <- sapply(features, function(y) sum(unlist(sapply(Pos.adj, function(x) x %in% y)))) / length(results) # Combine and return df.combined <- data.frame(Feature = features, diff --git a/R/spikein.R b/R/spikein.R index 347f25c..0824fa8 100644 --- a/R/spikein.R +++ b/R/spikein.R @@ -1,15 +1,22 @@ #' Spike-in #' +#' Internal function for the testDA function. +#' #' Modified version of the one from: -#' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 - +#' https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8. +#' +#' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns +#' @param outcome Factor or Numeric. The outcome of interest. E.g. case and control. If the predictor has more than two levels, only the 2. level will be spiked. If the predictor is numeric it will be treated as such in the analyses +#' @param spikeMethod Character. Multiplicative ("mult") or additive ("add") spike-in. Default "mult". Use "add" if there are negative values in count_table +#' @param effectSize Integer. The effect size for the spike-ins. Default 2 +#' @param k Vector of length 3. Number of Features to spike in each tertile (lower, mid, upper). k=c(5,10,15): 5 features spiked in low abundance tertile, 10 features spiked in mid abundance tertile and 15 features spiked in high abundance tertile. Default c(5,5,5) +#' @param num.pred Logical. Is the outcome numeric? Default FALSE #' @export -spikein <- function(count_table, outcome, spikeMethod = "mult", effectSize = 2, k, relative = TRUE, num.pred = FALSE){ +spikein <- function(count_table, outcome, spikeMethod = "mult", effectSize = 2, k, num.pred = FALSE){ - if(relative == FALSE & spikeMethod == "mult") stop("Cannot use multiplicative spike-in if relative is FALSE") if(num.pred == TRUE & spikeMethod == "add") stop("Cannot use additive spike-in if predictor is numeric") - + if(effectSize < 0) stop("Effect size should be positive") if(effectSize == 1) spikeMethod <- "none" count_table <- as.data.frame(count_table) diff --git a/R/testDA.R b/R/testDA.R index 97703dd..2f01b51 100644 --- a/R/testDA.R +++ b/R/testDA.R @@ -4,30 +4,17 @@ #' @param count_table Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns #' @param predictor Factor or Numeric. The outcome of interest. E.g. case and control. If the predictor has more than two levels, only the 2. level will be spiked. If the predictor is numeric it will be treated as such in the analyses #' @param R Integer. Number of times to run the tests. Default 10 -#' @param paired Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm" and "llm2" -#' @param relative Logical. Should abundances be made relative? Only has effect for "ttt", "wil", "per", "aov", "kru" and "lim". Default TRUE +#' @param paired Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm", "llm2", "lim", "lli" and "lli2" #' @param tests Character. Which tests to include. Default all (See below for details) -#' @param spikeMethod Character. Multiplicative ("mult") or additive ("add") spike-in. Default "mult". Use "add" if relative = TRUE +#' @param relative Logical. Should abundances be made relative? Only has effect for "ttt", "ltt", "wil", "per", "aov", "lao", "kru", "lim", "lli", "lrm", "llm" and "spe". Default TRUE +#' @param spikeMethod Character. Multiplicative ("mult") or additive ("add") spike-in. Default "mult". Use "add" if count_table contains negative counts. #' @param effectSize Integer. The effect size for the spike-ins. Default 2 #' @param k Vector of length 3. Number of Features to spike in each tertile (lower, mid, upper). k=c(5,10,15): 5 features spiked in low abundance tertile, 10 features spiked in mid abundance tertile and 15 features spiked in high abundance tertile. Default c(5,5,5) -#' @param cores Integer. Number of cores to use for parallel computing. Default one less than available +#' @param cores Integer. Number of cores to use for parallel computing. Default one less than available. Set to 1 for sequential computing. #' @param rng.seed Numeric. Seed for reproducibility. Default 123 #' @param p.adj Character. Method for pvalue adjustment. Default "fdr" (Does not affect AUC, FPR or Spike.detect.rate, these use raw p-values) -#' @param delta1 Numeric. The pseudocount for the log t.test/log ANOVA/llm method. Default 1 -#' @param delta2 Numeric. The pseudocount for the log t.test2/log ANOVA2/lmm2 method. Default 0.001 -#' @param noOfIterations Integer. How many iterations should be run for the permutation test. Default 10000 -#' @param margin Integer. The margin of when to stop iterating for non-significant Features for the permutation test. Default 50 -#' @param testStat Function. The test statistic function for the permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1)) -#' @param testStat.pair Function. The test statistic function for the paired permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1))) -#' @param mc.samples Integer. Monte Carlo samples for ALDEx2. Default 64 -#' @param sig Numeric. Alpha used in ANCOM. Default 0.05 -#' @param multcorr Integer. Correction used in ANCOM. Default 3 (no correction) -#' @param tau Numeric. Tuning parameter for ANCOM. Default 0.02 -#' @param theta Numeric. Tuning parameter for ANCOM. Default 0.1 -#' @param repeated Logical. Are there repeated measures? Only for ANCOM. Default FALSE -#' @param TMM.option 1 or 2. For "enn". Option "1" is for an approach using the mean of the effective library sizes as a reference library size in TMM normalization; option "2" represents an approach to regenerating counts with a common dispersion. Default 1 -#' @param log.lim Logical. For "lim". Should abundances be log transformed (before relative abundances if relative = TRUE). Default FALSE -#' @param delta.lim Pseudocount for "lim" log transformation. Default 1 +#' @param args List. A list with lists of arguments passed to the different methods. See details for more. +#' @param verbose Logical. Print information during run #' @details Currently implemented methods: #' \itemize{ #' \item per - Permutation test with user defined test statistic @@ -43,9 +30,10 @@ #' \item msf - MetagenomeSeq feature model #' \item zig - MetagenomeSeq zero-inflated gaussian #' \item ds2 - DESeq2 -#' \item enn - ENNB: Two-stage procedure from https://cals.arizona.edu/~anling/software.htm #' \item anc - ANCOM. This test does not output pvalues; for comparison with the other methods, detected Features are set to a pvalue of 0, all else are set to 1. #' \item lim - LIMMA. Moderated linear models based on emperical bayes +#' \item lli - LIMMA, but reads are first transformed with log(abundance + delta1) then turned into relative abundances +#' \item lli2 - LIMMA, but with relative abundances transformed with log(relative abundance + delta2) #' \item kru - Kruskal-Wallis on relative abundances #' \item aov - ANOVA on relative abundances #' \item lao - ANOVA, but reads are first transformed with log(abundance + delta1) then turned into relative abundances @@ -53,10 +41,49 @@ #' \item lrm - Linear regression on relative abundances #' \item llm - Linear regression, but reads are first transformed with log(abundance + delta1) then turned into relative abundances #' \item llm2 - Linear regression, but with relative abundances transformed with log(relative abundance + delta2) +#' \item rai - RAIDA +#' \item spe - Spearman correlation #' } #' Is it too slow? Remove "anc" from test argument. +#' "neb" is slow if there is a paired argument. #' #' "per" is also somewhat slow, but is usually one of the methods performing well with large sample sizes. +#' +#' Additional arguments can be passed to the internal functions with the "args" argument. +#' It should be structured as a list with elements named by the tests: +#' E.g. passing to the DA.per function that it should only run 1000 iterations: args = list(per=list(noOfIterations=1000)). +#' Include that the log t.test should use a pseudocount of 0.1: args = list(per=list(noOfIterations=1000), ltt=list(delta=0.1)). +#' Additional arguments are simply seperated by commas. +#' +#' Below is an overview of which functions get the arguments that are passed to a specific test +#' \itemize{ +#' \item per - Passed to DA.per +#' \item bay - Passed to getPriors and getLikelihoods +#' \item adx - Passed to aldex +#' \item wil - Passed to wilcox.test and DA.wil +#' \item ttt - Passed to t.test and DA.ttt +#' \item ltt - Passed to t.test and DA.ltt +#' \item ltt2 - Passed to t.test and DA.ltt2 +#' \item neb - Passed to glm.nb and glmer.nb +#' \item erq - Passed to exactTest +#' \item ere - Passed to glmQLFit +#' \item msf - Passed to fitFeatureModel +#' \item zig - Passed to fitZig +#' \item ds2 - Passed to DESeq +#' \item anc - Passed to ANCOM +#' \item lim - Passed to eBayes +#' \item lli - Passed to eBayes +#' \item lli2 - Passed to eBayes +#' \item kru - Passed to kruskal.test +#' \item aov - Passed to aov +#' \item lao - Passed to aov +#' \item lao2 - Passed to aov +#' \item lrm - Passed to lm and lme +#' \item llm - Passed to lm and lme +#' \item llm2 - Passed to lm and lme +#' \item rai - Passed to raida +#' \item spe - Passed to cor.test +#' } #' @return An object of class DA, which contains a list of results: #' \itemize{ #' \item table - FPR, AUC and spike detection rate for each run @@ -67,45 +94,69 @@ #' @importFrom parallel detectCores #' @export -testDA <- function(count_table, predictor, R = 10, paired = NULL, relative = TRUE, tests = c("anc","per","bay","adx","enn","wil","ttt","ltt","ltt2","neb","erq","ere","msf","zig","ds2","lim","aov","lao","lao2","kru","lrm","llm","llm2"), spikeMethod = "mult", effectSize = 2, k = c(5,5,5), cores = (detectCores()-1), rng.seed = 123, p.adj = "fdr", delta1 = 1, delta2 = 0.001, noOfIterations = 10000, margin = 50, testStat = function(case,control){log((mean(case)+1)/(mean(control)+1))}, testStat.pair = function(case,control){mean(log((case+1)/(control+1)))}, mc.samples = 64, sig = 0.05, multcorr = 3, tau = 0.02, theta = 0.1, repeated = FALSE, TMM.option = 1, log.lim = FALSE, delta.lim = 1){ +testDA <- function(count_table, predictor, R = 10, paired = NULL, tests = c("spe","anc","per","bay","adx","wil","ttt","ltt","ltt2","neb","erq","ere","msf","zig","ds2","lim","lli","lli2","aov","lao","lao2","kru","lrm","llm","llm2","rai"), relative = TRUE, spikeMethod = "mult", effectSize = 2, k = c(5,5,5), cores = (detectCores()-1), rng.seed = 123, p.adj = "fdr", args = list(), verbose = FALSE){ library(foreach, quietly = TRUE) + # Checks + if(min(count_table) < 0 & is.numeric(predictor)) stop("Numeric predictor and negative values in count_table is currently not supported") + if(min(count_table) < 0 & spikeMethod == "mult") stop("Additive spike-in should be used when count_table contains negative values") if(sum(colSums(count_table) == 0) > 0) stop("Some samples are empty!") if(ncol(count_table) != length(predictor)) stop("Number of samples in count_table does not match length of predictor") if(length(levels(as.factor(predictor))) < 2) stop("Predictor should have at least two levels") - # Prune test argument + # Prune test argument if packages are not installed if(!"baySeq" %in% rownames(installed.packages())) tests <- tests[tests != "bay"] if(!"ALDEx2" %in% rownames(installed.packages())) tests <- tests[tests != "adx"] - if(!"MASS" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("neb","enn")] + if(!"MASS" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("neb")] if(!"lme4" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("neb")] - if(!"edgeR" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("ere","erq","enn")] + if(!"edgeR" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("ere","erq")] if(!"metagenomeSeq" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("msf","zig")] if(!"DESeq2" %in% rownames(installed.packages())) tests <- tests[tests != "ds2"] if(!"ancom.R" %in% rownames(installed.packages())) tests <- tests[tests != "anc"] - if(!"glmnet" %in% rownames(installed.packages())) tests <- tests[tests != "enn"] if(!"limma" %in% rownames(installed.packages())) tests <- tests[tests != "lim"] - + if(!"RAIDA" %in% rownames(installed.packages())) tests <- tests[tests != "rai"] + + # Excluded tests that do not work with a paired argument if(!is.null(paired)){ - tests <- tests[!tests %in% c("bay","adx","anc","enn","ere","msf","zig","aov","lao","lao2","kru")] + tests <- tests[!tests %in% c("bay","adx","anc","ere","msf","zig","aov","lao","lao2","kru","rai","spe")] } + # Only include some tests if there are more than two levels in predictor if(length(levels(as.factor(predictor))) > 2){ - tests <- tests[tests %in% c("neb","erq","ds2","lim","aov","lao","lao2","kru","lrm","llm","llm2")] + tests <- tests[tests %in% c("neb","erq","ds2","lim","lli2","aov","lao","lao2","kru","lrm","llm","llm2","spe")] } else { - tests <- tests[!tests %in% c("aov","lao","lao2","kru","lrm","llm","llm2")] + # Excluded tests if levels in predictor is exactly 2 + tests <- tests[!tests %in% c("aov","lao","lao2","kru","lrm","llm","llm2","spe")] } + # Only include specific tests if predictor is numeric if(is.numeric(predictor)){ - tests <- tests[tests %in% c("neb","erq","ds2","lim","lrm","llm","llm2")] + tests <- tests[tests %in% c("neb","erq","ds2","lim","lrm","llm","llm2","spe")] + } else { + # Exclude if not numeric + tests <- tests[!tests %in% c("spe")] + } + + # Exclude if relative is false + if(relative == FALSE){ + tests <- tests[!tests %in% c("ltt2","neb","erq","ere","msf","zig","bay","ds2","adx","anc","lli2","lao2","llm2","rai")] } + if(verbose){ + message(paste("Tests are run in the following order:")) + print(as.data.frame(tests)) + } + set.seed(rng.seed) - message(paste("Seed is set to",rng.seed)) + if(verbose) message(paste("Seed is set to",rng.seed)) + + # Remove Features not present in any samples + if(verbose) message(paste(sum(rowSums(count_table) == 0),"empty features removed")) + count_table <- count_table[rowSums(count_table) > 0,] final.results <- foreach(r = 1:R) %do% { - + library(parallel, quietly = TRUE) library(doSNOW, quietly = TRUE) library(pROC, quietly = TRUE) @@ -120,16 +171,14 @@ testDA <- function(count_table, predictor, R = 10, paired = NULL, relative = TRU rand <- unsplit(lapply(split(predictor,paired), sample), paired) } - # Remove Features not present in any samples - count_table <- count_table[rowSums(count_table) > 0,] - # Spikein if(is.numeric(predictor)){ num.pred <- TRUE + if(verbose) print("Predictor is assumed to be numeric") } else { num.pred <- FALSE } - spiked <- spikein(count_table, rand, spikeMethod, effectSize, k, relative, num.pred) + spiked <- spikein(count_table, rand, spikeMethod, effectSize, k, num.pred) count_table <- spiked[[1]] ### Run tests @@ -142,36 +191,50 @@ testDA <- function(count_table, predictor, R = 10, paired = NULL, relative = TRU if(cores == 1) { registerDoSEQ() } else { - cl <- makeCluster(cores) + cl <- makeCluster(cores, outfile = "") registerDoSNOW(cl) } - results <- foreach(i = tests, .export = noquote(paste0("DA.",tests)), .options.snow = opts) %dopar% { + results <- foreach(i = tests , .options.snow = opts) %dopar% { - res.sub <- switch(i, - wil = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,testStat,testStat.pair,paired, p.adj, relative)), - ttt = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,testStat,testStat.pair,paired, p.adj, relative)), - ltt = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,delta1,testStat,testStat.pair,paired, p.adj)), - ltt2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,delta2,testStat,testStat.pair,paired, p.adj)), - neb = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,paired, p.adj)), - erq = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,paired, p.adj)), - ere = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand, p.adj)), - msf = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand, p.adj)), - zig = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand, p.adj)), - ds2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,paired, p.adj)), - per = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,paired,noOfIterations,rng.seed,margin,testStat,testStat.pair, p.adj, relative)), - bay = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand, p.adj)), - adx = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,mc.samples, p.adj)), - enn = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,TMM.option,p.adj)), - anc = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,sig,multcorr, tau, theta, repeated)), - lim = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,p.adj,relative,paired,log.lim,delta.lim)), - kru = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand, p.adj, relative)), - aov = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand, p.adj, relative)), - lao = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,delta1, p.adj)), - lao2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,delta2, p.adj)), - lrm = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,paired, p.adj)), - llm = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,paired, p.adj, delta1)), - llm2 = do.call(get(noquote(paste0("DA.",i))),list(count_table,rand,paired, p.adj, delta2))) + # Extract test arguments + if(!all(names(args) %in% tests)) stop("One or more names in list with additional arguments does not match names of tests") + for(j in seq_along(args)){ + assign(paste0(names(args)[j],".args"),args[[j]],pos=1) + } + test.args <- paste0(tests,".args") + test.boo <- lapply(test.args,exists) + for(l in seq_along(test.args)){ + if(test.boo[l] == FALSE) assign(test.args[l], list(),pos=1) + } + + res.sub <- switch(i, + wil = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),wil.args)), + ttt = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),ttt.args)), + ltt = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative, p.adj),ltt.args)), + ltt2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),ltt2.args)), + neb = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),neb.args)), + erq = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),erq.args)), + ere = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),ere.args)), + msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),msf.args)), + zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),zig.args)), + ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),ds2.args)), + per = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),per.args)), + bay = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),bay.args)), + adx = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand),adx.args)), + anc = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand),anc.args)), + lim = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative,p.adj),lim.args)), + lli = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative,p.adj),lli.args)), + lli2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,p.adj),lli2.args)), + kru = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, relative, p.adj),kru.args)), + aov = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, relative, p.adj),aov.args)), + lao = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,relative, p.adj),lao.args)), + lao2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand, p.adj),lao2.args)), + lrm = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, relative, p.adj),lrm.args)), + llm = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired,relative, p.adj),llm.args)), + llm2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,paired, p.adj),llm2.args)), + rai = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,p.adj),rai.args)), + spe = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,rand,relative,p.adj),spe.args))) res.sub[is.na(res.sub$pval),"pval"] <- 1 res.sub[is.na(res.sub$pval.adj),"pval.adj"] <- 1 diff --git a/README.md b/README.md index 6de07bc..20af16e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ This is a package for comparing different differential abundance methods used in microbial marker-gene (e.g. 16S rRNA), RNA-seq and protein abundance analysis. -The methodology goes as follows: +The method goes as follows: - Shuffle predictor variable - Spike in data for some randomly chosen features, such that they are @@ -40,13 +40,24 @@ But the package will work without them ANCOM has to be installed from an [external source.](https://www.niehs.nih.gov/research/resources/software/biostatistics/ancom/index.cfm) ++ It depends on: shiny, doParallel, methods, stringr, exactRankTests and +openxlsx + +RAIDA has to be installed from an [external +source.](https://cals.arizona.edu/~anling/software/) + It depends on: +MASS, protoclust, qvalue (biocLite("qvalue")) and limma +(biocLite("limma")) How to compare methods: ----------------------- -A good method has a "False Positive Rate" (FPR) at ~0.05 or below, an -"Area Under the (Receiver Operator) Curve" (AUC) as high as possible, -and a "Spike Detection Rate" (Spike.detect.rate) as high as possible. +First, all methods with a "False Positive Rate" (FPR) at ~0.05 or below +can safely be used. + +Second, among methods with a low FPR, those with a high "Area Under the +(Receiver Operator) Curve" (AUC) and "Spike Detection Rate" +(Spike.detect.rate) are likely to have more power to detect signal in +the respective dataset. **Run the test:** @@ -66,7 +77,7 @@ the p-value associated with the second level is used. predictor can also be numeric, in which case it is spiked as followed: newAbundances = oldAbundances \* (effectSize ^ predictor) -*The tests can be run in a paired version:* +#### *The tests can be run in a paired version:* E.g. if SubjectID is a factor denoting the pairing of the samples (in the same order as columns in the count\_table): @@ -76,31 +87,20 @@ the same order as columns in the count\_table): When a *paired* argument is provided, the predictor is shuffled within the levels of the *paired* factor. -*Or without relative abundances, e.g. for normalized protein abundance:* +#### *Or without relative abundances, e.g. for normalized protein abundance:* - mytest <- testDA(count_table,predictor,relative=FALSE,spikeMethod="add",tests=c("ttt","lim","wil","per"),testStat = function(case,control) {mean(case)-mean(control)}) + mytest <- testDA(count_table,predictor,relative=FALSE) **Plot the output:** plot(mytest, sort = "AUC") -Plot the p-value distributions. Raw p-values should have a uniform -(flat) distribution between 0 and 1. If adj = TRUE, adjusted p-values -will be plotted, and there should not be any below 0.05 (assuming an -alpha = 0.05). - - plot(mytest, p = TRUE) - **Print the output:** Medians for each method: summary(mytest, sort = "AUC") -Results from all the runs: - - print(mytest) - How to run real data: --------------------- @@ -151,7 +151,6 @@ Implemented methods: [DESeq2](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8) (The paired version is a model with the paired variable as covariate) -- enn - [ENNB](https://cals.arizona.edu/~anling/software.htm) - anc - [ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277) - lim - [LIMMA](https://link.springer.com/chapter/10.1007%2F0-387-29362-0_23?LI=true) @@ -166,6 +165,9 @@ Implemented methods: log(abundance + delta) then turned into relative abundances - llm2 - Linear regression, but with relative abundances transformed with log(relative abundance + delta) +- rai - + [RAIDA](https://academic.oup.com/bioinformatics/article/31/14/2269/256302/A-robust-approach-for-identifying-differentially?searchresult=1) +- spe - Spearman Rank Correlation ### Paired permutation test @@ -177,3 +179,75 @@ paired argument (e.g. subjects). The test statistic first finds the log-ratio between the two outcome levels (e.g. case and control) for each level of the paired argument and the final statistic is the mean of these log-ratios. + +Extra features +-------------- + +Plot the p-value distributions. Raw p-values should have a uniform +(flat) distribution between 0 and 1. + + plot(mytest, p = TRUE) + +Results from all the runs: + + print(mytest) + +See the output from the individual methods. E.g. "ere" first run: + + View(mytest$results[[1]]["ere"]) + +#### Passing arguments to the different tests + +Additional arguments can be passed to the internal functions with the +"args" argument. It should be structured as a list with elements named +by the tests: + +E.g. passing to the DA.per function that it should only run 1000 +iterations: + + mytest <- testDA(...,args = list(per=list(noOfIterations=1000))) + +Include that the log t.test should use a pseudocount of 0.1: + + mytest <- testDA(...,args = list(per=list(noOfIterations=1000), ltt=list(delta=0.1))) + +Additional arguments are simply seperated by commas. + +Below is an overview of which functions get the arguments that are +passed to a specific test: + +- per - Passed to DA.per +- bay - Passed to getPriors and getLikelihoods +- adx - Passed to aldex +- wil - Passed to wilcox.test and DA.wil +- ttt - Passed to t.test and DA.ttt +- ltt - Passed to t.test and DA.ltt +- ltt2 - Passed to t.test and DA.ltt2 +- neb - Passed to glm.nb and glmer.nb +- erq - Passed to exactTest +- ere - Passed to glmQLFit +- msf - Passed to fitFeatureModel +- zig - Passed to fitZig +- ds2 - Passed to DESeq +- anc - Passed to ANCOM +- lim - Passed to eBayes +- lli - Passed to eBayes +- lli2 - Passed to eBayes +- kru - Passed to kruskal.test +- aov - Passed to aov +- lao - Passed to aov +- lao2 - Passed to aov +- lrm - Passed to lm and lme +- llm - Passed to lm and lme +- llm2 - Passed to lm and lme +- rai - Passed to raida +- spe - Passed to cor.test + +Errors and issues +----------------- + +If a method fails the following is usually printed: *Error in { : task 1 +failed - "task X failed"*. To find the method corresponding to X, run +the function again with *verbose = TRUE*. This will print the order of +the tests, and test number X can then be excluded from the *tests* +argument. diff --git a/man/DA.adx.Rd b/man/DA.adx.Rd index 2426607..2421543 100644 --- a/man/DA.adx.Rd +++ b/man/DA.adx.Rd @@ -4,7 +4,14 @@ \alias{DA.adx} \title{Aldex t.test and wilcox} \usage{ -DA.adx(count_table, outcome, mc.samples = 128, p.adj) +DA.adx(count_table, outcome, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{...}{Additional arguments for the aldex function} } \description{ Aldex t.test and wilcox diff --git a/man/DA.anc.Rd b/man/DA.anc.Rd index 602269d..26acd1f 100644 --- a/man/DA.anc.Rd +++ b/man/DA.anc.Rd @@ -4,8 +4,14 @@ \alias{DA.anc} \title{ANCOM} \usage{ -DA.anc(count_table, outcome, sig = 0.05, multcorr = 3, tau = 0.02, - theta = 0.1, repeated = FALSE) +DA.anc(count_table, outcome, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{...}{Additional arguments for the ANCOM function} } \description{ ANCOM diff --git a/man/DA.aov.Rd b/man/DA.aov.Rd index b9632e5..8978737 100644 --- a/man/DA.aov.Rd +++ b/man/DA.aov.Rd @@ -4,7 +4,18 @@ \alias{DA.aov} \title{ANOVA} \usage{ -DA.aov(count_table, outcome, p.adj, relative) +DA.aov(count_table, outcome, relative = TRUE, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the aov function} } \description{ ANOVA diff --git a/man/DA.bay.Rd b/man/DA.bay.Rd index 7f05ffc..44a954f 100644 --- a/man/DA.bay.Rd +++ b/man/DA.bay.Rd @@ -4,9 +4,32 @@ \alias{DA.bay} \title{baySeq} \usage{ -DA.bay(count_table, outcome, p.adj) +DA.bay(count_table, outcome, p.adj = "fdr", samplesize = 1e+05, + samplingSubset = NULL, equalDispersions = TRUE, estimation = "QL", + zeroML = FALSE, consensus = FALSE, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{samplesize}{How large a sample should be taken in estimating the priors? Default 1e5} + +\item{samplingSubset}{If given, the priors will be sampled only from the subset specified. Default NULL} + +\item{equalDispersions}{Should we assume equal dispersions of data across all groups in the 'cD' object? Defaults to TRUE} + +\item{estimation}{Defaults to "QL", indicating quasi-likelihood estimation of priors. Currently, the only other possibilities are "ML", a maximum-likelihood method, and "edgeR", the moderated dispersion estimates produced by the 'edgeR' package} + +\item{zeroML}{Should parameters from zero data (rows that within a group are all zeros) be estimated using maximum likelihood methods (which will result in zeros in the parameters?} + +\item{consensus}{If TRUE, creates a consensus distribution rather than a separate distribution for each member of the groups structure in the 'cD' object} + +\item{...}{Additional arguments to the getLikelihoods function} } \description{ -From +Implemented as in: https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 } diff --git a/man/DA.ds2.Rd b/man/DA.ds2.Rd index 5ffa6fd..641c201 100644 --- a/man/DA.ds2.Rd +++ b/man/DA.ds2.Rd @@ -4,10 +4,21 @@ \alias{DA.ds2} \title{DESeq2} \usage{ -DA.ds2(count_table, outcome, paired = NULL, p.adj) +DA.ds2(count_table, outcome, paired = NULL, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the DESeq function} } \description{ -From: -https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 +Implemented as in: +https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8. Manual geometric means calculated to avoid errors, see https://github.com/joey711/phyloseq/issues/387 } diff --git a/man/DA.enn.Rd b/man/DA.enn.Rd deleted file mode 100644 index d97f9c6..0000000 --- a/man/DA.enn.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DA.enn.R -\name{DA.enn} -\alias{DA.enn} -\title{ENNB from https://cals.arizona.edu/~anling/} -\usage{ -DA.enn(count_table, outcome, TMM.option = 1, p.adj) -} -\description{ -ENNB from https://cals.arizona.edu/~anling/ -} diff --git a/man/DA.ere.Rd b/man/DA.ere.Rd index 477c931..e6cc7e5 100644 --- a/man/DA.ere.Rd +++ b/man/DA.ere.Rd @@ -2,11 +2,20 @@ % Please edit documentation in R/DA.ere.R \name{DA.ere} \alias{DA.ere} -\title{EdgeR} +\title{EdgeR exact test} \usage{ -DA.ere(count_table, outcome, p.adj) +DA.ere(count_table, outcome, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the exactTest function} } \description{ -From: +Implemented as in: https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 } diff --git a/man/DA.erq.Rd b/man/DA.erq.Rd index fdfdb28..57ed721 100644 --- a/man/DA.erq.Rd +++ b/man/DA.erq.Rd @@ -4,7 +4,18 @@ \alias{DA.erq} \title{EdgeR quasi-likelihood} \usage{ -DA.erq(count_table, outcome, paired = NULL, p.adj) +DA.erq(count_table, outcome, paired = NULL, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the glmQLFit function} } \description{ EdgeR quasi-likelihood diff --git a/man/DA.kru.Rd b/man/DA.kru.Rd index 7f00ffd..83ed61d 100644 --- a/man/DA.kru.Rd +++ b/man/DA.kru.Rd @@ -4,7 +4,18 @@ \alias{DA.kru} \title{Kruskal-Wallis test} \usage{ -DA.kru(count_table, outcome, p.adj, relative = TRUE) +DA.kru(count_table, outcome, relative = TRUE, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the kruskal.test function} } \description{ Kruskal-Wallis test diff --git a/man/DA.lao.Rd b/man/DA.lao.Rd index ddbd0d6..9034c86 100644 --- a/man/DA.lao.Rd +++ b/man/DA.lao.Rd @@ -2,10 +2,24 @@ % Please edit documentation in R/DA.lao.R \name{DA.lao} \alias{DA.lao} -\title{Log ANOVA} +\title{ANOVA} \usage{ -DA.lao(count_table, outcome, delta = 1, p.adj) +DA.lao(count_table, outcome, relative = TRUE, p.adj = "fdr", delta = 1, + ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for the log transformation. Default 1} + +\item{...}{Additional arguments for the aov functions} } \description{ -Log ANOVA +With log transformation of counts before normalization. } diff --git a/man/DA.lao2.Rd b/man/DA.lao2.Rd index 938c636..82001ba 100644 --- a/man/DA.lao2.Rd +++ b/man/DA.lao2.Rd @@ -2,10 +2,21 @@ % Please edit documentation in R/DA.lao2.R \name{DA.lao2} \alias{DA.lao2} -\title{Log ANOVA 2} +\title{ANOVA} \usage{ -DA.lao2(count_table, outcome, delta = 0.001, p.adj) +DA.lao2(count_table, outcome, p.adj = "fdr", delta = 0.001, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for the log transformation. Default 0.001} + +\item{...}{Additional arguments for the aov functions} } \description{ -Log ANOVA 2 +With log transformation of relative abundances. } diff --git a/man/DA.lim.Rd b/man/DA.lim.Rd index 5603a8c..06913b9 100644 --- a/man/DA.lim.Rd +++ b/man/DA.lim.Rd @@ -4,10 +4,21 @@ \alias{DA.lim} \title{LIMMA} \usage{ -DA.lim(count_table, outcome, p.adj, relative, paired = NULL, log = FALSE, - delta = 1) +DA.lim(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} } \description{ -Some is borrowed from: +Some implementation is borrowed from: http://www.biostat.jhsph.edu/~kkammers/software/eupa/R_guide.html } diff --git a/man/DA.lli.Rd b/man/DA.lli.Rd new file mode 100644 index 0000000..fa80ce3 --- /dev/null +++ b/man/DA.lli.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DA.lli.R +\name{DA.lli} +\alias{DA.lli} +\title{LIMMA} +\usage{ +DA.lli(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", delta = 1, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for log transformation. Default 1} +} +\description{ +With log transformation for counts before normalization. +Some implementation is borrowed from: +http://www.biostat.jhsph.edu/~kkammers/software/eupa/R_guide.html +} diff --git a/man/DA.lli2.Rd b/man/DA.lli2.Rd new file mode 100644 index 0000000..31afcb3 --- /dev/null +++ b/man/DA.lli2.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DA.lli2.R +\name{DA.lli2} +\alias{DA.lli2} +\title{LIMMA} +\usage{ +DA.lli2(count_table, outcome, paired = NULL, p.adj = "fdr", delta = 0.001, + ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for log transformation. Default 0.001} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} +} +\description{ +With log transformation of relative abundances. +Some implementation is borrowed from: +http://www.biostat.jhsph.edu/~kkammers/software/eupa/R_guide.html +} diff --git a/man/DA.llm.Rd b/man/DA.llm.Rd index 216e859..0db41c3 100644 --- a/man/DA.llm.Rd +++ b/man/DA.llm.Rd @@ -2,10 +2,27 @@ % Please edit documentation in R/DA.llm.R \name{DA.llm} \alias{DA.llm} -\title{Linear regression - log} +\title{Linear regression} \usage{ -DA.llm(count_table, outcome, paired = NULL, p.adj, delta = 1) +DA.llm(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", delta = 1, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for the log transformation. Default 1} + +\item{...}{Additional arguments for the lm/lme functions} } \description{ -Linear regression - log +With log transformation of counts before normalization. +Mixed-effect model is used when a paired argument is included, with the paired variable as a random intercept. } diff --git a/man/DA.llm2.Rd b/man/DA.llm2.Rd index a72ba04..75155bc 100644 --- a/man/DA.llm2.Rd +++ b/man/DA.llm2.Rd @@ -2,10 +2,27 @@ % Please edit documentation in R/DA.llm2.R \name{DA.llm2} \alias{DA.llm2} -\title{Linear regression - log #2} +\title{Linear regression} \usage{ -DA.llm2(count_table, outcome, paired = NULL, p.adj, delta = 0.001) +DA.llm2(count_table, outcome, paired = NULL, p.adj = "fdr", delta = 0.001, + ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for the log transformation. Default 0.001} + +\item{...}{Additional arguments for the lm/lme functions} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} } \description{ -Linear regression - log #2 +With log transformation of relative abundances. +Mixed-effect model is used when a paired argument is included, with the paired variable as a random intercept. } diff --git a/man/DA.lrm.Rd b/man/DA.lrm.Rd index b95416e..d750f60 100644 --- a/man/DA.lrm.Rd +++ b/man/DA.lrm.Rd @@ -4,8 +4,22 @@ \alias{DA.lrm} \title{Linear regression} \usage{ -DA.lrm(count_table, outcome, paired = NULL, p.adj) +DA.lrm(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the lm/lme functions} } \description{ -Linear regression +Mixed-effect model is a paired argument is present, with the paired variable as random intercept } diff --git a/man/DA.ltt.Rd b/man/DA.ltt.Rd index 9fa8f03..ce499b7 100644 --- a/man/DA.ltt.Rd +++ b/man/DA.ltt.Rd @@ -2,13 +2,32 @@ % Please edit documentation in R/DA.ltt.R \name{DA.ltt} \alias{DA.ltt} -\title{Log t-test} +\title{Welch t-test} \usage{ -DA.ltt(count_table, outcome, delta = 1, testStat = function(case, control) { - log((mean(case) + 1)/(mean(control) + 1)) }, - testStat.pair = function(case, control) { mean(log((case + 1)/(control + - 1))) }, paired = NULL, p.adj) +DA.ltt(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", delta = 1, testStat = function(case, control) { + log((mean(case) + 1)/(mean(control) + 1)) }, testStat.pair = function(case, + control) { mean(log((case + 1)/(control + 1))) }, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for log transformation.} + +\item{testStat}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1))} + +\item{testStat.pair}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1)))} + +\item{...}{Additional arguments for the t.test function} } \description{ -Log t-test +Welch t-test } diff --git a/man/DA.ltt2.Rd b/man/DA.ltt2.Rd index 1ff52be..07fe58e 100644 --- a/man/DA.ltt2.Rd +++ b/man/DA.ltt2.Rd @@ -2,13 +2,32 @@ % Please edit documentation in R/DA.ltt2.R \name{DA.ltt2} \alias{DA.ltt2} -\title{Log t-test2} +\title{Welch t-test} \usage{ -DA.ltt2(count_table, outcome, delta = 0.001, testStat = function(case, - control) { log((mean(case) + 1)/(mean(control) + 1)) }, - testStat.pair = function(case, control) { mean(log((case + 1)/(control + - 1))) }, paired = NULL, p.adj) +DA.ltt2(count_table, outcome, paired = NULL, p.adj = "fdr", delta = 0.001, + testStat = function(case, control) { log((mean(case) + 1)/(mean(control) + + 1)) }, testStat.pair = function(case, control) { mean(log((case + + 1)/(control + 1))) }, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{delta}{Numeric. Pseudocount for log transformation. Default 0.001} + +\item{testStat}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1))} + +\item{testStat.pair}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1)))} + +\item{...}{Additional arguments for the t.test function} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} } \description{ -Log t-test2 +With log transformed relative abundances } diff --git a/man/DA.msf.Rd b/man/DA.msf.Rd index 8212f2b..d214c17 100644 --- a/man/DA.msf.Rd +++ b/man/DA.msf.Rd @@ -4,9 +4,18 @@ \alias{DA.msf} \title{MetagenomeSeq Feature model} \usage{ -DA.msf(count_table, outcome, p.adj) +DA.msf(count_table, outcome, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the fitFeatureModel function} } \description{ -From: +Implemented as in: https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 } diff --git a/man/DA.neb.Rd b/man/DA.neb.Rd index 6973ac5..78a482c 100644 --- a/man/DA.neb.Rd +++ b/man/DA.neb.Rd @@ -4,8 +4,20 @@ \alias{DA.neb} \title{Negative binomial glm} \usage{ -DA.neb(count_table, outcome, paired = NULL, p.adj) +DA.neb(count_table, outcome, paired = NULL, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the glm.nb/glmer.nb functions} } \description{ -Negative binomial glm +With log(librarySize) as offset. +Mixed-effect model is used when a paired argument is included, with the paired variable as a random intercept. } diff --git a/man/DA.per.Rd b/man/DA.per.Rd index a3e59c7..89638bd 100644 --- a/man/DA.per.Rd +++ b/man/DA.per.Rd @@ -2,13 +2,32 @@ % Please edit documentation in R/DA.per.R \name{DA.per} \alias{DA.per} -\title{Permutation test} +\title{Permutation test of user-defined test statistic} \usage{ -DA.per(count_table, outcome, paired = NULL, noOfIterations = 10000, - seed = as.numeric(Sys.time()), margin = 50, testStat = function(case, - control) { log((mean(case) + 1)/(mean(control) + 1)) }, - testStat.pair = function(case, control) { mean(log((case + 1)/(control + - 1))) }, p.adj, relative = TRUE) +DA.per(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", testStat = function(case, control) { log((mean(case) + + 1)/(mean(control) + 1)) }, testStat.pair = function(case, control) { + mean(log((case + 1)/(control + 1))) }, noOfIterations = 10000, + margin = 50) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{testStat}{Function. Function for the test statistic. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1))} + +\item{testStat.pair}{Function. Function for test statistc for paired analysis. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1)))} + +\item{noOfIterations}{Integer. Iterations for permutations. Default 10000} + +\item{margin}{Numeric. Margin for when to stop iterations if p-value is high and unlikely to become low} } \description{ Modified version of the one from: diff --git a/man/DA.rai.Rd b/man/DA.rai.Rd new file mode 100644 index 0000000..f2bf5ee --- /dev/null +++ b/man/DA.rai.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DA.rai.R +\name{DA.rai} +\alias{DA.rai} +\title{RAIDA} +\usage{ +DA.rai(count_table, outcome, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the raida function} + +\item{predictor}{Factor. The outcome of interest. E.g. case and control} +} +\description{ +RAIDA +} diff --git a/man/DA.spe.Rd b/man/DA.spe.Rd new file mode 100644 index 0000000..c37570c --- /dev/null +++ b/man/DA.spe.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DA.spe.R +\name{DA.spe} +\alias{DA.spe} +\title{Spearman's Rank Correlation} +\usage{ +DA.spe(count_table, outcome, relative = TRUE, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Numeric. The outcome of interest. E.g. case and control} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} +} +\description{ +Spearman's Rank Correlation +} diff --git a/man/DA.ttt.Rd b/man/DA.ttt.Rd index 716811c..775a394 100644 --- a/man/DA.ttt.Rd +++ b/man/DA.ttt.Rd @@ -2,13 +2,30 @@ % Please edit documentation in R/DA.ttt.R \name{DA.ttt} \alias{DA.ttt} -\title{t-test} +\title{Welch t-test} \usage{ -DA.ttt(count_table, outcome, testStat = function(case, control) { - log((mean(case) + 1)/(mean(control) + 1)) }, testStat.pair = function(case, - control) { mean(log((case + 1)/(control + 1))) }, paired = NULL, p.adj, - relative = TRUE) +DA.ttt(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", testStat = function(case, control) { log((mean(case) + + 1)/(mean(control) + 1)) }, testStat.pair = function(case, control) { + mean(log((case + 1)/(control + 1))) }, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{testStat}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1))} + +\item{testStat.pair}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1)))} + +\item{...}{Additional arguments for the t.test function} } \description{ -t-test +Welch t-test } diff --git a/man/DA.wil.Rd b/man/DA.wil.Rd index 814feb6..5dd4258 100644 --- a/man/DA.wil.Rd +++ b/man/DA.wil.Rd @@ -2,13 +2,30 @@ % Please edit documentation in R/DA.wil.R \name{DA.wil} \alias{DA.wil} -\title{Wilcox test} +\title{Wilcoxon Rank Sum and Signed Rank Test} \usage{ -DA.wil(count_table, outcome, testStat = function(case, control) { - log((mean(case) + 1)/(mean(control) + 1)) }, testStat.pair = function(case, - control) { mean(log((case + 1)/(control + 1))) }, paired = NULL, p.adj, - relative = TRUE) +DA.wil(count_table, outcome, paired = NULL, relative = TRUE, + p.adj = "fdr", testStat = function(case, control) { log((mean(case) + + 1)/(mean(control) + 1)) }, testStat.pair = function(case, control) { + mean(log((case + 1)/(control + 1))) }, ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor. The outcome of interest. E.g. case and control} + +\item{paired}{Factor. Subject ID for running paired analysis} + +\item{relative}{Logical. Should count_table be normalized to relative abundances. Default TRUE} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{testStat}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1))} + +\item{testStat.pair}{Function. Function for calculating fold change. Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1)))} + +\item{...}{Additional arguments for the wilcox.test function} } \description{ -Wilcox test +Wilcoxon Rank Sum and Signed Rank Test } diff --git a/man/DA.zig.Rd b/man/DA.zig.Rd index c17d006..214e786 100644 --- a/man/DA.zig.Rd +++ b/man/DA.zig.Rd @@ -4,9 +4,18 @@ \alias{DA.zig} \title{MetageonomeSeq ZIG} \usage{ -DA.zig(count_table, outcome, p.adj) +DA.zig(count_table, outcome, p.adj = "fdr", ...) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{p.adj}{Character. P-value adjustment. Default "fdr". See p.adjust for details} + +\item{...}{Additional arguments for the fitZig function} + +\item{predictor}{Factor. The outcome of interest. E.g. case and control} } \description{ -From +Implemented as in: https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 } diff --git a/man/allDA.Rd b/man/allDA.Rd index ea77b4f..f89abb7 100644 --- a/man/allDA.Rd +++ b/man/allDA.Rd @@ -4,63 +4,32 @@ \alias{allDA} \title{Run many differential abundance methods} \usage{ -allDA(count_table, predictor, paired = NULL, relative = TRUE, - tests = c("anc", "per", "bay", "adx", "enn", "wil", "ttt", "ltt", "ltt2", - "neb", "erq", "ere", "msf", "zig", "ds2", "lim", "aov", "lao", "lao2", "kru", - "lrm", "llm", "llm2"), cores = (detectCores() - 1), rng.seed = 123, - p.adj = "fdr", delta1 = 1, delta2 = 0.001, noOfIterations = 10000, - margin = 50, testStat = function(case, control) { log((mean(case) + - 1)/(mean(control) + 1)) }, testStat.pair = function(case, control) { - mean(log((case + 1)/(control + 1))) }, mc.samples = 64, sig = 0.05, - multcorr = 3, tau = 0.02, theta = 0.1, repeated = FALSE, - TMM.option = 1, log.lim = FALSE, delta.lim = 1) +allDA(count_table, predictor, paired = NULL, tests = c("spe", "anc", "per", + "bay", "adx", "wil", "ttt", "ltt", "ltt2", "neb", "erq", "ere", "msf", "zig", + "ds2", "lim", "aov", "lao", "lao2", "kru", "lrm", "llm", "llm2", "rai"), + relative = TRUE, cores = (detectCores() - 1), rng.seed = 123, + p.adj = "fdr", args = list(), verbose = TRUE) } \arguments{ \item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} \item{predictor}{Factor or Numeric. The outcome of interest. E.g. case and control. If the predictor has more than two levels, only the 2. level will be spiked. If the predictor is numeric it will be treated as such in the analyses} -\item{paired}{Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm" and "llm2"} - -\item{relative}{Logical. Should abundances be made relative? Only has effect for "ttt", "wil", "per", "aov", "kru" and "lim". Default TRUE} +\item{paired}{Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm", "llm2", "lim", "lli" and "lli2"} \item{tests}{Character. Which tests to include. Default all (See below for details)} +\item{relative}{Logical. Should abundances be made relative? Only has effect for "ttt", "ltt", "wil", "per", "aov", "lao", "kru", "lim", "lli", "lrm", "llm" and "spe". Default TRUE} + \item{cores}{Integer. Number of cores to use for parallel computing. Default one less than available} \item{rng.seed}{Numeric. Seed for reproducibility. Default 123} \item{p.adj}{Character. Method for pvalue adjustment. Default "fdr"} -\item{delta1}{Numeric. The pseudocount for the log t.test/log ANOVA/llm method. Default 1} - -\item{delta2}{Numeric. The pseudocount for the log t.test2/log ANOVA2/llm2 method. Default 0.001} - -\item{noOfIterations}{Integer. How many iterations should be run for the permutation tests. Default 10000} - -\item{margin}{Integer. The margin of when to stop iterating for non-significant Features for the permutation tests. Default 50} - -\item{testStat}{Function. The test statistic function for the permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1))} - -\item{testStat.pair}{Function. The test statistic function for the paired permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1)))} - -\item{mc.samples}{Integer. Monte Carlo samples for ALDEx2. Default 64} - -\item{sig}{Numeric. Alpha used in ANCOM. Default 0.05} +\item{args}{List. A list with lists of arguments passed to the different methods. See details for more.} -\item{multcorr}{Integer. Correction used in ANCOM. Default 3 (no correction)} - -\item{tau}{Numeric. Tuning parameter for ANCOM. Default 0.02} - -\item{theta}{Numeric. Tuning parameter for ANCOM. Default 0.1} - -\item{repeated}{Logical. Are there repeated measures? Only for ANCOM. Default FALSE} - -\item{TMM.option}{1 or 2. For "enn". Option "1" is for an approach using the mean of the effective library sizes as a reference library size in TMM normalization; option "2" represents an approach to regenerating counts with a common dispersion. Default 1} - -\item{log.lim}{Logical. For "lim". Should abundances be log transformed (before relative abundances if relative = TRUE). Default FALSE} - -\item{delta.lim}{Pseudocount for "lim" log transformation. Default 1} +\item{verbose}{Logical. Print information during run} } \value{ A list of results: @@ -88,7 +57,6 @@ Currently implemented methods: \item msf - MetagenomeSeq feature model \item zig - MetagenomeSeq zero-inflated gaussian \item ds2 - DESeq2 - \item enn - ENNB: Two-stage procedure from https://cals.arizona.edu/~anling/software.htm \item anc - ANCOM. This test does not output pvalues; for comparison with the other methods, detected Features are set to a pvalue of 0, all else are set to 1. \item lim - LIMMA. Moderated linear models based on emperical bayes \item kru - Kruskal-Wallis on relative abundances @@ -98,8 +66,43 @@ Currently implemented methods: \item lrm - Linear regression on relative abundances \item llm - Linear regression, but reads are first transformed with log(abundance + delta1) then turned into relative abundances \item llm2 - Linear regression, but with relative abundances transformed with log(relative abundance + delta2) + \item rai - RAIDA + \item spe - Spearman correlation } -Is it too slow? Remove "anc" from test argument. -"per" is also somewhat slow, but is usually one of the methods performing well with large sample sizes. +Additional arguments can be passed to the internal functions with the "args" argument. +It should be structured as a list with elements named by the tests: +E.g. passing to the DA.per function that it should only run 1000 iterations: args = list(per=list(noOfIterations=1000)). +Include that the log t.test should use a pseudocount of 0.1: args = list(per=list(noOfIterations=1000), ltt=list(delta=0.1)). +Additional arguments are simply seperated by commas. + +Below is an overview of which functions get the arguments that are passed to a specific test +\itemize{ + \item per - Passed to DA.per + \item bay - Passed to getPriors and getLikelihoods + \item adx - Passed to aldex + \item wil - Passed to wilcox.test and DA.wil + \item ttt - Passed to t.test and DA.ttt + \item ltt - Passed to t.test and DA.ltt + \item ltt2 - Passed to t.test and DA.ltt2 + \item neb - Passed to glm.nb and glmer.nb + \item erq - Passed to exactTest + \item ere - Passed to glmQLFit + \item msf - Passed to fitFeatureModel + \item zig - Passed to fitZig + \item ds2 - Passed to DESeq + \item anc - Passed to ANCOM + \item lim - Passed to eBayes + \item lli - Passed to eBayes + \item lli2 - Passed to eBayes + \item kru - Passed to kruskal.test + \item aov - Passed to aov + \item lao - Passed to aov + \item lao2 - Passed to aov + \item lrm - Passed to lm and lme + \item llm - Passed to lm and lme + \item llm2 - Passed to lm and lme + \item rai - Passed to raida + \item spe - Passed to cor.test +} } diff --git a/man/spikein.Rd b/man/spikein.Rd index 58a31e7..f463e00 100644 --- a/man/spikein.Rd +++ b/man/spikein.Rd @@ -5,9 +5,25 @@ \title{Spike-in} \usage{ spikein(count_table, outcome, spikeMethod = "mult", effectSize = 2, k, - relative = TRUE, num.pred = FALSE) + num.pred = FALSE) +} +\arguments{ +\item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} + +\item{outcome}{Factor or Numeric. The outcome of interest. E.g. case and control. If the predictor has more than two levels, only the 2. level will be spiked. If the predictor is numeric it will be treated as such in the analyses} + +\item{spikeMethod}{Character. Multiplicative ("mult") or additive ("add") spike-in. Default "mult". Use "add" if there are negative values in count_table} + +\item{effectSize}{Integer. The effect size for the spike-ins. Default 2} + +\item{k}{Vector of length 3. Number of Features to spike in each tertile (lower, mid, upper). k=c(5,10,15): 5 features spiked in low abundance tertile, 10 features spiked in mid abundance tertile and 15 features spiked in high abundance tertile. Default c(5,5,5)} + +\item{num.pred}{Logical. Is the outcome numeric? Default FALSE} } \description{ +Internal function for the testDA function. +} +\details{ Modified version of the one from: -https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 +https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8. } diff --git a/man/testDA.Rd b/man/testDA.Rd index 6052a5d..c7b38c4 100644 --- a/man/testDA.Rd +++ b/man/testDA.Rd @@ -4,17 +4,12 @@ \alias{testDA} \title{Comparing differential abundance methods by FPR and AUC} \usage{ -testDA(count_table, predictor, R = 10, paired = NULL, relative = TRUE, - tests = c("anc", "per", "bay", "adx", "enn", "wil", "ttt", "ltt", "ltt2", - "neb", "erq", "ere", "msf", "zig", "ds2", "lim", "aov", "lao", "lao2", "kru", - "lrm", "llm", "llm2"), spikeMethod = "mult", effectSize = 2, k = c(5, 5, - 5), cores = (detectCores() - 1), rng.seed = 123, p.adj = "fdr", - delta1 = 1, delta2 = 0.001, noOfIterations = 10000, margin = 50, - testStat = function(case, control) { log((mean(case) + 1)/(mean(control) - + 1)) }, testStat.pair = function(case, control) { mean(log((case + - 1)/(control + 1))) }, mc.samples = 64, sig = 0.05, multcorr = 3, - tau = 0.02, theta = 0.1, repeated = FALSE, TMM.option = 1, - log.lim = FALSE, delta.lim = 1) +testDA(count_table, predictor, R = 10, paired = NULL, tests = c("spe", + "anc", "per", "bay", "adx", "wil", "ttt", "ltt", "ltt2", "neb", "erq", "ere", + "msf", "zig", "ds2", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", "lrm", + "llm", "llm2", "rai"), relative = TRUE, spikeMethod = "mult", + effectSize = 2, k = c(5, 5, 5), cores = (detectCores() - 1), + rng.seed = 123, p.adj = "fdr", args = list(), verbose = FALSE) } \arguments{ \item{count_table}{Matrix or data.frame. Table with taxa/genes/proteins as rows and samples as columns} @@ -23,53 +18,27 @@ testDA(count_table, predictor, R = 10, paired = NULL, relative = TRUE, \item{R}{Integer. Number of times to run the tests. Default 10} -\item{paired}{Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm" and "llm2"} - -\item{relative}{Logical. Should abundances be made relative? Only has effect for "ttt", "wil", "per", "aov", "kru" and "lim". Default TRUE} +\item{paired}{Factor. Subject ID for running paired analysis. Only for "per", "ttt", "ltt", "ltt2", "neb", "wil", "erq", "ds2", "lrm", "llm", "llm2", "lim", "lli" and "lli2"} \item{tests}{Character. Which tests to include. Default all (See below for details)} -\item{spikeMethod}{Character. Multiplicative ("mult") or additive ("add") spike-in. Default "mult". Use "add" if relative = TRUE} +\item{relative}{Logical. Should abundances be made relative? Only has effect for "ttt", "ltt", "wil", "per", "aov", "lao", "kru", "lim", "lli", "lrm", "llm" and "spe". Default TRUE} + +\item{spikeMethod}{Character. Multiplicative ("mult") or additive ("add") spike-in. Default "mult". Use "add" if count_table contains negative counts.} \item{effectSize}{Integer. The effect size for the spike-ins. Default 2} \item{k}{Vector of length 3. Number of Features to spike in each tertile (lower, mid, upper). k=c(5,10,15): 5 features spiked in low abundance tertile, 10 features spiked in mid abundance tertile and 15 features spiked in high abundance tertile. Default c(5,5,5)} -\item{cores}{Integer. Number of cores to use for parallel computing. Default one less than available} +\item{cores}{Integer. Number of cores to use for parallel computing. Default one less than available. Set to 1 for sequential computing.} \item{rng.seed}{Numeric. Seed for reproducibility. Default 123} \item{p.adj}{Character. Method for pvalue adjustment. Default "fdr" (Does not affect AUC, FPR or Spike.detect.rate, these use raw p-values)} -\item{delta1}{Numeric. The pseudocount for the log t.test/log ANOVA/llm method. Default 1} - -\item{delta2}{Numeric. The pseudocount for the log t.test2/log ANOVA2/lmm2 method. Default 0.001} - -\item{noOfIterations}{Integer. How many iterations should be run for the permutation test. Default 10000} - -\item{margin}{Integer. The margin of when to stop iterating for non-significant Features for the permutation test. Default 50} - -\item{testStat}{Function. The test statistic function for the permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: log((mean(case abundances)+1)/(mean(control abundances)+1))} - -\item{testStat.pair}{Function. The test statistic function for the paired permutation test (also in output of ttt, ltt, ltt2 and wil). Should take two vectors as arguments. Default is a log fold change: mean(log((case abundances+1)/(control abundances+1)))} - -\item{mc.samples}{Integer. Monte Carlo samples for ALDEx2. Default 64} - -\item{sig}{Numeric. Alpha used in ANCOM. Default 0.05} - -\item{multcorr}{Integer. Correction used in ANCOM. Default 3 (no correction)} +\item{args}{List. A list with lists of arguments passed to the different methods. See details for more.} -\item{tau}{Numeric. Tuning parameter for ANCOM. Default 0.02} - -\item{theta}{Numeric. Tuning parameter for ANCOM. Default 0.1} - -\item{repeated}{Logical. Are there repeated measures? Only for ANCOM. Default FALSE} - -\item{TMM.option}{1 or 2. For "enn". Option "1" is for an approach using the mean of the effective library sizes as a reference library size in TMM normalization; option "2" represents an approach to regenerating counts with a common dispersion. Default 1} - -\item{log.lim}{Logical. For "lim". Should abundances be log transformed (before relative abundances if relative = TRUE). Default FALSE} - -\item{delta.lim}{Pseudocount for "lim" log transformation. Default 1} +\item{verbose}{Logical. Print information during run} } \value{ An object of class DA, which contains a list of results: @@ -97,9 +66,10 @@ Currently implemented methods: \item msf - MetagenomeSeq feature model \item zig - MetagenomeSeq zero-inflated gaussian \item ds2 - DESeq2 - \item enn - ENNB: Two-stage procedure from https://cals.arizona.edu/~anling/software.htm \item anc - ANCOM. This test does not output pvalues; for comparison with the other methods, detected Features are set to a pvalue of 0, all else are set to 1. \item lim - LIMMA. Moderated linear models based on emperical bayes + \item lli - LIMMA, but reads are first transformed with log(abundance + delta1) then turned into relative abundances + \item lli2 - LIMMA, but with relative abundances transformed with log(relative abundance + delta2) \item kru - Kruskal-Wallis on relative abundances \item aov - ANOVA on relative abundances \item lao - ANOVA, but reads are first transformed with log(abundance + delta1) then turned into relative abundances @@ -107,8 +77,47 @@ Currently implemented methods: \item lrm - Linear regression on relative abundances \item llm - Linear regression, but reads are first transformed with log(abundance + delta1) then turned into relative abundances \item llm2 - Linear regression, but with relative abundances transformed with log(relative abundance + delta2) + \item rai - RAIDA + \item spe - Spearman correlation } Is it too slow? Remove "anc" from test argument. +"neb" is slow if there is a paired argument. "per" is also somewhat slow, but is usually one of the methods performing well with large sample sizes. + +Additional arguments can be passed to the internal functions with the "args" argument. +It should be structured as a list with elements named by the tests: +E.g. passing to the DA.per function that it should only run 1000 iterations: args = list(per=list(noOfIterations=1000)). +Include that the log t.test should use a pseudocount of 0.1: args = list(per=list(noOfIterations=1000), ltt=list(delta=0.1)). +Additional arguments are simply seperated by commas. + +Below is an overview of which functions get the arguments that are passed to a specific test +\itemize{ + \item per - Passed to DA.per + \item bay - Passed to getPriors and getLikelihoods + \item adx - Passed to aldex + \item wil - Passed to wilcox.test and DA.wil + \item ttt - Passed to t.test and DA.ttt + \item ltt - Passed to t.test and DA.ltt + \item ltt2 - Passed to t.test and DA.ltt2 + \item neb - Passed to glm.nb and glmer.nb + \item erq - Passed to exactTest + \item ere - Passed to glmQLFit + \item msf - Passed to fitFeatureModel + \item zig - Passed to fitZig + \item ds2 - Passed to DESeq + \item anc - Passed to ANCOM + \item lim - Passed to eBayes + \item lli - Passed to eBayes + \item lli2 - Passed to eBayes + \item kru - Passed to kruskal.test + \item aov - Passed to aov + \item lao - Passed to aov + \item lao2 - Passed to aov + \item lrm - Passed to lm and lme + \item llm - Passed to lm and lme + \item llm2 - Passed to lm and lme + \item rai - Passed to raida + \item spe - Passed to cor.test +} }