snpgdsAdmixProp()

zhengxwen · Feb 23, 2024 · bc58568 · bc58568
1 parent 325dd75
commit bc58568
Show file tree

Hide file tree

Showing 5 changed files with 29 additions and 23 deletions.
diff --git a/NEWS b/NEWS
@@ -10,6 +10,8 @@ UTILITIES
 
     o show a progress bar in `snpgdsLDpruning()` when 'verbose=TRUE'
 
+    o update the help files of `snpgdsAdmixProp()` and `snpgdsAdmixPlot()`
+
 
 CHANGES IN VERSION 1.34.0
 -------------------------

diff --git a/R/PCA.R b/R/PCA.R
@@ -6,7 +6,7 @@
 #     A High-performance Computing Toolset for Relatedness and
 # Principal Component Analysis of SNP Data
 #
-# Copyright (C) 2011 - 2020        Xiuwen Zheng
+# Copyright (C) 2011 - 2024        Xiuwen Zheng
 # License: GPL-3
 #
 
@@ -343,7 +343,6 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
     # 'sample.id' and 'eigenvect' should exist
     stopifnot(!is.null(eigobj$sample.id))
     stopifnot(is.matrix(eigobj$eigenvect))
-
     stopifnot(is.list(groups))
     stopifnot(length(groups) > 1)
     if (length(groups) > (ncol(eigobj$eigenvect)+1))
@@ -374,8 +373,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
         grlist <- c(grlist, groups[[i]])
     }
 
-    stopifnot(is.logical(bound) & is.vector(bound))
-    stopifnot(length(bound) == 1)
+    stopifnot(is.logical(bound), length(bound)==1L)
 
     # calculate ...
 
@@ -393,7 +391,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
     }
 
     # check
-    if (any(is.na(mat)))
+    if (anyNA(mat))
         stop("The eigenvectors should not have missing value!")
 
     T.P <- mat[length(groups), ]
@@ -407,11 +405,11 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
     rownames(new.p) <- eigobj$sample.id
 
     # whether bounded
-    if (bound)
+    if (isTRUE(bound))
     {
         new.p[new.p < 0] <- 0
-        r <- 1.0 / rowSums(new.p)
-        new.p <- new.p * r
+        new.p[new.p > 1] <- 1
+        new.p <- new.p / rowSums(new.p)
     }
 
     new.p

diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ The GDS format offers the efficient operations specifically designed for integer
 
 ## Bioconductor
 
-Release Version: v1.36.0
+Release Version: v1.36.1
 
 [http://www.bioconductor.org/packages/SNPRelate](http://www.bioconductor.org/packages/SNPRelate)
 

diff --git a/man/snpgdsAdmixPlot.Rd b/man/snpgdsAdmixPlot.Rd
@@ -15,12 +15,15 @@ snpgdsAdmixTable(propmat, group, sort=FALSE)
 \arguments{
     \item{propmat}{a sample-by-ancestry matrix of proportion estimates,
         returned from \code{\link{snpgdsAdmixProp}()}}
-    \item{group}{a character vector of a factor according to the samples
+    \item{group}{a character vector of a factor according to the rows
         in \code{propmat}}
-    \item{col}{specify colors}
+    \item{col}{specify colors; if \code{group} is not specified, it is a color
+        for each sample; otherwise specify colors for the groups}
     \item{multiplot}{single plot or multiple plots}
-    \item{showgrp}{show group names in the plot}
-    \item{shownum}{\code{TRUE}: show the number of each group in the figure}
+    \item{showgrp}{show group names in the plot; applicable when \code{group}
+        is used}
+    \item{shownum}{\code{TRUE}: show the number of each group on the X-axis
+        in the figure; applicable when \code{group} is used}
     \item{ylim}{\code{TRUE}: y-axis is limited to [0, 1];
         \code{FALSE}: \code{ylim <- range(propmat)};
         a 2-length numeric vector: \code{ylim} used in \code{plot()}}
@@ -71,11 +74,16 @@ groups <- list(CEU = samp.id[pop_code == "CEU"],
     YRI = samp.id[pop_code == "YRI"],
     CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])
 
-prop <- snpgdsAdmixProp(RV, groups=groups)
+prop <- snpgdsAdmixProp(RV, groups=groups, bound=TRUE)
 
 # draw
 snpgdsAdmixPlot(prop, group=pop_code)
 
+# use user-defined colors for the groups
+snpgdsAdmixPlot(prop, group=pop_code, multiplot=FALSE, col=c(3,2,4))
+
+snpgdsAdmixTable(prop, group=pop_code)
+
 # close the genotype file
 snpgdsClose(genofile)
 }

diff --git a/man/snpgdsAdmixProp.Rd b/man/snpgdsAdmixProp.Rd
@@ -16,20 +16,17 @@ snpgdsAdmixProp(eigobj, groups, bound=FALSE)
     \item{groups}{a list of sample IDs, such like \code{groups = list(
         CEU = c("NA0101", "NA1022", ...), YRI = c("NAxxxx", ...),
         Asia = c("NA1234", ...))}}
-    \item{bound}{if \code{TRUE}, the estimates are bounded so that no
-        component < 0 or > 1, and the sum of proportions is one}
+    \item{bound}{if \code{TRUE}, the estimates are bounded in [0, 1], and
+        the sum of proportions is one; \code{bound=FALSE} for unbiased
+        estimates}
 }
 \details{
     The minor allele frequency and missing rate for each SNP passed in
 \code{snp.id} are calculated over all the samples in \code{sample.id}.
 }
 \value{
-    Return a \code{snpgdsEigMixClass} object, and it is a list:
-    \item{sample.id}{the sample ids used in the analysis}
-    \item{snp.id}{the SNP ids used in the analysis}
-    \item{eigenval}{eigenvalues}
-    \item{eigenvect}{eigenvactors, "# of samples" x "eigen.cnt"}
-    \item{ibdmat}{the IBD matrix}
+    Return a matrix of ancestral proportions with rows for study individuals
+(\code{rownames()} is sample ID).
 }
 
 \references{
@@ -72,7 +69,7 @@ head(tab)
 # draw
 plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
     xlab="eigenvector 2", ylab="eigenvector 1")
-legend("topleft", legend=levels(tab$pop), pch="o", col=1:4)
+legend("bottomleft", legend=levels(tab$pop), pch="o", col=1:4)
 
 
 # define groups
@@ -81,6 +78,7 @@ groups <- list(CEU = samp.id[pop_code == "CEU"],
     CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])
 
 prop <- snpgdsAdmixProp(RV, groups=groups)
+head(prop)
 
 # draw
 plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),