Skip to content

Commit

Permalink
snpgdsAdmixProp()
Browse files Browse the repository at this point in the history
  • Loading branch information
zhengxwen committed Feb 23, 2024
1 parent 325dd75 commit bc58568
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 23 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ UTILITIES

o show a progress bar in `snpgdsLDpruning()` when 'verbose=TRUE'

o update the help files of `snpgdsAdmixProp()` and `snpgdsAdmixPlot()`


CHANGES IN VERSION 1.34.0
-------------------------
Expand Down
14 changes: 6 additions & 8 deletions R/PCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# A High-performance Computing Toolset for Relatedness and
# Principal Component Analysis of SNP Data
#
# Copyright (C) 2011 - 2020 Xiuwen Zheng
# Copyright (C) 2011 - 2024 Xiuwen Zheng
# License: GPL-3
#

Expand Down Expand Up @@ -343,7 +343,6 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
# 'sample.id' and 'eigenvect' should exist
stopifnot(!is.null(eigobj$sample.id))
stopifnot(is.matrix(eigobj$eigenvect))

stopifnot(is.list(groups))
stopifnot(length(groups) > 1)
if (length(groups) > (ncol(eigobj$eigenvect)+1))
Expand Down Expand Up @@ -374,8 +373,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
grlist <- c(grlist, groups[[i]])
}

stopifnot(is.logical(bound) & is.vector(bound))
stopifnot(length(bound) == 1)
stopifnot(is.logical(bound), length(bound)==1L)

# calculate ...

Expand All @@ -393,7 +391,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
}

# check
if (any(is.na(mat)))
if (anyNA(mat))
stop("The eigenvectors should not have missing value!")

T.P <- mat[length(groups), ]
Expand All @@ -407,11 +405,11 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
rownames(new.p) <- eigobj$sample.id

# whether bounded
if (bound)
if (isTRUE(bound))
{
new.p[new.p < 0] <- 0
r <- 1.0 / rowSums(new.p)
new.p <- new.p * r
new.p[new.p > 1] <- 1
new.p <- new.p / rowSums(new.p)
}

new.p
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ The GDS format offers the efficient operations specifically designed for integer

## Bioconductor

Release Version: v1.36.0
Release Version: v1.36.1

[http://www.bioconductor.org/packages/SNPRelate](http://www.bioconductor.org/packages/SNPRelate)

Expand Down
18 changes: 13 additions & 5 deletions man/snpgdsAdmixPlot.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ snpgdsAdmixTable(propmat, group, sort=FALSE)
\arguments{
\item{propmat}{a sample-by-ancestry matrix of proportion estimates,
returned from \code{\link{snpgdsAdmixProp}()}}
\item{group}{a character vector of a factor according to the samples
\item{group}{a character vector of a factor according to the rows
in \code{propmat}}
\item{col}{specify colors}
\item{col}{specify colors; if \code{group} is not specified, it is a color
for each sample; otherwise specify colors for the groups}
\item{multiplot}{single plot or multiple plots}
\item{showgrp}{show group names in the plot}
\item{shownum}{\code{TRUE}: show the number of each group in the figure}
\item{showgrp}{show group names in the plot; applicable when \code{group}
is used}
\item{shownum}{\code{TRUE}: show the number of each group on the X-axis
in the figure; applicable when \code{group} is used}
\item{ylim}{\code{TRUE}: y-axis is limited to [0, 1];
\code{FALSE}: \code{ylim <- range(propmat)};
a 2-length numeric vector: \code{ylim} used in \code{plot()}}
Expand Down Expand Up @@ -71,11 +74,16 @@ groups <- list(CEU = samp.id[pop_code == "CEU"],
YRI = samp.id[pop_code == "YRI"],
CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])

prop <- snpgdsAdmixProp(RV, groups=groups)
prop <- snpgdsAdmixProp(RV, groups=groups, bound=TRUE)

# draw
snpgdsAdmixPlot(prop, group=pop_code)

# use user-defined colors for the groups
snpgdsAdmixPlot(prop, group=pop_code, multiplot=FALSE, col=c(3,2,4))

snpgdsAdmixTable(prop, group=pop_code)

# close the genotype file
snpgdsClose(genofile)
}
Expand Down
16 changes: 7 additions & 9 deletions man/snpgdsAdmixProp.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,17 @@ snpgdsAdmixProp(eigobj, groups, bound=FALSE)
\item{groups}{a list of sample IDs, such like \code{groups = list(
CEU = c("NA0101", "NA1022", ...), YRI = c("NAxxxx", ...),
Asia = c("NA1234", ...))}}
\item{bound}{if \code{TRUE}, the estimates are bounded so that no
component < 0 or > 1, and the sum of proportions is one}
\item{bound}{if \code{TRUE}, the estimates are bounded in [0, 1], and
the sum of proportions is one; \code{bound=FALSE} for unbiased
estimates}
}
\details{
The minor allele frequency and missing rate for each SNP passed in
\code{snp.id} are calculated over all the samples in \code{sample.id}.
}
\value{
Return a \code{snpgdsEigMixClass} object, and it is a list:
\item{sample.id}{the sample ids used in the analysis}
\item{snp.id}{the SNP ids used in the analysis}
\item{eigenval}{eigenvalues}
\item{eigenvect}{eigenvactors, "# of samples" x "eigen.cnt"}
\item{ibdmat}{the IBD matrix}
Return a matrix of ancestral proportions with rows for study individuals
(\code{rownames()} is sample ID).
}

\references{
Expand Down Expand Up @@ -72,7 +69,7 @@ head(tab)
# draw
plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
xlab="eigenvector 2", ylab="eigenvector 1")
legend("topleft", legend=levels(tab$pop), pch="o", col=1:4)
legend("bottomleft", legend=levels(tab$pop), pch="o", col=1:4)


# define groups
Expand All @@ -81,6 +78,7 @@ groups <- list(CEU = samp.id[pop_code == "CEU"],
CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])

prop <- snpgdsAdmixProp(RV, groups=groups)
head(prop)

# draw
plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),
Expand Down

0 comments on commit bc58568

Please sign in to comment.