Merge pull request #17 from l-ramirez-lopez/v2.0

V2.0
l-ramirez-lopez · Oct 12, 2020 · 443ae94 · 443ae94
2 parents c908e96 + 559e442
commit 443ae94
Show file tree

Hide file tree

Showing 51 changed files with 1,327 additions and 1,220 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -32,8 +32,6 @@ import(graphics)
 import(iterators)
 import(lifecycle)
 importFrom(dplyr,if_else)
-importFrom(dplyr,mutate)
-importFrom(dplyr,rename)
 importFrom(dplyr,select)
 importFrom(graphics,barplot)
 importFrom(lifecycle,deprecate_soft)

diff --git a/NEWS b/NEWS
@@ -2,6 +2,9 @@
 
 ## Version 2.0 (gordillo)
 
+* 11.10.2020 
+New vignette!
+
 * 02.07.2020 
 During the recent lockdown we had the chance to inevest a enough time on the 
 development of a new version of the package resemble. This new version comes 

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -59,7 +59,6 @@ moving_cor_diss <- function(X, Y, w) {
 #' @usage get_col_largest_sd(X)
 #' @param X a matrix.
 #' @return a value indicating the index of the column with the largest standard deviation. 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -72,7 +71,6 @@ get_col_largest_sd <- function(X) {
 #' @usage get_column_sds(X)
 #' @param X a a matrix.
 #' @return a vector of standard deviation values. 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -85,7 +83,6 @@ get_column_sds <- function(X) {
 #' @usage get_column_means(X)
 #' @param X a a matrix.
 #' @return a vector of mean values. 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -98,7 +95,6 @@ get_column_means <- function(X) {
 #' @usage get_column_sums(X)
 #' @param X a matrix.
 #' @return a vector of standard deviation values. 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -114,8 +110,8 @@ get_column_sums <- function(X) {
 #' @usage 
 #' opls_for_projection(X, Y, ncomp, scale,
 #'                     maxiter, tol,
-#'                     pcSelmethod = "cumvar",
-#'                     pcSelvalue = 0.99)
+#'                     pcSelmethod = "var",
+#'                     pcSelvalue = 0.01)
 #' @param X a matrix of predictor variables.
 #' @param Y a matrix of either a single or multiple response variables.
 #' @param ncomp the number of pls components.
@@ -156,11 +152,10 @@ get_column_sums <- function(X) {
 #' and \code{Xscale}}. 
 #' \item{\code{weights}}{ the matrix of wheights.}
 #' }
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
-opls_for_projection <- function(X, Y, ncomp, scale, maxiter, tol, pcSelmethod = "cumvar", pcSelvalue = 0.99) {
+opls_for_projection <- function(X, Y, ncomp, scale, maxiter, tol, pcSelmethod = "var", pcSelvalue = 0.01) {
     .Call('_resemble_opls_for_projection', PACKAGE = 'resemble', X, Y, ncomp, scale, maxiter, tol, pcSelmethod, pcSelvalue)
 }
 
@@ -197,7 +192,6 @@ opls_for_projection <- function(X, Y, ncomp, scale, maxiter, tol, pcSelmethod =
 #' These objects contain information on the explained variance for the \code{X} and \code{Y} matrices respectively.}
 #' \item{\code{transf}}{ a \code{list} conating two objects: \code{Xcenter} and \code{Xscale}}. 
 #' \item{\code{weights}}{ the matrix of wheights.}} 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -234,7 +228,6 @@ opls_get_all <- function(X, Y, ncomp, scale, maxiter, tol) {
 #' \item{\code{Y}}{ the \code{Y} input.}
 #' \item{\code{transf}}{ a \code{list} conating two objects: \code{Xcenter} and \code{Xscale}}. 
 #' \item{\code{weights}}{ the matrix of wheights.}} 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -265,7 +258,6 @@ opls <- function(X, Y, ncomp, scale, maxiter, tol) {
 #' \item{\code{projection_mat}}{ the projection matrix.}
 #' \item{\code{transf}}{ a \code{list} conating two objects: \code{Xcenter} and \code{Xscale}}. 
 #' } 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -284,7 +276,6 @@ opls_get_basics <- function(X, Y, ncomp, scale, maxiter, tol) {
 #' @param scale a logical indicating whether the matrix of predictors used to create the regression model was scaled.
 #' @param Xscale if \code{scale = TRUE} a matrix of one row with the values that must be used for scaling \code{newdata}.
 #' @return a matrix of predicted values.
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -303,7 +294,6 @@ predict_opls <- function(bo, b, ncomp, newdata, scale, Xscale) {
 #' @param Xscale if \code{scale = TRUE} a matrix of one row with the values that must be used for scaling \code{newdata}.
 #' @param Xcenter a matrix of one row with the values that must be used for centering \code{newdata}.
 #' @return a matrix corresponding to the new spectra projected onto the PLS space 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -318,7 +308,6 @@ project_opls <- function(projection_mat, ncomp, newdata, scale, Xcenter, Xscale)
 #' @param projection_mat the projection matrix generated by the \code{opls_get_basics} function.
 #' @param xloadings the loadings matrix generated by the \code{opls_get_basics} function.
 #' @return a matrix of 1 row and 1 column.
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -349,7 +338,6 @@ reconstruction_error <- function(x, projection_mat, xloadings) {
 #' @param Xcenter a matrix of one row with the values that must be used for centering \code{newdata}.
 #' @param Xscale if \code{scale = TRUE} a matrix of one row with the values that must be used for scaling \code{newdata}.
 #' @return a matrix of one row with the weights for each component between the max. and min. specified. 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -387,7 +375,6 @@ get_pls_weights <- function(projection_mat, xloadings, coefficients, new_x, min_
 #' \item{\code{st_rmse_seg}}{ the standardized RMSEs.}
 #' \item{\code{rsq_seg}}{ the coefficients of determination.}
 #' } 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -413,7 +400,6 @@ opls_cv_cpp <- function(X, Y, scale, method, mindices, pindices, min_component,
 #' \item{\code{Ycenter}}{ if matrix of predictors was scaled, the centering vector used for \code{Y}.}
 #' \item{\code{Yscale}}{ if matrix of predictors was scaled, the scaling vector used for \code{Y}.}
 #' }
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -434,7 +420,6 @@ gaussian_process <- function(X, Y, noisev = 0.001, scale = TRUE) {
 #' @param Ycenter if \code{center = TRUE} a matrix of one row with the values that must be used for accounting for the centering of the response variable.
 #' @param Yscale if \code{scale = TRUE} a matrix of one row with the values that must be used  for accounting for the scaling of the response variable.
 #' @return a matrix of predicted values
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -463,7 +448,6 @@ predict_gaussian_process <- function(Xz, alpha, newdata, scale, Xcenter, Xscale,
 #' \item{\code{st.rmse.seg}}{ the standardized RMSEs.}
 #' \item{\code{rsq.seg}}{ the coefficients of determination.}
 #' } 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
@@ -477,8 +461,8 @@ gaussian_process_cv <- function(X, Y, mindices, pindices, noisev = 0.001, scale
 #' @usage 
 #' pca_nipals(X, ncomp, center, scale,
 #'            maxiter, tol,
-#'            pcSelmethod = "cumvar",
-#'            pcSelvalue = 0.99)
+#'            pcSelmethod = "var",
+#'            pcSelvalue = 0.01)
 #' @param X a matrix of predictor variables.
 #' @param Y a matrix of either a single or multiple response variables.
 #' @param ncomp the number of pls components.
@@ -488,25 +472,24 @@ gaussian_process_cv <- function(X, Y, mindices, pindices, noisev = 0.001, scale
 #' @param pcSelmethod the method for selecting the number of components. 
 #' Options are: \code{'cumvar'} (for selecting the number of principal components based on a given 
 #' cumulative amount of explained variance) and \code{"var"} (for selecting the number of principal 
-#' components based on a given amount of explained variance). Default is \code{'cumvar'}
+#' components based on a given amount of explained variance). Default is \code{'var'}
 #' @param pcSelvalue a numerical value that complements the selected method (\code{pcSelmethod}). 
 #' If \code{"cumvar"} is chosen, it must be a value (larger than 0 and below 1) indicating the maximum 
 #' amount of cumulative variance that the retained components should explain. If \code{"var"} is chosen, 
 #' it must be a value (larger than 0 and below 1) indicating that components that explain (individually) 
 #' a variance lower than this threshold must be excluded. If \code{"manual"} is chosen, it must be a value 
-#' specifying the desired number of principal components to retain. Default is 0.99.
+#' specifying the desired number of principal components to retain. Default is 0.01.
 #' @return a list containing the following elements:
 #' \itemize{
 #' \item{\code{pc_scores}}{ a matrix of principal component scores.}
 #' \item{\code{pc_loadings}}{ a matrix of of principal component loadings.}
 #' \item{\code{variance}}{ a matrix of the variance of the principal components.} 
 #' \item{\code{scale}}{ a \code{list} conating two objects: \code{center} and \code{scale}, which correspond to the vectors used to center and scale the input matrix.} 
 #' } 
-#' @useDynLib resemble
 #' @author Leonardo Ramirez-Lopez
 #' @keywords internal 
 #' @useDynLib resemble
-pca_nipals <- function(X, ncomp, center, scale, maxiter, tol, pcSelmethod = "cumvar", pcSelvalue = 0.99) {
+pca_nipals <- function(X, ncomp, center, scale, maxiter, tol, pcSelmethod = "var", pcSelvalue = 0.01) {
     .Call('_resemble_pca_nipals', PACKAGE = 'resemble', X, ncomp, center, scale, maxiter, tol, pcSelmethod, pcSelvalue)
 }
 
@@ -524,6 +507,20 @@ which_min <- function(X) {
     .Call('_resemble_which_min', PACKAGE = 'resemble', X)
 }
 
+#' @title A function to compute indices of minimum values of a distance vector
+#' @description For internal use only
+#' @usage 
+#' which_min_vector(X)
+#' @param X a vector of distances 
+#' @return a vector of the indices of the nearest neighbors
+#' @details 
+#' Used internally to find the nearest neighbors. 
+#' It searches in lower (or upper) triangular matrix. Therefore this must be the format of the 
+#' input data. The piece of code int \code{len = (sqrt(X.size()*8+1)+1)/2} generated an error in CRAN
+#' since \code{sqrt} cannot be applied to integers.
+#' @keywords internal
+#' @useDynLib resemble
+#' @author Antoine Stevens 
 which_min_vector <- function(X) {
     .Call('_resemble_which_min_vector', PACKAGE = 'resemble', X)
 }

diff --git a/R/cor_diss.R b/R/cor_diss.R
@@ -21,17 +21,19 @@
 #' must be scaled. If \code{Xu} is provided the data is scaled on the basis
 #' of \mjeqn{Xr \cup Xu}{Xr U Xu}.
 #' @details
-#' The correlation dissimilarity \mjeqn{cd}{cd} between two observations
-#' \mjeqn{x_i}{x_i} and \mjeqn{x_j}{x_j} is computed as follows:
+#' The correlation dissimilarity \mjeqn{d}{d} between two observations
+#' \mjeqn{x_i}{x_i} and \mjeqn{x_j}{x_j} is based on the Perason's 
+#' correlation coefficient (\mjeqn{\rho}{\rho}) and it can be computed as 
+#' follows:
 #'
-#' \mjdeqn{cd(x_i, x_j) = \frac{1}{2}(1 - cor(x_i, x_j))}{cd(x_i, x_j) = 1/2 (1 - cor (x_i, x_j))}
+#' \mjdeqn{d(x_i, x_j) = \frac{1}{2}(1 - \rho(x_i, x_j))}{d(x_i, x_j) = 1/2 (1 - \rho(x_i, x_j))}
 #'
-#' The avobe formlula is used when \code{ws = NULL}.
+#' The above formula is used when \code{ws = NULL}.
 #' On the other hand (when \code{ws != NULL}) the moving correlation
-#' dissimilarity \mjeqn{mcd}{mcd} between two observations \mjeqn{x_i}{x_i} and \mjeqn{x_j}{x_j}
+#' dissimilarity between two observations \mjeqn{x_i}{x_i} and \mjeqn{x_j}{x_j}
 #' is computed as follows:
 #'
-#' \mjdeqn{mcd(x_i, x_j) = \frac{1}{2 ws}\sum_{k=1}^{p-ws}(1 - cor(x_{i,(k:k+ws)}, x_{j,(k:k+ws)}))}{mcd(x_i, x_j) = 1/(2 ws)\sum_(k=1)^{p-ws}(1 - cor(x_(i,k:k+ws), x_(j,k:k+ws)))}
+#' \mjdeqn{d(x_i, x_j; ws) = \frac{1}{2 ws}\sum_{k=1}^{p-ws}1 - \rho(x_{i,(k:k+ws)}, x_{j,(k:k+ws)})}{d(x_i, x_j) = 1/(2 ws)\sum_(k=1)^{p-ws}(1 - \rho(x_(i,k:k+ws), x_(j,k:k+ws)))}
 #'
 #' where \mjeqn{ws}{ws} represents a given window size which rolls sequentially
 #' from 1 up to \mjeqn{p - ws}{p - ws} and  \mjeqn{p}{p} is the number of
@@ -40,7 +42,7 @@
 #' The function does not accept input data containing missing values.
 #' @return
 #' a matrix of the computed dissimilarities.
-#' @author Antoine Stevens and Leonardo Ramirez-Lopez
+#' @author Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez}
 #' @examples
 #' \dontrun{
 #' library(prospectr)