Skip to content

Commit

Permalink
documentation and gp fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
lsteinmann committed Nov 15, 2023
1 parent c59c09a commit 86219f9
Show file tree
Hide file tree
Showing 20 changed files with 84 additions and 70 deletions.
27 changes: 11 additions & 16 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
Type: Package
Package: datplot
Title: Preparation of Object Dating Ranges for Density Plots
(Aoristic Analysis)
Title: Preparation of Object Dating Ranges for Density Plots (Aoristic
Analysis)
Version: 1.1.0
Authors@R:
c(person(given = "Lisa",
family = "Steinmann",
role = c("aut", "cre"),
email = "lisa.steinmann@rub.de",
comment = c(ORCID = "0000-0002-2215-1243")),
person(given = "Barbora",
family = "Weissova",
role = "ctb",
email = "barbora.weissova@rub.de",
comment = c(ORCID = "0000-0002-3297-6855")))
Authors@R: c(
person("Lisa", "Steinmann", , "lisa.steinmann@rub.de", role = c("aut", "cre", "cph"),
comment = c(ORCID = "0000-0002-2215-1243")),
person("Barbora", "Weissova", , "barbora.weissova@rub.de", role = "ctb",
comment = c(ORCID = "0000-0002-3297-6855"))
)
Maintainer: Lisa Steinmann <lisa.steinmann@rub.de>
Description: Converting date ranges into dating 'steps' eases
the visualization of changes in e.g. pottery consumption, style and
other variables over time. This package provides tools to process and
Description: Converting date ranges into dating 'steps' eases the
visualization of changes in e.g. pottery consumption, style and other
variables over time. This package provides tools to process and
prepare data for visualization and employs the concept of aoristic
analysis.
License: GPL (>= 3)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ argument `calc = "weight"` or `calc = "probability"`
probability calculation instead of the original (weights) calculation.
* Change and improve error-handling of `scaleweight()`.
* Remove UTF-8 characters to comply with CRAN.
* Update documentation and add a pkgdown-site.
* Update documentation and add a [pkgdown-site](https://lsteinmann.github.io/datplot/).


# datplot 1.0.1
Expand Down
3 changes: 3 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,7 @@ NULL
#' Quantitative Analysis.” Dissertation, Berlin: Freie Universität Berlin.
#' \url{https://refubium.fu-berlin.de/handle/fub188/23730},
#' partially after \url{https://inscriptions.packhum.org/}

"Inscr_Bithynia"
NULL

12 changes: 5 additions & 7 deletions R/datplot_utility.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ get.probability <- function(DAT_min, DAT_max) {
#' @export get.step.sequence
#'
#' @examples
#' \dontrun{
#' min_year <- -494
#' max_year <- -334
#' sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25)
Expand All @@ -158,7 +157,6 @@ get.probability <- function(DAT_min, DAT_max) {
#' max_year <- 100
#' sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25)
#' print(sequence)
#' }
get.step.sequence <- function(datmin = 0, datmax = 100, stepsize = 25) {

stopifnot(is.numeric(datmin))
Expand Down Expand Up @@ -245,10 +243,10 @@ create.sub.objects <- function(DAT_list,


if (any(diffs < stepsize)) {
warning(paste("stepsize is larger than the range of the closest dated object at Index = ",
paste(which(diffs < stepsize), collapse = ", "), "). ",
"For information see documentation of get.step.sequence().",
sep = ""))
warning(paste0("stepsize is larger than the range of the ",
"closest dated object at Index = ",
paste(which(diffs < stepsize), collapse = ", "), "). ",
"For information see documentation of get.step.sequence()."))
}

DAT_list <- lapply(DAT_list, function(object) {
Expand Down Expand Up @@ -332,7 +330,7 @@ check.structure <- function(DAT_df) {
}
if (any(dat_df_structure[c("is.minDAT", "is.maxDAT")] == FALSE)) {
result <- FALSE
stop("The 3rd or 4th columns of your data.frame are not numbers.")
stop("The 3rd and 4th columns of your data.frame have to be numeric.")
} else {
result <- TRUE
}
Expand Down
20 changes: 13 additions & 7 deletions R/datsteps.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,13 @@
#' Added columns contain the value of each step, the 'weight' or 'probability'-
#' value for each step, and (if chosen) the cumulative probability.
#'
#' @export datsteps
#'
#' @examples
#' \dontrun{
#' data(DAT_df)
#' data("Inscr_Bithynia")
#' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")]
#' DAT_df_steps <- datsteps(DAT_df, stepsize = 25)
#' plot(density(DAT_df_steps$DAT_step))
#' }
#'
#'
#' @export datsteps
datsteps <- function(DAT_df,
stepsize = 1,
calc = "weight",
Expand Down Expand Up @@ -76,6 +74,14 @@ datsteps <- function(DAT_df,
weight = message("Using 'weight'-calculation (see https://doi.org/10.1017/aap.2021.8)."),
probability = message("Using step-wise probability calculation."))

if (any(is.na(DAT_df))) {
NA_rows <- c(which(is.na(DAT_df[, 3])),
which(is.na(DAT_df[, 4])))
NA_rows <- unique(NA_rows)
DAT_df <- DAT_df[-NA_rows, ]
warning(paste0(length(NA_rows), " rows with NA-values in the ",
"dating columns will be omitted."))
}

DAT_df <- as.data.frame(DAT_df)
# Checking the overall structure
Expand All @@ -88,7 +94,7 @@ datsteps <- function(DAT_df,

# Prepare the Matrix to be used instead of the df for faster processing
DAT_mat <- matrix(ncol = 5, nrow = nrow(DAT_df))
DAT_mat[, 1] <- 1:nrow(DAT_df)
DAT_mat[, 1] <- seq_len(nrow(DAT_df))
DAT_mat[, 2] <- DAT_df[, 3]
DAT_mat[, 3] <- DAT_df[, 4]

Expand Down
8 changes: 4 additions & 4 deletions R/get_histogramscale.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
#' @return the value with which to scale the density curve to a histogram
#' plot so that both will be visible
#'
#' @export get.histogramscale
#'
#' @examples
#' \dontrun{
#' data("Inscr_Bithynia")
#' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")]
#' DAT_df_steps <- datsteps(DAT_df, stepsize = 25)
#' get.histogramscale(DAT_df_steps)
#'
#' get.histogramscale(DAT_df_steps$DAT_step, binwidth = 20)
#' get.histogramscale(500, binwidth = 20)
#' }
#'
#' @export get.histogramscale
get.histogramscale <- function(DAT_df_steps, binwidth = "stepsize") {
if (check.number(DAT_df_steps) & length(DAT_df_steps) == 1) {
nrow <- DAT_df_steps
Expand Down
8 changes: 7 additions & 1 deletion R/scaleweight.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
#' @return the same data.frame, with the scaled values in the specified column
#'
#' @export scaleweight
#'
#' @examples
#' data("Inscr_Bithynia")
#' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")]
#' DAT_df_steps <- datsteps(DAT_df, stepsize = 25)
#' DAT_df_scaled <- scaleweight(DAT_df_steps, var = 2, val = 5)

scaleweight <- function(DAT_df, var = c("all", 2), val = 5) {

Expand Down Expand Up @@ -46,7 +52,7 @@ scaleweight <- function(DAT_df, var = c("all", 2), val = 5) {
"(scaled to sum of all objects)")
} else {
uvar <- unique(DAT_df[, var])
for (row in 1:length(uvar)) {
for (row in seq_len(length(uvar))) {
index <- which(DAT_df[, var] == uvar[row])
DAT_df[index, val] <- DAT_df[index, val] / sum(DAT_df[index, val])
}
Expand Down
14 changes: 5 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
<!-- badges: start -->
[![CRAN status](https://www.r-pkg.org/badges/version/datplot)](https://CRAN.R-project.org/package=datplot)
[![R-CMD-check](https://github.com/lsteinmann/datplot/workflows/R-CMD-check/badge.svg)](https://github.com/lsteinmann/datplot/actions)
[![codecov](https://app.codecov.io/gh/lsteinmann/datplot/branch/main/graph/badge.svg?token=CVNCAL9U4W)](https://app.codecov.io/gh/lsteinmann/datplot)

[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4285912.svg)](https://doi.org/10.5281/zenodo.4285912)
[![R-CMD-check](https://github.com/lsteinmann/datplot/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/lsteinmann/datplot/actions/workflows/R-CMD-check.yaml)
[![codecov](https://codecov.io/gh/lsteinmann/datplot/branch/main/graph/badge.svg)](https://app.codecov.io/gh/lsteinmann/datplot)
[![DOI](https://img.shields.io/badge/Publication-10.1017/aap.2021.8-green.svg)](https://doi.org/10.1017/aap.2021.8)
[![CRAN status](https://www.r-pkg.org/badges/version/datplot)](https://CRAN.R-project.org/package=datplot)

<!-- badges: end -->

Expand All @@ -19,9 +17,7 @@ This package implements the concepts of aoristic analysis to prepare ar

browseVignettes("datplot")

after installing the package, or on GitHub in the /vignettes/ directory. Density plots are easy to understand and are usually aesthetically pleasing. They do omit a some information, such as individual counts, that bar histograms can communicate better. On the other hand, ranges can be incorporated into the visualization as well to regard the variety of timespans archaeological objects may be dated to.

**Note:** Please note that the weight calculation has changed with version 1.0.1 to reflect true probabilities for each object when a stepsize of 1 is used. The change does not affect the visualization, but makes the weight-values usable as dating probability for steps of 1 year exactly.
or [on the pkgdown-site](https://lsteinmann.github.io/datplot/articles/how-to.html) after installing the package, or on GitHub in the /vignettes/ directory. Density plots are easy to understand and are usually aesthetically pleasing. They do omit some information, such as individual counts, that bar histograms can communicate better. On the other hand, ranges can be incorporated into the visualization as well, to reflect the variety of timespans archaeological objects may be dated to.

![Attic Pottery from BAPD by Date](man/figures/demo_readme.png "Attic Pottery from BAPD by Date")

Expand All @@ -31,7 +27,7 @@ The package at version 1.0.0 has been published along with a case study on inscr

Recommendation
-------
People interested in employing this method should also consider taking a look at [ISAAKiel's package aoristAAR](https://github.com/ISAAKiel/aoristAAR/), or at [archSeries](https://github.com/davidcorton/archSeries), [tabula](https://github.com/tesselle/tabula), [rtefact](https://github.com/ahb108/rtfact) and [aoristic-analysis (LimesLimits)](https://github.com/LimesLimits/aoristic-analysis).
People interested in employing this method should also consider taking a look at [ISAAKiel's package aoristAAR](https://github.com/ISAAKiel/aoristAAR/), or at [archSeries](https://github.com/davidcorton/archSeries), [tabula](https://github.com/tesselle/tabula), [rtefact](https://github.com/ahb108/rtfact), [aoristic-analysis (LimesLimits)](https://github.com/LimesLimits/aoristic-analysis) and (in the future) [baorista](https://github.com/ercrema/baorista).


Installation
Expand Down
3 changes: 1 addition & 2 deletions data-raw/Inscr_Bithynia.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ inscriptions$URL[repl] <- paste("https://epigraphy.packhum.org/text/",
gsub("PH", "", inscriptions$ikey[repl]),
sep = "")


inscriptions$ID <- paste("I_", 1:nrow(inscriptions), sep = "")
inscriptions$ID <- paste("I_", seq_len(nrow(inscriptions)), sep = "")
inscriptions <- inscriptions %>%
rename(Dating = Chronological.Frame) %>%
mutate(Language = replace(Language, Language == "Gr/Lat", "Greek/Latin"),
Expand Down
8 changes: 5 additions & 3 deletions inst/literatur.bib
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ @Www{BAPD
@article{datplotarticle,
title = {datplot: {{A}} new r-package for the visualization of date ranges in archaeology},
author = {Weissova, Barbora and Steinmann, Lisa},
date = {forthcoming},
date = {2021},
journaltitle = {Advances in Archaeological Practice},
volume = {tba},
pages = {tba},
volume = {9},
number = {7},
pages = {288-298},
doi = {10.1017/aap.2021.8}
}


Expand Down
2 changes: 1 addition & 1 deletion man/datplot-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 2 additions & 5 deletions man/datsteps.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions man/get.histogramscale.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions man/get.step.sequence.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions man/scaleweight.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/create_testing_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@ create.testing.df <- function(k = 100, distmean = 150, distsd = 25) {
rm(test_df_two)
test_df <- test_df[sample(nrow(test_df)), ]

test_df$id <- paste("ID_", 1:nrow(test_df), sep = "")
test_df$id <- paste("ID_", seq_len(nrow(test_df)), sep = "")
return(test_df)
}
4 changes: 2 additions & 2 deletions tests/testthat/test-datplot_utility.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ test_that("switch.dating issues warning", {
testdf <- create.testing.df()

fristlast <- matrix(nrow = nrow(testdf), ncol = 2)
for (r in 1:nrow(testdf)) {
for (r in seq_len(nrow(testdf))) {
seq <- get.step.sequence(datmin = testdf[r, 3],
datmax = testdf[r, 4],
stepsize = 25)
Expand Down Expand Up @@ -89,7 +89,7 @@ testdf[, 4] <- sample(1:200, nrow(testdf))
testdf[1, 3:4] <- c(4, 4)

DAT_mat <- matrix(ncol = 5, nrow = nrow(testdf))
DAT_mat[, 1] <- 1:nrow(testdf)
DAT_mat[, 1] <- seq_len(nrow(testdf))
DAT_mat[, 2] <- testdf[, 3]
DAT_mat[, 3] <- testdf[, 4]
colnames(DAT_mat) <- c("index", "datmin", "datmax", "weight", "step")
Expand Down
9 changes: 9 additions & 0 deletions tests/testthat/test-datsteps.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ test_that("warning for wrong column types", {
})


data("Inscr_Bithynia")

# The dating columns of Inscr_Bithynia contain NA-values; datsteps() should
# drop those rows and emit a warning mentioning "NA".
test_that("removes NA with warning", {
  cols <- c("ID", "Location", "DAT_min", "DAT_max")
  expect_warning(
    datsteps(Inscr_Bithynia[, cols], stepsize = 100),
    regexp = "NA"
  )
})



data("DAT_df")

test_that("warning for problematic value of stepsize", {
Expand Down
8 changes: 4 additions & 4 deletions vignettes/data_preparation.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ The original file consists of five columns, with each row representing a single
The data set is not yet suited for analysis, as some variables, especially the chronological frame, have many inconsistencies. For further processing, we should also be sure to include an identifier-column. As `r nrow(inscriptions) - length(unique(inscriptions$ikey))` inscriptions do not have an ikey-Value, which might have otherwise been a good candidate for identification, we chose to create a new automatically generated ID, so that every inscription can be individually identifiable.

```{r }
inscriptions$ID <- paste("I_", 1:nrow(inscriptions), sep = "")
inscriptions$ID <- paste("I_", seq_len(nrow(inscriptions)), sep = "")
```

Two of the variables of this data set are almost ready for further use, i.e. Location and Language. A look at their unique values reveals only small inconsistencies that can be easily fixed:
Expand Down Expand Up @@ -154,7 +154,7 @@ As a demonstration, this is the resulting table (`num_dating`) of up to this poi

```{r echo = FALSE}
require(knitr)
knitr::kable(na.omit(na.omit(num_dating)[sample(1:nrow(na.omit(num_dating)),
knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))),
10), ]))
```

Expand Down Expand Up @@ -214,7 +214,7 @@ for (r in sel) {
Another look at the data set can help us to check for possible errors.

```{r echo = FALSE}
knitr::kable(na.omit(na.omit(num_dating)[sample(1:nrow(na.omit(num_dating)),
knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))),
10), ]))
```

Expand Down Expand Up @@ -248,7 +248,7 @@ for (r in sel) {
```
```{r echo = FALSE}
knitr::kable(na.omit(na.omit(num_dating)[sample(1:nrow(na.omit(num_dating)),
knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))),
10), ]))
```

Expand Down
4 changes: 2 additions & 2 deletions vignettes/how-to.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ library(datplot)
data(Beazley)
```
```{r preptable, echo = FALSE}
knitr::kable(Beazley[sample(1:nrow(Beazley), 10, replace = FALSE), ])
knitr::kable(Beazley[sample(seq_len(nrow(Beazley)), 10, replace = FALSE), ])
```

## How to Display a Range?
Expand Down Expand Up @@ -206,7 +206,7 @@ This now opens the possibility to calculate the cumulative probability for each
```{r cumulative demo, fig.height = 10}
data("Inscr_Bithynia")
Inscr_Bithynia <- na.omit(Inscr_Bithynia[, c(1, 3, 8, 9)])
Inscr_Bithynia <- Inscr_Bithynia[sample(1:nrow(Inscr_Bithynia), 5), ]
Inscr_Bithynia <- Inscr_Bithynia[sample(seq_len(nrow(Inscr_Bithynia)), 5), ]
Inscr_Bithynia_steps <- datsteps(Inscr_Bithynia,
stepsize = 1,
calc = "probability",
Expand Down

0 comments on commit 86219f9

Please sign in to comment.