Skip to content

Commit

Permalink
Merge pull request #1426 from rstudio/add-delay-load-requests-registry
Browse files Browse the repository at this point in the history
  • Loading branch information
t-kalinowski authored Jul 26, 2023
2 parents cf16bb9 + 5b4ff7e commit 7adcbc1
Show file tree
Hide file tree
Showing 11 changed files with 111 additions and 97 deletions.
14 changes: 8 additions & 6 deletions R/config.R
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ py_module_available <- function(module) {
#' @export
py_discover_config <- function(required_module = NULL, use_environment = NULL) {

required_module <- required_module %||% .globals$delay_load_module
if (is.null(required_module) && length(.globals$delay_load_imports$module))
required_module <- .globals$delay_load_imports$module[[1L]]
if (!is.null(required_module))
required_module <- strsplit(required_module, ".", fixed = TRUE)[[1L]][[1L]]

Expand Down Expand Up @@ -242,9 +243,10 @@ py_discover_config <- function(required_module = NULL, use_environment = NULL) {

# look for any environment names supplied in a call like:
# import("bar", delayed = list(environment = "r-barlyr"))
use_environment <- use_environment %||% .globals$delay_load_environment
if(!is.null(use_environment)) {
python <- tryCatch(py_resolve(use_environment), error = identity)
for (envname in c(use_environment, .globals$delay_load_imports$environment)) {
if(is.na(envname))
next
python <- tryCatch(py_resolve(envname), error = identity)
if (!inherits(python, "error"))
return(python_config(
python, required_module,
Expand All @@ -265,8 +267,8 @@ py_discover_config <- function(required_module = NULL, use_environment = NULL) {

# look in virtual environments that have a required module derived name,
# e.g., given a call to import("bar"), look for an environment named "r-bar"
if (!is.null(required_module)) {
envname <- paste0("r-", required_module)
for(module in c(required_module, .globals$delay_load_imports$module)) {
envname <- paste0("r-", module)
python <- tryCatch(py_resolve(envname), error = identity)
if (!inherits(python, "error"))
return(python_config(
Expand Down
89 changes: 51 additions & 38 deletions R/import.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,57 +88,70 @@ import <- function(module, as = NULL, convert = TRUE, delay_load = FALSE) {
})
}

# resolve delay load
delay_load_environment <- NULL
delay_load_priority <- 0
delay_load_functions <- NULL
if (is.function(delay_load)) {
delay_load_functions <- list(on_load = delay_load)
delay_load <- TRUE
} else if (is.list(delay_load)) {
delay_load_environment <- delay_load$environment
delay_load_functions <- delay_load
if (!is.null(delay_load$priority))
delay_load_priority <- delay_load$priority
delay_load <- TRUE
}

# normal case (load immediately)
if (!delay_load || is_python_initialized()) {
if (isFALSE(delay_load) || is_python_initialized()) {

# ensure that python is initialized (pass top level module as
# a hint as to which version of python to choose)
ensure_python_initialized(required_module = module)

# import the module
py_module_import(module, convert = convert)
return(py_module_import(module, convert = convert))

}


# delay load case (wait until first access)
else {
if (is.null(.globals$delay_load_module) || (delay_load_priority > .globals$delay_load_priority)) {
.globals$delay_load_module <- module
.globals$delay_load_environment <- delay_load_environment # environment name, like "r-keras"
.globals$delay_load_priority <- delay_load_priority
}
module_proxy <- new.env(parent = emptyenv())
module_proxy$module <- module
module_proxy$convert <- convert
if (!is.null(delay_load_functions)) {

# `get_module()` can be a function that at runtime can resolve the name
# (length 1 character vector) of the actual module to import e.g., in
# keras, we can decide at run time if this should be "tensorflow.keras",
# "keras", or "keras_core" based on any env vars or versions installed.
module_proxy$get_module <- delay_load_functions$get_module
module_proxy$before_load <- delay_load_functions$before_load
module_proxy$on_load <- delay_load_functions$on_load
module_proxy$on_error <- delay_load_functions$on_error
}
attr(module_proxy, "class") <- c("python.builtin.module", "python.builtin.object")
module_proxy
register_delay_load_import(module, delay_load) ->
module_hooks

module_proxy <- new.env(parent = emptyenv())
module_proxy$module <- module
module_proxy$convert <- convert
if (!is.null(module_hooks)) {
# `get_module()` can be a function that at runtime can resolve the name
# (length 1 character vector) of the actual module to import e.g., in
# keras, we can decide at run time if this should be "tensorflow.keras",
# "keras", or "keras_core" based on any env vars or versions installed.
module_proxy$get_module <- module_hooks$get_module
module_proxy$before_load <- module_hooks$before_load
module_proxy$on_load <- module_hooks$on_load
module_proxy$on_error <- module_hooks$on_error
}

attr(module_proxy, "class") <- c("python.builtin.module", "python.builtin.object")
module_proxy
}


# Record a delay-load import request in the global registry
# (`.globals$delay_load_imports`) and return the hooks, if any, that the
# caller should attach to the module proxy.
#
# Arguments:
#   module      Name of the Python module being requested.
#   delay_load  NULL / TRUE (no hooks, default priority), a function (used as
#               the `on_load` hook), or a list that may carry `priority`,
#               `environment`, and hook functions.
#
# Returns NULL, `list(on_load = <function>)`, or the `delay_load` list itself.
#
# Side effect: appends one row (module, priority, environment) to the
# registry and re-sorts the registry by decreasing priority.
register_delay_load_import <- function(module, delay_load = NULL) {

  spec <- list(
    module = module,
    priority = 0L,
    environment = NA_character_
  )
  hooks <- NULL

  if (is.function(delay_load)) {

    # a bare function is shorthand for an on_load hook
    hooks <- list(on_load = delay_load)

  } else if (is.list(delay_load)) {

    spec$priority <- delay_load$priority %||% 0L
    spec$environment <- delay_load$environment %||% NA_character_
    hooks <- delay_load

  }

  # normalize column types so rows bind cleanly regardless of what was supplied
  storage.mode(spec$priority) <- "integer"
  storage.mode(spec$environment) <- "character"

  registry <- .globals$delay_load_imports

  # The registry is reset to NULL once Python has been initialized. rbind()
  # on NULL would not yield a data frame (and `registry$priority` below would
  # then be NULL), so start again from an empty, correctly-typed registry.
  if (!is.data.frame(registry)) {
    registry <- data.frame(
      module = character(),
      priority = integer(),
      environment = character(),
      stringsAsFactors = FALSE
    )
  }

  registry <- rbind(registry, spec, stringsAsFactors = FALSE)

  # highest priority request first
  registry <- registry[order(registry$priority, decreasing = TRUE), ]
  .globals$delay_load_imports <- registry

  hooks
}


Expand Down
28 changes: 13 additions & 15 deletions R/package.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ NULL
.globals$required_python_version <- NULL
.globals$use_python_versions <- c()
.globals$py_config <- NULL
.globals$delay_load_module <- NULL
.globals$delay_load_environment <- NULL
.globals$delay_load_priority <- 0
.globals$delay_load_imports <- data.frame(module = character(),
priority = integer(),
environment = character(),
stringsAsFactors = FALSE)
.globals$suppress_warnings_handlers <- list()
.globals$class_filters <- list(

Expand All @@ -46,23 +47,20 @@ ensure_python_initialized <- function(required_module = NULL) {
if (is_python_initialized())
return()

# give delay load modules priority
use_environment <- NULL
if (!is.null(.globals$delay_load_module)) {
required_module <- .globals$delay_load_module
use_environment <- .globals$delay_load_environment
.globals$delay_load_module <- NULL # one shot
.globals$delay_load_environment <- NULL
.globals$delay_load_priority <- 0
}

# notify front-end (if any) that Python is about to be initialized
callback <- getOption("reticulate.python.beforeInitialized")
if (is.function(callback))
callback()

# make sure this module is used for an environment name.
if(!is.null(required_module))
register_delay_load_import(required_module)

# perform initialization
.globals$py_config <- initialize_python(required_module, use_environment)
.globals$py_config <- initialize_python()

# clear the global list of delay_load requests
.globals$delay_load_imports <- NULL

# remap output streams to R output handlers
remap_output_streams()
Expand Down Expand Up @@ -117,7 +115,7 @@ initialize_python <- function(required_module = NULL, use_environment = NULL) {

# resolve top level module for search
if (!is.null(required_module))
required_module <- strsplit(required_module, ".", fixed = TRUE)[[1]][[1]]
required_module <- strsplit(required_module, ".", fixed = TRUE)[[1L]][[1L]]

# find configuration
config <- local({
Expand Down
17 changes: 7 additions & 10 deletions R/pyenv.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,8 @@ pyenv_find_impl <- function(install = TRUE) {

pyenv_install <- function(version, force, pyenv = NULL) {

pyenv <- normalizePath(
pyenv %||% pyenv_find(),
winslash = "/",
mustWork = TRUE
)
pyenv <- canonical_path(pyenv %||% pyenv_find())
stopifnot(file.exists(pyenv))

# set options
withr::local_envvar(PYTHON_CONFIGURE_OPTS = "--enable-shared")
Expand Down Expand Up @@ -258,7 +255,7 @@ pyenv_update <- function(pyenv = pyenv_find()) {
if (startsWith(pyenv, root <- pyenv_root())) {
# this pyenv installation is fully managed by reticulate
# root == where .../bin/pyenv lives
withr::with_dir(root, system2("git", "pull"))
withr::with_dir(root, system2("git", "pull", stdout = FALSE, stderr = FALSE))
}

if (is_windows())
Expand All @@ -271,10 +268,10 @@ pyenv_update <- function(pyenv = pyenv_find()) {
system2("git", c("clone", "https://github.com/pyenv/pyenv-update.git",
file.path(root, "plugins/pyenv-update")))

result <- system2t(pyenv, "update", stdout = TRUE, stderr = TRUE)
if (result != 0L) {
fmt <- "Error creating conda environment [exit code %i]"
stopf(fmt, result)
result <- system2t(pyenv, "update", stdout = FALSE, stderr = FALSE)
if (!identical(result, 0L)) {
fmt <- "Error updating pyenv [exit code %i]"
warningf(fmt, result)
}

}
Expand Down
4 changes: 1 addition & 3 deletions R/python.R
Original file line number Diff line number Diff line change
Expand Up @@ -1523,9 +1523,7 @@ py_resolve_module_proxy <- function(proxy) {
py_module_proxy_import(proxy)

# clear the global tracking of delay load modules
.globals$delay_load_module <- NULL
.globals$delay_load_environment <- NULL
.globals$delay_load_priority <- 0
.globals$delay_load_imports <- NULL

# call on_load if provided
if (is.function(on_load))
Expand Down
8 changes: 5 additions & 3 deletions R/virtualenv.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,8 @@ is_virtualenv <- function(dir) {
virtualenv_starter <- function(version = NULL, all = FALSE) {

starters <- data.frame(version = numeric_version(character()),
path = character())
path = character(),
stringsAsFactors = FALSE)

find_starters <- function(glob) {
# accept NULL, NA, and "" as a no-op
Expand Down Expand Up @@ -507,11 +508,12 @@ virtualenv_starter <- function(version = NULL, all = FALSE) {
return(NA_character_)
substr(v, 8L, 999L)
}, error = function(e) NA_character_), ""), strict = FALSE)
df <- data.frame(version = v, path = p, row.names = NULL)
df <- data.frame(version = v, path = p,
row.names = NULL, stringsAsFactors = FALSE)
df <- df[!is.na(df$version), ]
df <- df[order(df$version, decreasing = TRUE), ]

df <- rbind(starters, df)
df <- rbind(starters, df, stringsAsFactors = FALSE)
df <- df[!duplicated(df$path), ]
if(is_windows()) {
# on windows, removed dups of the same python in the same directory,
Expand Down
2 changes: 1 addition & 1 deletion R/wrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ py_function_custom_scaffold <- function(
write_line("#' ")

# Write docstrings for each parameters
for(i in 1:length(docs$parameters)) {
for (i in 1:length(docs$parameters)) {
param_name <- names(docs$parameters)[i]
param_doc <- process_param_doc_fn(docs$parameters[[param_name]], docs)
write_line(paste0("#' @param ", " ", names(docs$parameters)[i], " ", param_doc))
Expand Down
14 changes: 7 additions & 7 deletions tests/testthat/test-delay-load.R
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
context("delay-load")

test_that("imported module can be customized via delay_load", {

  # Pin the child R process to the Python this session resolved, so the
  # sub-process loads the expected interpreter.
  withr::local_envvar(RETICULATE_PYTHON = py_config()$python)

  # Use a fresh R process: accessing the proxy must be what triggers Python
  # initialization, so that the get_module hook decides which module is
  # really imported.
  byteorder <- callr::r(function() {
    proxy <- reticulate::import(
      "invalid_module_name",
      delay_load = list(get_module = function() "sys")
    )
    as.character(proxy$byteorder)
  })

  # the proxy resolved to `sys`, so a valid byte order comes back
  expect_true(byteorder %in% c("little", "big"))

})
2 changes: 1 addition & 1 deletion tests/testthat/test-python-dict.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ df_dict = {i[0] if isinstance(i, tuple) else i: df for i, df in random_df.groupb
rdf_dict <- py$df_dict
lapply(rdf_list, expect_s3_class, "data.frame")

for(i in seq_along(rdf_dict)) {
for (i in seq_along(rdf_dict)) {
attr(rdf_dict[[i]], "pandas.index") <- NULL
attr(rdf_list[[i]], "pandas.index") <- NULL
}
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-python-exceptions.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ test_that("py_last_error() returns the R error condition object", {

e <- tryCatch( fn(), error = function(e) e )

for(cls in c("python.builtin.Exception",
"python.builtin.BaseException",
"python.builtin.object",
"error", "condition"))
for (cls in c("python.builtin.Exception",
"python.builtin.BaseException",
"python.builtin.object",
"error", "condition"))
expect_s3_class(e, cls)

expect_identical(conditionMessage(e), e$message)
Expand Down
22 changes: 13 additions & 9 deletions vignettes/python_dependencies.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -103,20 +103,24 @@ slightly more active role in managing their python environments.
However, this can be as simple as supplying a unique environment name.

The most straightforward approach is for users to create a dedicated
python environment for a specific project. For example, a user can can
do something like:
python environment for a specific project. For example, a user can
create a virtual environment in the project directory, like this:

``` r
envname <- "~/my/project/venv"
envname <- "./venv"
tensorflow::install_tensorflow(envname = envname)
sparklyr::install_sparklyr(envname = envname)
pysparklyr::install_pyspark(envname = envname)
use_virtualenv(envname)
```

The user could then place a .Renviron or .Rprofile file in the project
directory, ensuring that reticulate will use always use the python
configured for that project. For example, an .Renviron file in the
project directory could contain:
As described in the [Order of Python
Discovery](versions.html) guide, reticulate will
automatically discover and use a Python virtual environment in the
current working directory like this. Alternatively, if the environment
exists outside the project directory, the user could then place a
.Renviron or .Rprofile file in the project directory, ensuring that
reticulate will always use the python configured for that project.
For example, an .Renviron file in the project directory could contain:

```
RETICULATE_PYTHON_ENV=~/my/project/venv
Expand Down Expand Up @@ -148,7 +152,7 @@ default environment. For example, installing spark into the default

``` r
tensorflow::install_tensorflow() # creates an "r-tensorflow" env
sparklyr::install_spark(envname = "r-tensorflow")
pysparklyr::install_pyspark(envname = "r-tensorflow")
```

This approach---exporting an installation helper function that defaults
Expand Down

0 comments on commit 7adcbc1

Please sign in to comment.