update layer_embedding()

rstudio · Jul 11, 2023 · b2c3a32 · b2c3a32
1 parent 16b6ac0
commit b2c3a32
Show file tree

Hide file tree

Showing 5 changed files with 114 additions and 98 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -2,7 +2,9 @@
 
 - Default TF version installed by `install_keras()` is now 2.13.
 
-- `layer_batch_normalization()` updated signature, with changes to options for distributed training.
+- Updated layers:
+  - `layer_batch_normalization()` updated signature, with changes to options for distributed training.
+  - `layer_embedding()` gains a `sparse` argument.
 
 - Fixed deadlock when an R generator was passed to `fit()`, `predict()`, and other endpoints.
 

diff --git a/R/layers-embedding.R b/R/layers-embedding.R
@@ -1,56 +1,72 @@
 
 
-#' Turns positive integers (indexes) into dense vectors of fixed size.
-#'
-#' For example, `list(4L, 20L) -> list(c(0.25, 0.1), c(0.6, -0.2))` This layer
-#' can only be used as the first layer in a model.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param input_dim int > 0. Size of the vocabulary, i.e. maximum integer
-#'   index + 1.
-#' @param output_dim int >= 0. Dimension of the dense embedding.
-#' @param embeddings_initializer Initializer for the `embeddings` matrix.
-#' @param embeddings_regularizer Regularizer function applied to the
-#'   `embeddings` matrix.
-#' @param activity_regularizer activity_regularizer
-#' @param embeddings_constraint Constraint function applied to the `embeddings`
-#'   matrix.
-#' @param mask_zero Whether or not the input value 0 is a special "padding"
-#'   value that should be masked out. This is useful when using recurrent
-#'   layers, which may take variable length inputs. If this is `TRUE` then all
-#'   subsequent layers in the model need to support masking or an exception will
-#'   be raised. If mask_zero is set to TRUE, as a consequence, index 0 cannot be
-#'   used in the vocabulary (input_dim should equal size of vocabulary + 1).
-#' @param input_length Length of input sequences, when it is constant. This
-#'   argument is required if you are going to connect `Flatten` then `Dense`
-#'   layers upstream (without it, the shape of the dense outputs cannot be
-#'   computed).
+#' Turns positive integers (indexes) into dense vectors of fixed size
+#'
+#' @details
+#' For example, `list(4L, 20L) -> list(c(0.25, 0.1), c(0.6, -0.2))`.
+#'
+#' This layer can only be used on positive integer inputs of a fixed range. The
+#' `layer_text_vectorization()`, `layer_string_lookup()`,
+#' and `layer_integer_lookup()` preprocessing layers can help prepare
+#' inputs for an `Embedding` layer.
+#'
+#' This layer accepts `tf.Tensor`, `tf.RaggedTensor` and `tf.SparseTensor`
+#' input.
+#'
+#' @param input_dim Integer. Size of the vocabulary,
+#' i.e. maximum integer index + 1.
+#'
+#' @param output_dim Integer. Dimension of the dense embedding.
+#'
+#' @param embeddings_initializer Initializer for the `embeddings`
+#' matrix (see `keras.initializers`).
+#'
+#' @param embeddings_regularizer Regularizer function applied to
+#' the `embeddings` matrix (see `keras.regularizers`).
+#'
+#' @param embeddings_constraint Constraint function applied to
+#' the `embeddings` matrix (see `keras.constraints`).
+#'
+#' @param mask_zero Boolean, whether or not the input value 0 is a special
+#' "padding" value that should be masked out. This is useful when using
+#' recurrent layers which may take variable length input. If this is
+#' `TRUE`, then all subsequent layers in the model need to support masking
+#' or an exception will be raised. If `mask_zero` is set to `TRUE`, as a
+#' consequence, index 0 cannot be used in the vocabulary (`input_dim` should
+#' equal size of vocabulary + 1).
+#'
+#' @param input_length Length of input sequences, when it is constant.
+#' This argument is required if you are going to connect
+#' `Flatten` then `Dense` layers downstream
+#' (without it, the shape of the dense outputs cannot be computed).
+#'
+#' @param sparse If `TRUE`, calling this layer returns a `tf.SparseTensor`. If
+#' `FALSE`, the layer returns a dense `tf.Tensor`. For an entry with no features
+#' in a sparse tensor (entry with value 0), the embedding vector of index 0 is
+#' returned by default.
+#' @param ... standard layer arguments.
 #'
 #' @section Input shape: 2D tensor with shape: `(batch_size, sequence_length)`.
 #'
 #' @section Output shape: 3D tensor with shape: `(batch_size, sequence_length,
 #'   output_dim)`.
 #'
-#' @section References:
-#' - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](https://arxiv.org/abs/1512.05287)
-#'
+#' @seealso
+#'   +  <https://www.tensorflow.org/versions/r2.13/api_docs/python/keras/src/layers/core/embedding/Embedding>
+#'   +  <https://keras.io/api/layers>
 #' @export
-layer_embedding <- function(object, input_dim, output_dim, embeddings_initializer = "uniform", embeddings_regularizer = NULL,
-                            activity_regularizer = NULL, embeddings_constraint = NULL, mask_zero = FALSE, input_length = NULL,
-                            batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-  create_layer(keras$layers$Embedding, object, list(
-    input_dim = as.integer(input_dim),
-    output_dim = as.integer(output_dim),
-    embeddings_initializer = embeddings_initializer,
-    embeddings_regularizer = embeddings_regularizer,
-    activity_regularizer = activity_regularizer,
-    embeddings_constraint = embeddings_constraint,
-    mask_zero = mask_zero,
-    input_length = if (!is.null(input_length)) as.integer(input_length) else NULL,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-}
+layer_embedding <-
+  function(object, input_dim, output_dim, embeddings_initializer = "uniform",
+           embeddings_regularizer = NULL, activity_regularizer = NULL,
+           embeddings_constraint = NULL, mask_zero = FALSE, input_length = NULL,
+           sparse = FALSE, ...)
+  {
+    args <- capture_args(match.call(), list(  # per-argument coercion functions
+      input_dim = as.integer,
+      output_dim = as.integer,
+      input_length = as_nullable_integer,
+      batch_size = as_nullable_integer  # not a formal; can only arrive via `...`
+    ), ignore = "object")  # no trailing comma above: list() errors on an empty arg
+    create_layer(keras$layers$Embedding, object, args)
+  }
+
diff --git a/man/layer_embedding.Rd b/man/layer_embedding.Rd
diff --git a/tools/find-api-diffs.R b/tools/find-api-diffs.R
@@ -4,6 +4,7 @@ library(dplyr, warn.conflicts = FALSE)
 library(reticulate)
 library(envir)
 
+use_virtualenv("r-keras")
 # keras::install_keras(envname = "tf-2.6-cpu")
 # tools/setup-test-envs.R
 # use_miniconda("tf-2.6-cpu", required=TRUE)
@@ -20,7 +21,7 @@ py_to_r_python.builtin.dict_items <- function(x) {
 attach_eval({
   inspect <- reticulate::import("inspect")
 
-  # import_from(magrittr, `%<>%`)
+  import_from(magrittr, `%<>%`)
 
   `%error%` <- function(x, y) tryCatch(x, error = function(e) y)
 

diff --git a/tools/make-layer-wrapper.R b/tools/make-layer-wrapper.R
@@ -2,7 +2,7 @@
 library(tidyverse)
 library(tensorflow)
 library(keras)
-
+use_virtualenv("r-keras")
 stopifnot(interactive())
 inspect <- reticulate::import("inspect")
 
@@ -161,6 +161,8 @@ print.r_py_wrapper2 <- function(x, ...) {
 
 
 
+new_layer_wrapper(keras$layers$Embedding)
+
 new_layer_wrapper(keras$layers$BatchNormalization)
 
 ## example usage: