diff --git a/.tether/man/InputLayer.txt b/.tether/man/InputLayer.txt index f62ae2855..e7928a611 100644 --- a/.tether/man/InputLayer.txt +++ b/.tether/man/InputLayer.txt @@ -41,6 +41,8 @@ class InputLayer(keras.src.layers.layer.Layer) | ---------------------------------------------------------------------- | Readonly properties defined here: | + | batch_shape + | | dtype | Alias of `layer.variable_dtype`. | diff --git a/.tether/man/Layer.txt b/.tether/man/Layer.txt index 24e727604..d8fedcd10 100644 --- a/.tether/man/Layer.txt +++ b/.tether/man/Layer.txt @@ -173,6 +173,9 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation. | ) | Call self as a function. | + | __delattr__(self, name) + | Implement delattr(self, name). + | | __init__( | self, | *, @@ -211,7 +214,11 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation. | return x | ``` | - | add_metric(self) + | add_metric( + | self, + | *args, + | **kwargs + | ) | | add_variable( | self, @@ -347,7 +354,17 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation. | Args: | store: Dict from which the state of the model will be loaded. | - | quantize(self, mode) + | quantize( + | self, + | mode, + | type_check=True + | ) + | + | quantized_build( + | self, + | input_shape, + | mode + | ) | | quantized_call( | self, diff --git a/.tether/man/Loss.txt b/.tether/man/Loss.txt index 59c7e8e8a..39e95949c 100644 --- a/.tether/man/Loss.txt +++ b/.tether/man/Loss.txt @@ -5,6 +5,17 @@ class Loss(keras.src.saving.keras_saveable.KerasSaveable) | | Loss base class. | + | Args: + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | dtype: The dtype of the loss's computations. Defaults to `None`, which + | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a + | `"float32"` unless set to different value + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. + | | To be implemented by subclasses: | | * `call()`: Contains the logic for loss calculation using `y_true`, @@ -54,4 +65,9 @@ class Loss(keras.src.saving.keras_saveable.KerasSaveable) | | from_config(config) | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | dtype + | diff --git a/.tether/man/Metric.txt b/.tether/man/Metric.txt index 94da6fd64..48cbbc716 100644 --- a/.tether/man/Metric.txt +++ b/.tether/man/Metric.txt @@ -6,8 +6,12 @@ class Metric(keras.src.saving.keras_saveable.KerasSaveable) | Encapsulates metric logic and state. | | Args: - | name: (Optional) string name of the metric instance. - | dtype: (Optional) data type of the metric result. + | name: Optional name for the metric instance. + | dtype: The dtype of the metric's computations. Defaults to `None`, which + | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a + | `"float32"` unless set to different value + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. 
| | Example: | diff --git a/.tether/man/audio_dataset_from_directory.txt b/.tether/man/audio_dataset_from_directory.txt index 98c07b8ac..5d3a175a9 100644 --- a/.tether/man/audio_dataset_from_directory.txt +++ b/.tether/man/audio_dataset_from_directory.txt @@ -73,8 +73,9 @@ Args: length of the longest sequence in the batch. ragged: Whether to return a Ragged dataset (where each sequence has its own length). Defaults to `False`. - shuffle: Whether to shuffle the data. Defaults to `True`. + shuffle: Whether to shuffle the data. If set to `False`, sorts the data in alphanumeric order. + Defaults to `True`. seed: Optional random seed for shuffling and transformations. validation_split: Optional float between 0 and 1, fraction of data to reserve for validation. diff --git a/.tether/man/callback_backup_and_restore.txt b/.tether/man/callback_backup_and_restore.txt index b5731ce4c..c0ac7c369 100644 --- a/.tether/man/callback_backup_and_restore.txt +++ b/.tether/man/callback_backup_and_restore.txt @@ -59,11 +59,11 @@ class BackupAndRestore(keras.src.callbacks.callback.Callback) | When set to an integer, the callback saves the checkpoint every | `save_freq` batches. Set `save_freq=False` only if using | preemption checkpointing (i.e. with `save_before_preemption=True`). - | delete_checkpoint: Boolean, defaults to `True`. This `BackupAndRestore` + | delete_checkpoint: Boolean. This `BackupAndRestore` | callback works by saving a checkpoint to back up the training state. | If `delete_checkpoint=True`, the checkpoint will be deleted after | training is finished. Use `False` if you'd like to keep the checkpoint - | for future usage. + | for future usage. Defaults to `True`. | | Method resolution order: | BackupAndRestore diff --git a/.tether/man/image_dataset_from_directory.txt b/.tether/man/image_dataset_from_directory.txt index 25a014477..a3c3880f8 100644 --- a/.tether/man/image_dataset_from_directory.txt +++ b/.tether/man/image_dataset_from_directory.txt @@ -68,15 +68,15 @@ Args: (must match names of subdirectories). Used to control the order of the classes (otherwise alphanumerical order is used). color_mode: One of `"grayscale"`, `"rgb"`, `"rgba"`. - Defaults to `"rgb"`. Whether the images will be converted to - have 1, 3, or 4 channels. + Whether the images will be converted to + have 1, 3, or 4 channels. Defaults to `"rgb"`. batch_size: Size of the batches of data. Defaults to 32. If `None`, the data will not be batched (the dataset will yield individual samples). image_size: Size to resize images to after they are read from disk, - specified as `(height, width)`. Defaults to `(256, 256)`. + specified as `(height, width)`. Since the pipeline processes batches of images that must all have - the same size, this must be provided. + the same size, this must be provided. Defaults to `(256, 256)`. shuffle: Whether to shuffle the data. Defaults to `True`. If set to `False`, sorts the data in alphanumeric order. seed: Optional random seed for shuffling and transformations. @@ -88,9 +88,10 @@ Args: When `subset="both"`, the utility returns a tuple of two datasets (the training and validation datasets respectively). interpolation: String, the interpolation method used when - resizing images. Defaults to `"bilinear"`. + resizing images. Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. + Defaults to `"bilinear"`. follow_links: Whether to visit subdirectories pointed to by symlinks. Defaults to `False`. 
crop_to_aspect_ratio: If `True`, resize the images without aspect diff --git a/.tether/man/image_smart_resize.txt b/.tether/man/image_smart_resize.txt index ac7abf28d..f852fbc20 100644 --- a/.tether/man/image_smart_resize.txt +++ b/.tether/man/image_smart_resize.txt @@ -56,9 +56,9 @@ Args: or `(batch_size, height, width, channels)`. size: Tuple of `(height, width)` integer. Target size. interpolation: String, interpolation to use for resizing. - Defaults to `'bilinear'`. - Supports `bilinear`, `nearest`, `bicubic`, - `lanczos3`, `lanczos5`. + Supports `"bilinear"`, `"nearest"`, `"bicubic"`, + `"lanczos3"`, `"lanczos5"`. + Defaults to `"bilinear"`. data_format: `"channels_last"` or `"channels_first"`. backend_module: Backend module to use (if different from the default backend). @@ -68,3 +68,4 @@ Returns: If the input image was a NumPy array, the output is a NumPy array, and if it was a backend-native tensor, the output is a backend-native tensor. + diff --git a/.tether/man/keras.distribution.txt b/.tether/man/keras.distribution.txt index 1bbdc17d7..ee66b1b01 100644 --- a/.tether/man/keras.distribution.txt +++ b/.tether/man/keras.distribution.txt @@ -1,4 +1,8 @@ -DataParallel(device_mesh=None, devices=None) +DataParallel( + device_mesh=None, + devices=None, + auto_shard_dataset=True +) DeviceMesh( shape, axis_names, diff --git a/.tether/man/keras.layers.txt b/.tether/man/keras.layers.txt index 1d423974a..64c549eec 100644 --- a/.tether/man/keras.layers.txt +++ b/.tether/man/keras.layers.txt @@ -1247,7 +1247,11 @@ UpSampling3D( **kwargs ) Wrapper(layer, **kwargs) -ZeroPadding1D(padding=1, **kwargs) +ZeroPadding1D( + padding=1, + data_format=None, + **kwargs +) ZeroPadding2D( padding=(1, 1), data_format=None, diff --git a/.tether/man/keras.models.txt b/.tether/man/keras.models.txt index 5bd9724e9..346b916d0 100644 --- a/.tether/man/keras.models.txt +++ b/.tether/man/keras.models.txt @@ -18,7 +18,7 @@ save_model( model, filepath, overwrite=True, - zipped=True, + zipped=None, **kwargs ) Sequential(*args, **kwargs) diff --git a/.tether/man/keras.ops.txt b/.tether/man/keras.ops.txt index 6c4ad3a8b..42563eba4 100644 --- a/.tether/man/keras.ops.txt +++ b/.tether/man/keras.ops.txt @@ -56,6 +56,12 @@ argpartition( ) argsort(x, axis=-1) array(x, dtype=None) +associative_scan( + f, + elems, + reverse=False, + axis=0 +) average( x, axis=None, @@ -480,6 +486,11 @@ scatter_update( indices, updates ) +searchsorted( + sorted_sequence, + values, + side='left' +) segment_max( data, segment_ids, diff --git a/.tether/man/keras.optimizers.txt b/.tether/man/keras.optimizers.txt index 66429c020..047b7c298 100644 --- a/.tether/man/keras.optimizers.txt +++ b/.tether/man/keras.optimizers.txt @@ -124,6 +124,23 @@ Ftrl( **kwargs ) get(identifier) +Lamb( + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-07, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='lamb', + **kwargs +) legacy: Module(keras.api.optimizers.legacy) Lion( learning_rate=0.001, diff --git a/.tether/man/keras.quantizers.txt b/.tether/man/keras.quantizers.txt index 603b088a5..33f6d7d2a 100644 --- a/.tether/man/keras.quantizers.txt +++ b/.tether/man/keras.quantizers.txt @@ -3,7 +3,8 @@ abs_max_quantize( axis, value_range=(-127, 127), dtype='int8', - epsilon=1e-07 + epsilon=1e-07, + to_numpy=False ) AbsMaxQuantizer( axis, diff --git a/.tether/man/keras.saving.txt 
b/.tether/man/keras.saving.txt index 52b1d7e88..509163e93 100644 --- a/.tether/man/keras.saving.txt +++ b/.tether/man/keras.saving.txt @@ -30,7 +30,7 @@ save_model( model, filepath, overwrite=True, - zipped=True, + zipped=None, **kwargs ) save_weights( diff --git a/.tether/man/keras.txt b/.tether/man/keras.txt index 7f45bc21e..ab50bf563 100644 --- a/.tether/man/keras.txt +++ b/.tether/man/keras.txt @@ -78,6 +78,7 @@ StatelessScope( collect_losses=False, initialize_variables=True ) +SymbolicScope() tree: Module(keras.api.tree) utils: Module(keras.api.utils) Variable( diff --git a/.tether/man/keras_input.txt b/.tether/man/keras_input.txt index b957fd208..ca6840cf5 100644 --- a/.tether/man/keras_input.txt +++ b/.tether/man/keras_input.txt @@ -35,6 +35,8 @@ Args: be passed into the input - they will be densified with a default value of 0. This feature is only supported with the TensorFlow backend. Defaults to `False`. + batch_shape: Optional shape tuple (tuple of integers or `None` objects), + including the batch size. name: Optional name string for the layer. Should be unique in a model (do not reuse the same name twice). It will be autogenerated if it isn't provided. diff --git a/.tether/man/keras_model.txt b/.tether/man/keras_model.txt index 5a7bcd42e..f2d49a892 100644 --- a/.tether/man/keras_model.txt +++ b/.tether/man/keras_model.txt @@ -231,7 +231,11 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr | there is a mismatch in the number of weights, or a mismatch in | the shape of the weights. | - | quantize(self, mode) + | quantize( + | self, + | mode, + | **kwargs + | ) | Quantize the weights of the model. | | Note that the model must be built first before calling this method. @@ -246,7 +250,7 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr | self, | filepath, | overwrite=True, - | zipped=True, + | zipped=None, | **kwargs | ) | Saves a model as a `.keras` file. @@ -260,7 +264,9 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr | the target location, or instead ask the user via | an interactive prompt. | zipped: Whether to save the model as a zipped `.keras` - | archive (default), or as an unzipped directory. + | archive (default when saving locally), or as an + | unzipped directory (default when saving on the + | Hugging Face Hub). | | Example: | diff --git a/.tether/man/layer_conv_2d.txt b/.tether/man/layer_conv_2d.txt index fe2d5459b..50df9909d 100644 --- a/.tether/man/layer_conv_2d.txt +++ b/.tether/man/layer_conv_2d.txt @@ -6,10 +6,10 @@ class Conv2D(keras.src.layers.convolutional.base_conv.BaseConv) | 2D convolution layer. | | This layer creates a convolution kernel that is convolved with the layer - | input over a single spatial (or temporal) dimension to produce a tensor of - | outputs. If `use_bias` is True, a bias vector is created and added to the - | outputs. Finally, if `activation` is not `None`, it is applied to the - | outputs as well. + | input over a 2D spatial (or temporal) dimension (height and width) to + | produce a tensor of outputs. If `use_bias` is True, a bias vector is created + | and added to the outputs. Finally, if `activation` is not `None`, it is + | applied to the outputs as well. 
| | Args: | filters: int, the dimension of the output space (the number of filters diff --git a/.tether/man/layer_conv_3d.txt b/.tether/man/layer_conv_3d.txt index bb763e506..b61e11469 100644 --- a/.tether/man/layer_conv_3d.txt +++ b/.tether/man/layer_conv_3d.txt @@ -6,10 +6,10 @@ class Conv3D(keras.src.layers.convolutional.base_conv.BaseConv) | 3D convolution layer. | | This layer creates a convolution kernel that is convolved with the layer - | input over a single spatial (or temporal) dimension to produce a tensor of - | outputs. If `use_bias` is True, a bias vector is created and added to the - | outputs. Finally, if `activation` is not `None`, it is applied to the - | outputs as well. + | input over a 3D spatial (or temporal) dimension (width,height and depth) to + | produce a tensor of outputs. If `use_bias` is True, a bias vector is created + | and added to the outputs. Finally, if `activation` is not `None`, it is + | applied to the outputs as well. | | Args: | filters: int, the dimension of the output space (the number of filters diff --git a/.tether/man/layer_dense.txt b/.tether/man/layer_dense.txt index c89a7ca89..b5d9b7b12 100644 --- a/.tether/man/layer_dense.txt +++ b/.tether/man/layer_dense.txt @@ -119,7 +119,11 @@ class Dense(keras.src.layers.layer.Layer) | Args: | store: Dict from which the state of the model will be loaded. | - | quantize(self, mode) + | quantize( + | self, + | mode, + | type_check=True + | ) | | quantized_build( | self, @@ -127,12 +131,6 @@ class Dense(keras.src.layers.layer.Layer) | mode | ) | - | quantized_call( - | self, - | inputs, - | training=None - | ) - | | save_own_variables(self, store) | Saves the state of the layer. | diff --git a/.tether/man/layer_einsum_dense.txt b/.tether/man/layer_einsum_dense.txt index afa72681d..178c0c90b 100644 --- a/.tether/man/layer_einsum_dense.txt +++ b/.tether/man/layer_einsum_dense.txt @@ -158,7 +158,11 @@ class EinsumDense(keras.src.layers.layer.Layer) | Args: | store: Dict from which the state of the model will be loaded. | - | quantize(self, mode) + | quantize( + | self, + | mode, + | type_check=True + | ) | | quantized_build( | self, @@ -166,12 +170,6 @@ class EinsumDense(keras.src.layers.layer.Layer) | mode | ) | - | quantized_call( - | self, - | inputs, - | training=None - | ) - | | save_own_variables(self, store) | Saves the state of the layer. | diff --git a/.tether/man/layer_embedding.txt b/.tether/man/layer_embedding.txt index 087ad2ad6..56b85642e 100644 --- a/.tether/man/layer_embedding.txt +++ b/.tether/man/layer_embedding.txt @@ -123,7 +123,11 @@ class Embedding(keras.src.layers.layer.Layer) | Args: | store: Dict from which the state of the model will be loaded. | - | quantize(self, mode) + | quantize( + | self, + | mode, + | type_check=True + | ) | | quantized_build( | self, @@ -131,7 +135,11 @@ class Embedding(keras.src.layers.layer.Layer) | mode | ) | - | quantized_call(self, inputs) + | quantized_call( + | self, + | *args, + | **kwargs + | ) | | save_own_variables(self, store) | Saves the state of the layer. diff --git a/.tether/man/layer_rescaling.txt b/.tether/man/layer_rescaling.txt index 8a0f1612a..5f3809581 100644 --- a/.tether/man/layer_rescaling.txt +++ b/.tether/man/layer_rescaling.txt @@ -60,4 +60,27 @@ class Rescaling(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) | An object config is a Python dictionary (serializable) | containing the information needed to re-instantiate it. 
| + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) + | Creates an operation from its config. + | + | This method is the reverse of `get_config`, capable of instantiating the + | same operation from the config dictionary. + | + | Note: If you override this method, you might receive a serialized dtype + | config, which is a `dict`. You can deserialize it as follows: + | + | ```python + | if "dtype" in config and isinstance(config["dtype"], dict): + | policy = dtype_policies.deserialize(config["dtype"]) + | ``` + | + | Args: + | config: A Python dictionary, typically the output of `get_config`. + | + | Returns: + | An operation instance. + | diff --git a/.tether/man/layer_string_lookup.txt b/.tether/man/layer_string_lookup.txt index 8ec231384..7d95a9188 100644 --- a/.tether/man/layer_string_lookup.txt +++ b/.tether/man/layer_string_lookup.txt @@ -7,7 +7,7 @@ class StringLookup(keras.src.layers.preprocessing.index_lookup.IndexLookup) | | This layer translates a set of arbitrary strings into integer output via a | table-based vocabulary lookup. This layer will perform no splitting or - | transformation of input strings. For a layer than can split and tokenize + | transformation of input strings. For a layer that can split and tokenize | natural language, see the `keras.layers.TextVectorization` layer. | | The vocabulary for the layer must be either supplied on construction or diff --git a/.tether/man/layer_torch_module_wrapper.txt b/.tether/man/layer_torch_module_wrapper.txt index 1f837a6f9..81c7d22cb 100644 --- a/.tether/man/layer_torch_module_wrapper.txt +++ b/.tether/man/layer_torch_module_wrapper.txt @@ -94,6 +94,7 @@ class TorchModuleWrapper(keras.src.layers.layer.Layer) | call( | self, | *args, + | training=None, | **kwargs | ) | diff --git a/.tether/man/layer_zero_padding_1d.txt b/.tether/man/layer_zero_padding_1d.txt index 419f0ffb9..9bce50640 100644 --- a/.tether/man/layer_zero_padding_1d.txt +++ b/.tether/man/layer_zero_padding_1d.txt @@ -1,7 +1,7 @@ Help on class ZeroPadding1D in module keras.src.layers.reshaping.zero_padding1d: class ZeroPadding1D(keras.src.layers.layer.Layer) - | ZeroPadding1D(padding=1, **kwargs) + | ZeroPadding1D(padding=1, data_format=None, **kwargs) | | Zero-padding layer for 1D input (e.g. temporal sequence). | @@ -35,12 +35,29 @@ class ZeroPadding1D(keras.src.layers.layer.Layer) | the padding dimension (axis 1). | - If tuple of 2 ints: how many zeros to add at the beginning and the | end of the padding dimension (`(left_pad, right_pad)`). + | data_format: A string, one of `"channels_last"` (default) or + | `"channels_first"`. The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch_size, axis_to_pad, channels)` while `"channels_first"` + | corresponds to inputs with shape + | `(batch_size, channels, axis_to_pad)`. + | When unspecified, uses `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json` (if exists). Defaults to + | `"channels_last"`. 
| | Input shape: - | 3D tensor with shape `(batch_size, axis_to_pad, features)` + | 3D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, axis_to_pad, features)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, features, axis_to_pad)` | | Output shape: - | 3D tensor with shape `(batch_size, padded_axis, features)` + | 3D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, padded_axis, features)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, features, padded_axis)` | | Method resolution order: | ZeroPadding1D @@ -58,6 +75,7 @@ class ZeroPadding1D(keras.src.layers.layer.Layer) | __init__( | self, | padding=1, + | data_format=None, | **kwargs | ) | Initialize self. See help(type(self)) for accurate signature. diff --git a/.tether/man/loss_binary_crossentropy.txt b/.tether/man/loss_binary_crossentropy.txt index 2aefc4450..718ea8470 100644 --- a/.tether/man/loss_binary_crossentropy.txt +++ b/.tether/man/loss_binary_crossentropy.txt @@ -33,7 +33,8 @@ class BinaryCrossentropy(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Examples: | diff --git a/.tether/man/loss_binary_focal_crossentropy.txt b/.tether/man/loss_binary_focal_crossentropy.txt index cec70078a..f71617e14 100644 --- a/.tether/man/loss_binary_focal_crossentropy.txt +++ b/.tether/man/loss_binary_focal_crossentropy.txt @@ -51,7 +51,8 @@ class BinaryFocalCrossentropy(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Examples: | diff --git a/.tether/man/loss_categorical_crossentropy.txt b/.tether/man/loss_categorical_crossentropy.txt index b055df7a6..6aa605c0c 100644 --- a/.tether/man/loss_categorical_crossentropy.txt +++ b/.tether/man/loss_categorical_crossentropy.txt @@ -28,7 +28,8 @@ class CategoricalCrossentropy(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Examples: | diff --git a/.tether/man/loss_categorical_focal_crossentropy.txt b/.tether/man/loss_categorical_focal_crossentropy.txt index 5c11e40f8..c0cf66f5a 100644 --- a/.tether/man/loss_categorical_focal_crossentropy.txt +++ b/.tether/man/loss_categorical_focal_crossentropy.txt @@ -69,7 +69,8 @@ class CategoricalFocalCrossentropy(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. 
| | Examples: | diff --git a/.tether/man/loss_categorical_hinge.txt b/.tether/man/loss_categorical_hinge.txt index 43c8c0789..f6ecb64cc 100644 --- a/.tether/man/loss_categorical_hinge.txt +++ b/.tether/man/loss_categorical_hinge.txt @@ -21,7 +21,8 @@ class CategoricalHinge(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | CategoricalHinge diff --git a/.tether/man/loss_cosine_similarity.txt b/.tether/man/loss_cosine_similarity.txt index 3bfd68b62..05caf72d8 100644 --- a/.tether/man/loss_cosine_similarity.txt +++ b/.tether/man/loss_cosine_similarity.txt @@ -28,7 +28,8 @@ class CosineSimilarity(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | CosineSimilarity diff --git a/.tether/man/loss_ctc.txt b/.tether/man/loss_ctc.txt index e73360d09..31d384585 100644 --- a/.tether/man/loss_ctc.txt +++ b/.tether/man/loss_ctc.txt @@ -13,7 +13,8 @@ class CTC(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | CTC diff --git a/.tether/man/loss_dice.txt b/.tether/man/loss_dice.txt index 5fd198d5d..13ae30fcd 100644 --- a/.tether/man/loss_dice.txt +++ b/.tether/man/loss_dice.txt @@ -20,7 +20,8 @@ class Dice(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Returns: | Dice loss value. diff --git a/.tether/man/loss_hinge.txt b/.tether/man/loss_hinge.txt index dc1694323..b08626cbe 100644 --- a/.tether/man/loss_hinge.txt +++ b/.tether/man/loss_hinge.txt @@ -22,7 +22,8 @@ class Hinge(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | Hinge diff --git a/.tether/man/loss_huber.txt b/.tether/man/loss_huber.txt index ff5b3bff0..40801320a 100644 --- a/.tether/man/loss_huber.txt +++ b/.tether/man/loss_huber.txt @@ -28,7 +28,8 @@ class Huber(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. 
`keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | Huber diff --git a/.tether/man/loss_kl_divergence.txt b/.tether/man/loss_kl_divergence.txt index 44a7dd687..f12c343c1 100644 --- a/.tether/man/loss_kl_divergence.txt +++ b/.tether/man/loss_kl_divergence.txt @@ -23,7 +23,8 @@ class KLDivergence(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | KLDivergence diff --git a/.tether/man/loss_log_cosh.txt b/.tether/man/loss_log_cosh.txt index cf0aafee7..7a41349fb 100644 --- a/.tether/man/loss_log_cosh.txt +++ b/.tether/man/loss_log_cosh.txt @@ -21,7 +21,8 @@ class LogCosh(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | LogCosh diff --git a/.tether/man/loss_mean_absolute_error.txt b/.tether/man/loss_mean_absolute_error.txt index 27e803d15..76c6a5c94 100644 --- a/.tether/man/loss_mean_absolute_error.txt +++ b/.tether/man/loss_mean_absolute_error.txt @@ -19,7 +19,8 @@ class MeanAbsoluteError(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | MeanAbsoluteError diff --git a/.tether/man/loss_mean_absolute_percentage_error.txt b/.tether/man/loss_mean_absolute_percentage_error.txt index f4cb015dd..611a1742d 100644 --- a/.tether/man/loss_mean_absolute_percentage_error.txt +++ b/.tether/man/loss_mean_absolute_percentage_error.txt @@ -19,7 +19,8 @@ class MeanAbsolutePercentageError(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | MeanAbsolutePercentageError diff --git a/.tether/man/loss_mean_squared_error.txt b/.tether/man/loss_mean_squared_error.txt index a801b002a..eb118d1bf 100644 --- a/.tether/man/loss_mean_squared_error.txt +++ b/.tether/man/loss_mean_squared_error.txt @@ -19,7 +19,8 @@ class MeanSquaredError(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). 
If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | MeanSquaredError diff --git a/.tether/man/loss_mean_squared_logarithmic_error.txt b/.tether/man/loss_mean_squared_logarithmic_error.txt index 2abfd8aa7..9d9d6af2c 100644 --- a/.tether/man/loss_mean_squared_logarithmic_error.txt +++ b/.tether/man/loss_mean_squared_logarithmic_error.txt @@ -19,7 +19,8 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | MeanSquaredLogarithmicError diff --git a/.tether/man/loss_poisson.txt b/.tether/man/loss_poisson.txt index 3b3c1c05f..1473128c9 100644 --- a/.tether/man/loss_poisson.txt +++ b/.tether/man/loss_poisson.txt @@ -19,7 +19,8 @@ class Poisson(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | Poisson diff --git a/.tether/man/loss_sparse_categorical_crossentropy.txt b/.tether/man/loss_sparse_categorical_crossentropy.txt index c1c36c27d..6c8141cc9 100644 --- a/.tether/man/loss_sparse_categorical_crossentropy.txt +++ b/.tether/man/loss_sparse_categorical_crossentropy.txt @@ -27,7 +27,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Examples: | diff --git a/.tether/man/loss_squared_hinge.txt b/.tether/man/loss_squared_hinge.txt index 19da83106..6a33b95fe 100644 --- a/.tether/man/loss_squared_hinge.txt +++ b/.tether/man/loss_squared_hinge.txt @@ -22,7 +22,8 @@ class SquaredHinge(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Method resolution order: | SquaredHinge diff --git a/.tether/man/loss_tversky.txt b/.tether/man/loss_tversky.txt index 6c27f9a32..155a20164 100644 --- a/.tether/man/loss_tversky.txt +++ b/.tether/man/loss_tversky.txt @@ -23,7 +23,8 @@ class Tversky(LossFunctionWrapper) | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a | `"float32"` unless set to different value - | (via `keras.backend.set_floatx()`). + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. | | Returns: | Tversky loss value. 
diff --git a/.tether/man/op_argsort.txt b/.tether/man/op_argsort.txt index 328ecc9a0..44c66d6e4 100644 --- a/.tether/man/op_argsort.txt +++ b/.tether/man/op_argsort.txt @@ -5,7 +5,7 @@ Returns the indices that would sort a tensor. Args: x: Input tensor. - axis: Axis along which to sort. Defaults to`-1` (the last axis). If + axis: Axis along which to sort. Defaults to `-1` (the last axis). If `None`, the flattened tensor is used. Returns: @@ -31,3 +31,4 @@ array([[0, 1], array([[0, 1], [1, 0], [0, 1]], dtype=int32) + diff --git a/.tether/man/op_binary_crossentropy.txt b/.tether/man/op_binary_crossentropy.txt index 732f5b028..8c1e89665 100644 --- a/.tether/man/op_binary_crossentropy.txt +++ b/.tether/man/op_binary_crossentropy.txt @@ -22,7 +22,7 @@ Args: probabilities. Set it to `True` if `output` represents logits; otherwise, set it to `False` if `output` represents probabilities. - Defaults to`False`. + Defaults to `False`. Returns: Integer tensor: The computed binary cross-entropy loss between @@ -35,3 +35,4 @@ Example: >>> binary_crossentropy(target, output) array([0.10536054 0.10536054 0.22314355 0.22314355], shape=(4,), dtype=float32) + diff --git a/.tether/man/op_categorical_crossentropy.txt b/.tether/man/op_categorical_crossentropy.txt index 4cdb638fd..290b85c8c 100644 --- a/.tether/man/op_categorical_crossentropy.txt +++ b/.tether/man/op_categorical_crossentropy.txt @@ -24,7 +24,7 @@ Args: probabilities. Set it to `True` if `output` represents logits; otherwise, set it to `False` if `output` represents probabilities. - Defaults to`False`. + Defaults to `False`. axis: (optional) The axis along which the categorical cross-entropy is computed. Defaults to `-1`, which corresponds to the last dimension of @@ -46,3 +46,4 @@ Example: ... [0.2, 0.3, 0.5]]) >>> categorical_crossentropy(target, output) array([0.10536054 0.22314355 0.6931472 ], shape=(3,), dtype=float32) + diff --git a/.tether/man/op_logsumexp.txt b/.tether/man/op_logsumexp.txt index a0175c066..1bfe27bce 100644 --- a/.tether/man/op_logsumexp.txt +++ b/.tether/man/op_logsumexp.txt @@ -11,9 +11,9 @@ Args: x: Input tensor. axis: An integer or a tuple of integers specifying the axis/axes along which to compute the sum. If `None`, the sum is computed - over all elements. Defaults to`None`. + over all elements. Defaults to `None`. keepdims: A boolean indicating whether to keep the dimensions of - the input tensor when computing the sum. Defaults to`False`. + the input tensor when computing the sum. Defaults to `False`. Returns: A tensor containing the logarithm of the sum of exponentials of @@ -24,3 +24,4 @@ Example: >>> x = keras.ops.convert_to_tensor([1., 2., 3.]) >>> logsumexp(x) 3.407606 + diff --git a/.tether/man/op_one_hot.txt b/.tether/man/op_one_hot.txt index 5639a4f4d..f0803b961 100644 --- a/.tether/man/op_one_hot.txt +++ b/.tether/man/op_one_hot.txt @@ -18,8 +18,8 @@ Args: x: Integer tensor to be encoded. The shape can be arbitrary, but the dtype should be integer. num_classes: Number of classes for the one-hot encoding. - axis: Axis along which the encoding is performed. Defaults to - `-1`, which represents the last axis. + axis: Axis along which the encoding is performed. + `-1` represents the last axis. Defaults to `-1`. dtype: (Optional) Data type of the output tensor. If not provided, it defaults to the default data type of the backend. 
sparse: Whether to return a sparse tensor; for backends that support diff --git a/.tether/man/op_pad.txt b/.tether/man/op_pad.txt index d7545f9ce..50a663360 100644 --- a/.tether/man/op_pad.txt +++ b/.tether/man/op_pad.txt @@ -20,7 +20,7 @@ Args: mode: One of `"constant"`, `"edge"`, `"linear_ramp"`, `"maximum"`, `"mean"`, `"median"`, `"minimum"`, `"reflect"`, `"symmetric"`, `"wrap"`, `"empty"`, - `"circular"`. Defaults to`"constant"`. + `"circular"`. Defaults to `"constant"`. constant_values: value to pad with if `mode == "constant"`. Defaults to `0`. A `ValueError` is raised if not None and `mode != "constant"`. @@ -36,3 +36,4 @@ Note: Returns: Padded tensor. + diff --git a/.tether/man/op_segment_max.txt b/.tether/man/op_segment_max.txt index b2d98b393..00da31996 100644 --- a/.tether/man/op_segment_max.txt +++ b/.tether/man/op_segment_max.txt @@ -10,13 +10,13 @@ Computes the max of segments in a tensor. Args: data: Input tensor. - segment_ids: A 1-D tensor containing segment indices for each - element in `data`. + segment_ids: A N-D tensor containing segment indices for each + element in `data`. data.shape[:len(segment_ids.shape)] should match. num_segments: An integer representing the total number of segments. If not specified, it is inferred from the maximum value in `segment_ids`. sorted: A boolean indicating whether `segment_ids` is sorted. - Defaults to`False`. + Defaults to `False`. Returns: A tensor containing the max of segments, where each element @@ -29,3 +29,4 @@ Example: >>> num_segments = 3 >>> keras.ops.segment_max(data, segment_ids, num_segments) array([2, 20, 200], dtype=int32) + diff --git a/.tether/man/op_segment_sum.txt b/.tether/man/op_segment_sum.txt index 75cd0563c..55be5cd3c 100644 --- a/.tether/man/op_segment_sum.txt +++ b/.tether/man/op_segment_sum.txt @@ -10,13 +10,14 @@ Computes the sum of segments in a tensor. Args: data: Input tensor. - segment_ids: A 1-D tensor containing segment indices for each - element in `data`. + segment_ids: A N-D tensor containing segment indices for each + element in `data`. Num dims for segment ids should be strictly + smaller or equal to number of dims in data. num_segments: An integer representing the total number of segments. If not specified, it is inferred from the maximum value in `segment_ids`. sorted: A boolean indicating whether `segment_ids` is sorted. - Defaults to`False`. + Defaults to `False`. Returns: A tensor containing the sum of segments, where each element @@ -29,3 +30,4 @@ Example: >>> num_segments = 3 >>> keras.ops.segment_sum(data, segment_ids,num_segments) array([3, 30, 300], dtype=int32) + diff --git a/.tether/man/op_sparse_categorical_crossentropy.txt b/.tether/man/op_sparse_categorical_crossentropy.txt index d4843bbc4..063ac17ff 100644 --- a/.tether/man/op_sparse_categorical_crossentropy.txt +++ b/.tether/man/op_sparse_categorical_crossentropy.txt @@ -25,7 +25,7 @@ Args: or probabilities. Set it to `True` if `output` represents logits; otherwise, set it to `False` if `output` represents probabilities. - Defaults to`False`. + Defaults to `False`. axis: (optional) The axis along which the sparse categorical cross-entropy is computed. Defaults to `-1`, which corresponds to the last dimension @@ -44,3 +44,4 @@ Example: ... 
[0.2, 0.3, 0.5]]) >>> sparse_categorical_crossentropy(target, output) array([0.10536056 0.22314355 0.6931472 ], shape=(3,), dtype=float32) + diff --git a/.tether/man/op_top_k.txt b/.tether/man/op_top_k.txt index ff50fe391..5a624c43a 100644 --- a/.tether/man/op_top_k.txt +++ b/.tether/man/op_top_k.txt @@ -11,7 +11,7 @@ Args: x: Input tensor. k: An integer representing the number of top elements to retrieve. sorted: A boolean indicating whether to sort the output in - descending order. Defaults to`True`. + descending order. Defaults to `True`. Returns: A tuple containing two tensors. The first tensor contains the @@ -26,3 +26,4 @@ Example: array([9 7 5], shape=(3,), dtype=int32) >>> print(indices) array([4 2 0], shape=(3,), dtype=int32) + diff --git a/.tether/man/optimizer_adadelta.txt b/.tether/man/optimizer_adadelta.txt index 5b2cc1ab4..0f251b3d0 100644 --- a/.tether/man/optimizer_adadelta.txt +++ b/.tether/man/optimizer_adadelta.txt @@ -73,7 +73,10 @@ class Adadelta(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | Reference: diff --git a/.tether/man/optimizer_adafactor.txt b/.tether/man/optimizer_adafactor.txt index 2208a6d2b..165191134 100644 --- a/.tether/man/optimizer_adafactor.txt +++ b/.tether/man/optimizer_adafactor.txt @@ -75,7 +75,10 @@ class Adafactor(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | Reference: diff --git a/.tether/man/optimizer_adagrad.txt b/.tether/man/optimizer_adagrad.txt index e28101c9b..68de3b76e 100644 --- a/.tether/man/optimizer_adagrad.txt +++ b/.tether/man/optimizer_adagrad.txt @@ -66,7 +66,10 @@ class Adagrad(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | Reference: diff --git a/.tether/man/optimizer_adam.txt b/.tether/man/optimizer_adam.txt index 134726550..60d2631d2 100644 --- a/.tether/man/optimizer_adam.txt +++ b/.tether/man/optimizer_adam.txt @@ -80,7 +80,10 @@ class Adam(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). 
+ | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | Method resolution order: | Adam diff --git a/.tether/man/optimizer_adam_w.txt b/.tether/man/optimizer_adam_w.txt index 6a01a918a..2566a7536 100644 --- a/.tether/man/optimizer_adam_w.txt +++ b/.tether/man/optimizer_adam_w.txt @@ -83,7 +83,10 @@ class AdamW(keras.src.optimizers.adam.Adam) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | References: diff --git a/.tether/man/optimizer_adamax.txt b/.tether/man/optimizer_adamax.txt index e352758d2..29dd8cbce 100644 --- a/.tether/man/optimizer_adamax.txt +++ b/.tether/man/optimizer_adamax.txt @@ -86,7 +86,10 @@ class Adamax(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | Reference: diff --git a/.tether/man/optimizer_ftrl.txt b/.tether/man/optimizer_ftrl.txt index 95884fc2e..b38deb1f8 100644 --- a/.tether/man/optimizer_ftrl.txt +++ b/.tether/man/optimizer_ftrl.txt @@ -114,7 +114,10 @@ class Ftrl(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | Method resolution order: | Ftrl diff --git a/.tether/man/optimizer_lion.txt b/.tether/man/optimizer_lion.txt index 4d51bb839..987adadbd 100644 --- a/.tether/man/optimizer_lion.txt +++ b/.tether/man/optimizer_lion.txt @@ -74,7 +74,10 @@ class Lion(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | References: diff --git a/.tether/man/optimizer_loss_scale.txt b/.tether/man/optimizer_loss_scale.txt index 15f91f69e..82d098039 100644 --- a/.tether/man/optimizer_loss_scale.txt +++ b/.tether/man/optimizer_loss_scale.txt @@ -74,7 +74,10 @@ class LossScaleOptimizer(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. 
EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | Method resolution order: | LossScaleOptimizer diff --git a/.tether/man/optimizer_nadam.txt b/.tether/man/optimizer_nadam.txt index 2f2b0aa56..7adf055fe 100644 --- a/.tether/man/optimizer_nadam.txt +++ b/.tether/man/optimizer_nadam.txt @@ -70,7 +70,10 @@ class Nadam(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | Reference: diff --git a/.tether/man/optimizer_rmsprop.txt b/.tether/man/optimizer_rmsprop.txt index 8b988c9d5..c99153e5c 100644 --- a/.tether/man/optimizer_rmsprop.txt +++ b/.tether/man/optimizer_rmsprop.txt @@ -76,7 +76,10 @@ class RMSprop(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | | Example: diff --git a/.tether/man/optimizer_sgd.txt b/.tether/man/optimizer_sgd.txt index 67187475d..cf78db8b2 100644 --- a/.tether/man/optimizer_sgd.txt +++ b/.tether/man/optimizer_sgd.txt @@ -80,7 +80,10 @@ class SGD(keras.src.optimizers.optimizer.Optimizer) | value of the gradients since the last update. This is known as | "gradient accumulation". This can be useful | when your batch size is very small, in order to reduce gradient - | noise at each update step. + | noise at each update step. EMA frequency will look at "accumulated" + | iterations value (optimizer steps // gradient_accumulation_steps). + | Learning rate schedules will look at "real" iterations value + | (optimizer steps). | | Method resolution order: | SGD diff --git a/.tether/man/pad_sequences.txt b/.tether/man/pad_sequences.txt index bc45a58b8..78e0f304f 100644 --- a/.tether/man/pad_sequences.txt +++ b/.tether/man/pad_sequences.txt @@ -60,7 +60,8 @@ Args: truncating: String, "pre" or "post" (optional, defaults to `"pre"`): remove values from sequences larger than `maxlen`, either at the beginning or at the end of the sequences. - value: Float or String, padding value. (Optional, defaults to 0.) + value: Float or String, padding value. (Optional, defaults to `0.`) Returns: NumPy array with shape `(len(sequences), maxlen)` + diff --git a/.tether/man/quantize_weights.txt b/.tether/man/quantize_weights.txt index e1825477c..4e2a10ad0 100644 --- a/.tether/man/quantize_weights.txt +++ b/.tether/man/quantize_weights.txt @@ -1,5 +1,9 @@ __signature__ -keras.Model.quantize(self, mode) +keras.Model.quantize( + self, + mode, + **kwargs +) __doc__ Quantize the weights of the model. 
diff --git a/.tether/man/save_model.txt b/.tether/man/save_model.txt index 8f6d50e79..2552e2362 100644 --- a/.tether/man/save_model.txt +++ b/.tether/man/save_model.txt @@ -3,7 +3,7 @@ keras.saving.save_model( model, filepath, overwrite=True, - zipped=True, + zipped=None, **kwargs ) __doc__ @@ -15,7 +15,8 @@ Args: overwrite: Whether we should overwrite any existing model at the target location, or instead ask the user via an interactive prompt. zipped: Whether to save the model as a zipped `.keras` - archive (default), or as an unzipped directory. + archive (default when saving locally), or as an unzipped directory + (default when saving on the Hugging Face Hub). Example: diff --git a/.tether/man/text_dataset_from_directory.txt b/.tether/man/text_dataset_from_directory.txt index 1e3e05f72..ca09d5c88 100644 --- a/.tether/man/text_dataset_from_directory.txt +++ b/.tether/man/text_dataset_from_directory.txt @@ -61,13 +61,15 @@ Args: This is the explicit list of class names (must match names of subdirectories). Used to control the order of the classes (otherwise alphanumerical order is used). - batch_size: Size of the batches of data. Defaults to 32. + batch_size: Size of the batches of data. If `None`, the data will not be batched (the dataset will yield individual samples). + Defaults to `32`. max_length: Maximum size of a text string. Texts longer than this will be truncated to `max_length`. - shuffle: Whether to shuffle the data. Defaults to `True`. + shuffle: Whether to shuffle the data. If set to `False`, sorts the data in alphanumeric order. + Defaults to `True`. seed: Optional random seed for shuffling and transformations. validation_split: Optional float between 0 and 1, fraction of data to reserve for validation. diff --git a/NAMESPACE b/NAMESPACE index b00236bfb..eda9d2f98 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -438,6 +438,7 @@ export(op_argmin) export(op_argpartition) export(op_argsort) export(op_array) +export(op_associative_scan) export(op_average) export(op_average_pool) export(op_batch_normalization) @@ -593,6 +594,7 @@ export(op_rsqrt) export(op_scan) export(op_scatter) export(op_scatter_update) +export(op_searchsorted) export(op_segment_max) export(op_segment_sum) export(op_select) @@ -657,6 +659,7 @@ export(optimizer_adam) export(optimizer_adam_w) export(optimizer_adamax) export(optimizer_ftrl) +export(optimizer_lamb) export(optimizer_lion) export(optimizer_loss_scale) export(optimizer_nadam) diff --git a/NEWS.md b/NEWS.md index 8ed13c488..e1709a7ab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,24 @@ # keras3 (development version) +- Added compatibility with Keras v3.5.0. User-facing changes: + + - New functions: + - `op_associative_scan()` + - `op_searchsorted()` + - `optimizer_lamb()` + - `keras$DTypePolicy` instances can now be supplied to the `dtype` argument for + losses, metrics, and layers. + - Added integration with the Hugging Face Hub. You can now save models to + the Hugging Face Hub directly with `save_model()` and load `.keras` models directly + from the Hugging Face Hub with `load_model()`. + - Added compatibility with NumPy 2.0. + - Improved `keras$distribution` API support for very large models. + - Added `data_format` argument to `layer_zero_padding_1d()`. + - Miscellaneous documentation improvements. + - Bug fixes and performance improvements. + + # keras3 1.1.0 - Fixed issue where GPUs would not be found when running on Windows under WSL Linux.
diff --git a/R/Layer.R b/R/Layer.R index a69a40903..9931728ef 100644 --- a/R/Layer.R +++ b/R/Layer.R @@ -217,7 +217,7 @@ #' ``` #' #' * ```r -#' add_metric() +#' add_metric(...) #' ``` #' #' * ```r @@ -371,9 +371,9 @@ #' Return the values of `layer$weights` as a list of R or NumPy arrays. #' #' * ```r -#' quantize(mode) +#' quantize(mode, type_check = TRUE) #' ``` -#' Currently, only the `Dense` and `EinsumDense` layers support in-place +#' Currently, only the `Dense`, `EinsumDense` and `Embedding` layers support in-place #' quantization via this `quantize()` method. #' #' Example: @@ -383,6 +383,10 @@ #' ``` #' #' * ```r +#' quantized_build(input_shape, mode) +#' ``` +#' +#' * ```r #' quantized_call(...) #' ``` #' diff --git a/R/Loss.R b/R/Loss.R index 5d52e311d..1e62afc08 100644 --- a/R/Loss.R +++ b/R/Loss.R @@ -61,9 +61,15 @@ #' initialize(name=NULL, reduction="sum_over_batch_size", dtype=NULL) #' ``` #' Args: -#' * `name` -#' * `reduction`: Valid values are one of `{"sum_over_batch_size", "sum", NULL, "none"}` -#' * `dtype` +#' * `name`: Optional name for the loss instance. +#' * `reduction`: Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' * `dtype`: The dtype of the loss's computations. Defaults to `NULL`, which +#' means using [`config_floatx()`]. `config_floatx()` is +#' `"float32"` unless set to a different value +#' (via [`config_set_floatx()`]). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' * ``` #' __call__(y_true, y_pred, sample_weight=NULL) #' ``` @@ -74,6 +80,12 @@ #' get_config() #' ``` #' +#' # Readonly properties: +#' +#' * ```r +#' dtype +#' ``` +#' #' @returns A function that returns `Loss` instances, similar to the #' builtin loss functions. #' @inheritSection Layer Symbols in scope diff --git a/R/Metric.R b/R/Metric.R index f25f356fe..35c5a5dd5 100644 --- a/R/Metric.R +++ b/R/Metric.R @@ -116,8 +116,12 @@ #' Initialize self. #' #' Args: -#' * `name`: (Optional) string name of the metric instance. -#' * `dtype`: (Optional) data type of the metric result. +#' * `name`: Optional name for the metric instance. +#' * `dtype`: The dtype of the metric's computations. Defaults to `NULL`, which +#' means using [`config_floatx()`]. `config_floatx()` is +#' `"float32"` unless set to a different value +#' (via [`config_set_floatx()`]). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' * ```r #' add_variable(shape, initializer, dtype=NULL, aggregation = 'sum', name=NULL) diff --git a/R/callbacks.R b/R/callbacks.R index e593f296f..cdb001ec3 100644 --- a/R/callbacks.R +++ b/R/callbacks.R @@ -83,11 +83,11 @@ #' preemption checkpointing (i.e. with `save_before_preemption = TRUE`). #' #' @param delete_checkpoint -#' Boolean, defaults to `TRUE`. This `backup_and_restore` +#' Boolean. This `backup_and_restore` #' callback works by saving a checkpoint to back up the training state. #' If `delete_checkpoint = TRUE`, the checkpoint will be deleted after #' training is finished. Use `FALSE` if you'd like to keep the checkpoint -#' for future usage. +#' for future usage. Defaults to `TRUE`. #' #' @returns A `Callback` instance that can be passed to [`fit.keras.src.models.model.Model()`].
#' @export diff --git a/R/dataset-utils.R b/R/dataset-utils.R index 8af426347..65996860c 100644 --- a/R/dataset-utils.R +++ b/R/dataset-utils.R @@ -380,8 +380,8 @@ function (dataset, left_size = NULL, right_size = NULL, shuffle = FALSE, #' #' @param color_mode #' One of `"grayscale"`, `"rgb"`, `"rgba"`. -#' Defaults to `"rgb"`. Whether the images will be converted to -#' have 1, 3, or 4 channels. +#' Whether the images will be converted to +#' have 1, 3, or 4 channels. Defaults to `"rgb"`. #' #' @param batch_size #' Size of the batches of data. Defaults to 32. @@ -390,9 +390,9 @@ function (dataset, left_size = NULL, right_size = NULL, shuffle = FALSE, #' #' @param image_size #' Size to resize images to after they are read from disk, -#' specified as `(height, width)`. Defaults to `(256, 256)`. +#' specified as `(height, width)`. #' Since the pipeline processes batches of images that must all have -#' the same size, this must be provided. +#' the same size, this must be provided. Defaults to `(256, 256)`. #' #' @param shuffle #' Whether to shuffle the data. Defaults to `TRUE`. @@ -414,9 +414,10 @@ function (dataset, left_size = NULL, right_size = NULL, shuffle = FALSE, #' #' @param interpolation #' String, the interpolation method used when -#' resizing images. Defaults to `"bilinear"`. +#' resizing images. #' Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, #' `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. +#' Defaults to `"bilinear"`. #' #' @param follow_links #' Whether to visit subdirectories pointed to by symlinks. @@ -548,17 +549,19 @@ function (directory, labels = "inferred", label_mode = "int", #' of the classes (otherwise alphanumerical order is used). #' #' @param batch_size -#' Size of the batches of data. Defaults to 32. +#' Size of the batches of data. #' If `NULL`, the data will not be batched #' (the dataset will yield individual samples). +#' Defaults to `32`. #' #' @param max_length #' Maximum size of a text string. Texts longer than this will #' be truncated to `max_length`. #' #' @param shuffle -#' Whether to shuffle the data. Defaults to `TRUE`. +#' Whether to shuffle the data. #' If set to `FALSE`, sorts the data in alphanumeric order. +#' Defaults to `TRUE`. #' #' @param seed #' Optional random seed for shuffling and transformations. diff --git a/R/layer-methods.R b/R/layer-methods.R index 84d728a8c..78681ac88 100644 --- a/R/layer-methods.R +++ b/R/layer-methods.R @@ -165,6 +165,7 @@ reset_state <- function(object) { #' @param mode #' The mode of the quantization. Only 'int8' is supported at this #' time. +#' @param ... Passed on to the `object` quantization method. #' #' @export #' @returns `model`, invisibly. Note this is just a convenience for usage with `|>`, the @@ -173,7 +174,7 @@ reset_state <- function(object) { #' @family layer methods #' @tether keras.Model.quantize quantize_weights <- -function (object, mode) +function (object, mode, ...) { - object$quantize(mode) + object$quantize(mode, ...) } diff --git a/R/layers-convolutional.R b/R/layers-convolutional.R index 3ecbdc63a..60684aba4 100644 --- a/R/layers-convolutional.R +++ b/R/layers-convolutional.R @@ -284,10 +284,10 @@ function (object, filters, kernel_size, strides = 1L, padding = "valid", #' #' @description #' This layer creates a convolution kernel that is convolved with the layer -#' input over a single spatial (or temporal) dimension to produce a tensor of -#' outputs. If `use_bias` is `TRUE`, a bias vector is created and added to the -#' outputs. 
Finally, if `activation` is not `NULL`, it is applied to the -#' outputs as well. +#' input over a 2D spatial (or temporal) dimension (height and width) to +#' produce a tensor of outputs. If `use_bias` is `TRUE`, a bias vector is created +#' and added to the outputs. Finally, if `activation` is not `NULL`, it is +#' applied to the outputs as well. #' #' # Input Shape #' - If `data_format="channels_last"`: @@ -563,10 +563,10 @@ function (object, filters, kernel_size, strides = list(1L, 1L), #' #' @description #' This layer creates a convolution kernel that is convolved with the layer -#' input over a single spatial (or temporal) dimension to produce a tensor of -#' outputs. If `use_bias` is `TRUE`, a bias vector is created and added to the -#' outputs. Finally, if `activation` is not `NULL`, it is applied to the -#' outputs as well. +#' input over a 3D spatial (or temporal) dimension (width,height and depth) to +#' produce a tensor of outputs. If `use_bias` is `TRUE`, a bias vector is created +#' and added to the outputs. Finally, if `activation` is not `NULL`, it is +#' applied to the outputs as well. #' #' # Input Shape #' - If `data_format="channels_last"`: diff --git a/R/layers-core.R b/R/layers-core.R index 687c8d53a..ec82b59d6 100644 --- a/R/layers-core.R +++ b/R/layers-core.R @@ -40,6 +40,10 @@ #' ) #' ``` #' +#' - ```r +#' quantize(mode, type_check = TRUE) +#' ``` +#' #' # Readonly properties: #' #' - `kernel` @@ -193,7 +197,7 @@ function (object, units, activation = NULL, use_bias = TRUE, #' ``` #' #' - ```r -#' quantize(mode) +#' quantize(mode, type_check = TRUE) #' ``` #' #' # Readonly properties: @@ -326,7 +330,7 @@ function (object, equation, output_shape, activation = NULL, #' ``` #' #' - ```r -#' quantize(mode) +#' quantize(mode, type_check = TRUE) #' ``` #' #' - ```r @@ -334,7 +338,7 @@ function (object, equation, output_shape, activation = NULL, #' ``` #' #' - ```r -#' quantized_call(inputs) +#' quantized_call(...) #' ``` #' #' # Readonly properties: diff --git a/R/layers-preprocessing.R b/R/layers-preprocessing.R index 1332c1d72..854532193 100644 --- a/R/layers-preprocessing.R +++ b/R/layers-preprocessing.R @@ -1756,8 +1756,8 @@ function (object, height, width, interpolation = "bilinear", #' @description #' This layer translates a set of arbitrary strings into integer output via a #' table-based vocabulary lookup. This layer will perform no splitting or -#' transformation of input strings. For a layer than can split and tokenize -#' natural language, see the `layer_text_vectorization` layer. +#' transformation of input strings. For a layer that can split and tokenize +#' natural language, see [`layer_text_vectorization`]. #' #' The vocabulary for the layer must be either supplied on construction or #' learned via `adapt()`. 
During `adapt()`, the layer will analyze a data set, diff --git a/R/layers-reshaping.R b/R/layers-reshaping.R index 590dbfb56..eef8da00a 100644 --- a/R/layers-reshaping.R +++ b/R/layers-reshaping.R @@ -617,18 +617,37 @@ function (object, size = list(2L, 2L, 2L), data_format = NULL, #' ``` #' #' # Input Shape -#' 3D tensor with shape `(batch_size, axis_to_pad, features)` +#' 3D tensor with shape: +#' - If `data_format` is `"channels_last"`: +#' `(batch_size, axis_to_pad, features)` +#' - If `data_format` is `"channels_first"`: +#' `(batch_size, features, axis_to_pad)` #' #' # Output Shape -#' 3D tensor with shape `(batch_size, padded_axis, features)` +#' 3D tensor with shape: +#' - If `data_format` is `"channels_last"`: +#' `(batch_size, padded_axis, features)` +#' - If `data_format` is `"channels_first"`: +#' `(batch_size, features, padded_axis)` #' #' @param padding -#' Int, or list of int (length 2), or named listionary. +#' Int, or list of int (length 2). #' - If int: how many zeros to add at the beginning and end of -#' the padding dimension (axis 1). +#' the padding dimension (axis 2). #' - If list of 2 ints: how many zeros to add at the beginning and the #' end of the padding dimension (`(left_pad, right_pad)`). #' +#' @param data_format +#' A string, one of `"channels_last"` (default) or +#' `"channels_first"`. The ordering of the dimensions in the inputs. +#' `"channels_last"` corresponds to inputs with shape +#' `(batch_size, axis_to_pad, channels)` while `"channels_first"` +#' corresponds to inputs with shape +#' `(batch_size, channels, axis_to_pad)`. +#' When unspecified, uses `image_data_format` value found in your Keras +#' config file at `~/.keras/keras.json` (if exists). Defaults to +#' `"channels_last"`. +#' #' @param object #' Object to compose the layer with. A tensor, array, or sequential model. #' @@ -645,7 +664,7 @@ function (object, size = list(2L, 2L, 2L), data_format = NULL, #' #' @tether keras.layers.ZeroPadding1D layer_zero_padding_1d <- -function (object, padding = 1L, ...) +function (object, padding = 1L, data_format = NULL, ...) { args <- capture_args(list(padding = as_integer, input_shape = normalize_shape, batch_size = as_integer, batch_input_shape = normalize_shape), diff --git a/R/losses.R b/R/losses.R index f4f0481ce..a536bf00c 100644 --- a/R/losses.R +++ b/R/losses.R @@ -102,7 +102,8 @@ #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values. shape = `[batch_size, d0, .. dN]`. @@ -317,7 +318,8 @@ function (y_true, y_pred, from_logits = FALSE, label_smoothing = 0, #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values, of shape `(batch_size, d0, .. dN)`. @@ -430,7 +432,8 @@ function (y_true, y_pred, apply_class_balancing = FALSE, #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). 
+#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Tensor of one-hot true targets. @@ -590,7 +593,8 @@ function (y_true, y_pred, from_logits = FALSE, label_smoothing = 0, #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Tensor of one-hot true targets. @@ -656,7 +660,8 @@ function (y_true, y_pred, alpha = 0.25, gamma = 2, #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' The ground truth values. `y_true` values are expected to be @@ -732,7 +737,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Tensor of true targets. @@ -820,7 +826,8 @@ function (y_true, y_pred, axis = -1L, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param ... #' For forward/backward compatability. @@ -875,7 +882,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", name = "dice", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' The ground truth values. `y_true` values are expected to be -1 @@ -950,7 +958,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' tensor of true targets. @@ -1017,7 +1026,8 @@ function (y_true, y_pred, delta = 1, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Tensor of true targets. 
@@ -1085,7 +1095,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values with shape = `[batch_size, d0, .. dN]`. @@ -1147,7 +1158,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values with shape = `[batch_size, d0, .. dN]`. @@ -1214,7 +1226,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values with shape = `[batch_size, d0, .. dN]`. @@ -1276,7 +1289,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values with shape = `[batch_size, d0, .. dN]`. @@ -1342,7 +1356,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values with shape = `[batch_size, d0, .. dN]`. @@ -1405,7 +1420,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values. shape = `[batch_size, d0, .. dN]`. @@ -1512,7 +1528,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' Ground truth values. @@ -1591,7 +1608,8 @@ function (y_true, y_pred, from_logits = FALSE, ignore_class = NULL, #' The dtype of the loss's computations. 
Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param y_true #' The ground truth values. `y_true` values are expected to be -1 @@ -1648,7 +1666,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param ... #' For forward/backward compatability. @@ -1657,7 +1676,6 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' CTC loss value. #' #' @export -#' @inheritParams loss_hinge #' @family losses #' @tether keras.losses.CTC # @seealso @@ -1715,7 +1733,8 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' The dtype of the loss's computations. Defaults to `NULL`, which #' means using `config_floatx()`. `config_floatx()` is a #' `"float32"` unless set to different value -#' (via `config_set_floatx()`). +#' (via `config_set_floatx()`). If a `keras$DTypePolicy` is +#' provided, then the `compute_dtype` will be utilized. #' #' @param ... #' For forward/backward compatability. diff --git a/R/metrics.R b/R/metrics.R index afc5f7bd1..06bde67c1 100644 --- a/R/metrics.R +++ b/R/metrics.R @@ -2319,13 +2319,20 @@ function (..., num_classes, name = NULL, dtype = NULL, ignore_class = NULL, #' Standalone usage: #' #' ```{r} -#' y_true <- rbind(c(0, 0, 1), c(1, 0, 0), c(0, 1, 0), c(1, 0, 0)) -#' y_pred <- rbind(c(0.2, 0.3, 0.5), c(0.1, 0.2, 0.7), c(0.5, 0.3, 0.1), +#' y_true <- rbind(c(0, 0, 1), +#' c(1, 0, 0), +#' c(0, 1, 0), +#' c(1, 0, 0)) +#' y_pred <- rbind(c(0.2, 0.3, 0.5), +#' c(0.1, 0.2, 0.7), +#' c(0.5, 0.3, 0.1), #' c(0.1, 0.4, 0.5)) #' sample_weight <- c(0.1, 0.2, 0.3, 0.4) +#' #' m <- metric_one_hot_iou(num_classes = 3, target_class_ids = c(0, 2)) -#' m$update_state( -#' y_true = y_true, y_pred = y_pred, sample_weight = sample_weight) +#' m$update_state(y_true = y_true, +#' y_pred = y_pred, +#' sample_weight = sample_weight) #' m$result() #' ``` #' @@ -2599,6 +2606,7 @@ function (y_true, y_pred, from_logits = FALSE, label_smoothing = 0, #' # = -((log 0.95), (log 0.1)) #' # = [0.051, 2.302] #' # Reduced xent = (0.051 + 2.302) / 2 +#' #' m <- metric_categorical_crossentropy() #' m$update_state(rbind(c(0, 1, 0), c(0, 0, 1)), #' rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))) diff --git a/R/model-creation.R b/R/model-creation.R index c527d6650..b647e9b3e 100644 --- a/R/model-creation.R +++ b/R/model-creation.R @@ -86,6 +86,10 @@ keras_model <- function(inputs = NULL, outputs = NULL, ...) { #' value of 0. This feature is only supported with the TensorFlow #' backend. Defaults to `FALSE`. #' +#' @param batch_shape +#' Optional shape list (list of integers or `NULL` objects), +#' including the batch size. +#' #' @param name #' Optional name string for the layer. #' Should be unique in a model (do not reuse the same name twice). @@ -100,9 +104,6 @@ keras_model <- function(inputs = NULL, outputs = NULL, ...) { #' Boolean, whether the input is optional or not. #' An optional input can accept `NULL` values. #' -#' @param batch_shape -#' Shape, including the batch dim. 
-#' #' @export #' @family model creation # @seealso diff --git a/R/model-persistence.R b/R/model-persistence.R index 2f03e7a0f..43616ad44 100644 --- a/R/model-persistence.R +++ b/R/model-persistence.R @@ -49,7 +49,8 @@ #' #' @param zipped #' Whether to save the model as a zipped `.keras` -#' archive (default), or as an unzipped directory. +#' archive (default when saving locally), or as an unzipped directory +#' (default when saving on the Hugging Face Hub). #' #' @param ... #' For forward/backward compatability. @@ -67,7 +68,7 @@ # @seealso # + save_model <- -function (model, filepath = NULL, overwrite = FALSE, zipped = TRUE, ...) +function (model, filepath = NULL, overwrite = FALSE, zipped = NULL, ...) { if(is.null(filepath) -> return_serialized) { filepath <- tempfile(pattern = "keras_model-", fileext = ".keras") diff --git a/R/ops.R b/R/ops.R index 2ed8481c2..1246c0a19 100644 --- a/R/ops.R +++ b/R/ops.R @@ -338,6 +338,34 @@ function (inputs, indices, updates) do.call(keras$ops$scatter_update, args) } +#' Perform a binary search +#' +#' @description +#' Perform a binary search, returning indices for insertion of `values` +#' into `sorted_sequence` that maintain the sorting order. +#' +#' @returns +#' Tensor of insertion indices of same shape as `values`. +#' +#' @param sorted_sequence +#' 1-D input tensor, sorted along the innermost +#' dimension. +#' +#' @param values +#' N-D tensor of query insertion values. +#' +#' @param side +#' `'left'` or `'right'`, specifying the direction in which to insert +#' for the equality case (tie-breaker). +#' +#' @export +#' @family core ops +#' @family ops +#' @tether keras.ops.searchsorted +op_searchsorted <- +function (sorted_sequence, values, side = "left") +keras$ops$searchsorted(sorted_sequence, values, side) + #' Gets the shape of the tensor input. #' @@ -1054,11 +1082,11 @@ function (x, sequence_length, sequence_stride, fft_length, length = NULL, #' @param axis #' An integer or a list of integers specifying the axis/axes #' along which to compute the sum. If `NULL`, the sum is computed -#' over all elements. Defaults to`NULL`. +#' over all elements. Defaults to `NULL`. #' #' @param keepdims #' A boolean indicating whether to keep the dimensions of -#' the input tensor when computing the sum. Defaults to`FALSE`. +#' the input tensor when computing the sum. Defaults to `FALSE`. #' #' @export #' @family math ops @@ -1224,8 +1252,10 @@ keras$ops$rsqrt(x) #' Input tensor. #' #' @param segment_ids -#' A 1-D tensor containing segment indices for each +#' A N-D tensor containing segment indices for each #' element in `data`. +#' `head(op_shape(data), length(op_shape(segment_ids)))` should match +#' `op_shape(segment_ids)` #' #' @param num_segments #' An integer representing the total number of @@ -1234,7 +1264,7 @@ keras$ops$rsqrt(x) #' #' @param sorted #' A boolean indicating whether `segment_ids` is sorted. -#' Defaults to`FALSE`. +#' Defaults to `FALSE`. #' #' @export #' @family math ops @@ -1271,8 +1301,9 @@ function (data, segment_ids, num_segments = NULL, sorted = FALSE) #' Input tensor. #' #' @param segment_ids -#' A 1-D tensor containing segment indices for each -#' element in `data`. +#' A N-D tensor containing segment indices for each +#' element in `data`. Num dims for segment ids should be strictly +#' smaller or equal to number of dims in data. 
#' #' @param num_segments #' An integer representing the total number of @@ -1281,7 +1312,7 @@ function (data, segment_ids, num_segments = NULL, sorted = FALSE) #' #' @param sorted #' A boolean indicating whether `segment_ids` is sorted. -#' Defaults to`FALSE`. +#' Defaults to `FALSE`. #' #' @export #' @family math ops @@ -1468,7 +1499,7 @@ function (x, sequence_length, sequence_stride, fft_length, window = "hann", #' #' @param sorted #' A boolean indicating whether to sort the output in -#' descending order. Defaults to`TRUE`. +#' descending order. Defaults to `TRUE`. #' #' @export #' @family math ops @@ -2272,8 +2303,8 @@ function (inputs, num_classes, axis = -1L, dtype = NULL, sparse = FALSE, ...) #' Number of classes for the one-hot encoding. #' #' @param axis -#' Axis along which the encoding is performed. Defaults to -#' `-1`, which represents the last axis. +#' Axis along which the encoding is performed. +#' `-1` represents the last axis. Defaults to `-1`. #' #' @param dtype #' (Optional) Data type of the output tensor. If not @@ -2684,7 +2715,7 @@ keras$ops$softsign(x) #' or probabilities. #' Set it to `TRUE` if `output` represents logits; otherwise, #' set it to `FALSE` if `output` represents probabilities. -#' Defaults to`FALSE`. +#' Defaults to `FALSE`. #' #' @param axis #' (optional) The axis along which the sparse categorical @@ -3513,6 +3544,81 @@ function (x, dtype = NULL) keras$ops$array(x, dtype) } +#' Performs a scan with an associative binary operation, in parallel. +#' +#' @description +#' This operation his similar to [`op_scan()`], with the key difference that +#' `op_associative_scan()` is a parallel implementation with +#' potentially significant performance benefits, especially when jit compiled. +#' The catch is that it can only be used when `f` is a binary associative +#' operation (i.e. it must verify `f(a, f(b, c)) == f(f(a, b), c)`). +#' +#' For an introduction to associative scans, refer to this paper: +#' Blelloch, Guy E. 1990. +#' [Prefix Sums and Their Applications]( +#' https://www.cs.cmu.edu/~guyb/papers/Ble93.pdf). +#' +#' # Examples +#' ```{r} +#' sum_fn <- function(x, y) x + y +#' xs <- op_arange(5L) +#' op_associative_scan(sum_fn, xs) +#' ``` +#' +#' ```{r} +#' sum_fn <- function(x, y) { +#' str(list(x = x, y = y)) +#' map2(x, y, \(.x, .y) .x + .y) +#' } +#' +#' xs <- list(op_array(1:2), +#' op_array(1:2), +#' op_array(1:2)) +#' ys <- op_associative_scan(sum_fn, xs, axis = 1) +#' ys +#' ``` +#' +#' @returns +#' A (possibly nested tree structure of) array(s) of the same shape +#' and structure as `elems`, in which the `k`'th element of `axis` is +#' the result of recursively applying `f` to combine the first `k` +#' elements of `elems` along `axis`. For example, given +#' `elems = list(a, b, c, ...)`, the result would be +#' `list(a, f(a, b), f(f(a, b), c), ...)`. +#' +#' @param f +#' A callable implementing an associative binary operation with +#' signature `r = f(a, b)`. Function `f` must be associative, i.e., +#' it must satisfy the equation +#' `f(a, f(b, c)) == f(f(a, b), c)`. +#' The inputs and result are (possibly nested tree structures +#' of) array(s) matching `elems`. Each array has a dimension in place +#' of the `axis` dimension. `f` should be applied elementwise over +#' the `axis` dimension. +#' The result `r` has the same shape (and structure) as the +#' two inputs `a` and `b`. +#' +#' @param elems +#' A (possibly nested tree structure of) array(s), each with +#' an `axis` dimension of size `num_elems`. 
+#' +#' @param reverse +#' A boolean stating if the scan should be reversed with respect +#' to the `axis` dimension. +#' +#' @param axis +#' an integer identifying the axis over which the scan should occur. +#' +#' @export +#' @family core ops +#' @family ops +#' @tether keras.ops.associative_scan +op_associative_scan <- +function(f, elems, reverse = FALSE, axis = 1L) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$associative_scan, args) +} #' Compute the weighted average along the specified axis. #' @@ -6025,7 +6131,7 @@ keras$ops$outer(x1, x2) #' One of `"constant"`, `"edge"`, `"linear_ramp"`, #' `"maximum"`, `"mean"`, `"median"`, `"minimum"`, #' `"reflect"`, `"symmetric"`, `"wrap"`, `"empty"`, -#' `"circular"`. Defaults to`"constant"`. +#' `"circular"`. Defaults to `"constant"`. #' #' @param constant_values #' Value to pad with if `mode == "constant"`. diff --git a/R/optimizers.R b/R/optimizers.R index c6d6f86ed..23d3a6237 100644 --- a/R/optimizers.R +++ b/R/optimizers.R @@ -93,13 +93,16 @@ #' automatically set a loss scale factor. #' #' @param gradient_accumulation_steps -#' Int or `NULL`. If an int, model & optimizer +#' Int or `NULL`. If an int, model and optimizer #' variables will not be updated at every step; instead they will be #' updated every `gradient_accumulation_steps` steps, using the average #' value of the gradients since the last update. This is known as #' "gradient accumulation". This can be useful #' when your batch size is very small, in order to reduce gradient -#' noise at each update step. +#' noise at each update step. EMA frequency will look at "accumulated" +#' iterations value (optimizer steps // gradient_accumulation_steps). +#' Learning rate schedules will look at "real" iterations value +#' (optimizer steps). #' #' @param ... #' For forward/backward compatability. @@ -913,6 +916,129 @@ function (learning_rate = 0.001, learning_rate_power = -0.5, } +#' Optimizer that implements the Lamb algorithm. +#' +#' @description +#' Lamb is a stochastic gradient descent method that +#' uses layer-wise adaptive moments to adjusts the +#' learning rate for each parameter based on the ratio of the +#' norm of the weight to the norm of the gradient +#' This helps to stabilize the training process and improves convergence +#' especially for large batch sizes. +#' +#' # References +#' - [Yang et al.](https://arxiv.org/pdf/1904.00962) +#' +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. +#' +#' @param beta_1 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 1st moment estimates. Defaults to +#' `0.9`. +#' +#' @param beta_2 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 2nd moment estimates. Defaults to +#' `0.999`. +#' +#' @param epsilon +#' A small constant for numerical stability. +#' Defaults to `1e-7`. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. 
If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change +#' after each training batch), and periodically overwriting the +#' weights with their moving average. +#' +#' @param ema_momentum +#' Float, defaults to `0.99`. Only used if `use_ema = TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema = TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, +#' and you need to explicitly overwrite the variables +#' at the end of training by calling +#' `optimizer$finalize_variable_values()` (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied the loss before computing gradients, and the inverse +#' of the scale factor will be multiplied by the gradients before +#' updating variables. Useful for preventing underflow during +#' mixed precision training. Alternately, +#' [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param gradient_accumulation_steps +#' Int or `NULL`. If an int, model and optimizer +#' variables will not be updated at every step; instead they will be +#' updated every `gradient_accumulation_steps` steps, using the average +#' value of the gradients since the last update. This is known as +#' "gradient accumulation". This can be useful +#' when your batch size is very small, in order to reduce gradient +#' noise at each update step. EMA frequency will look at "accumulated" +#' iterations value (optimizer steps // gradient_accumulation_steps). +#' Learning rate schedules will look at "real" iterations value +#' (optimizer steps). +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @family optimizers +#' @returns an `Optimizer` instance +#' @tether keras.optimizers.Lamb +optimizer_lamb <- +function (learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, + epsilon = 1e-07, weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = FALSE, ema_momentum = 0.99, + ema_overwrite_frequency = NULL, loss_scale_factor = NULL, + gradient_accumulation_steps = NULL, name = "lamb", ...) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Lamb, args) +} + + #' Optimizer that implements the Lion algorithm. #' #' @description diff --git a/R/preprocessing-image.R b/R/preprocessing-image.R index 83014ef62..97a25c2dd 100644 --- a/R/preprocessing-image.R +++ b/R/preprocessing-image.R @@ -60,9 +60,9 @@ #' #' @param interpolation #' String, interpolation to use for resizing. 
+#' Supports `"bilinear"`, `"nearest"`, `"bicubic"`, +#' `"lanczos3"`, `"lanczos5"`. #' Defaults to `'bilinear'`. -#' Supports `bilinear`, `nearest`, `bicubic`, -#' `lanczos3`, `lanczos5`. #' #' @param data_format #' `"channels_last"` or `"channels_first"`. diff --git a/R/utils.R b/R/utils.R index 976cfb2e3..3a871b80f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -477,7 +477,7 @@ function (seed) #' `maxlen`, either at the beginning or at the end of the sequences. #' #' @param value -#' Float or String, padding value. (Optional, defaults to 0.) +#' Float or String, padding value. (Optional, defaults to `0`) #' #' @export #' @family utils diff --git a/docs/dev/LICENSE-text.html b/docs/dev/LICENSE-text.html index 750b8bc06..d2898eda2 100644 --- a/docs/dev/LICENSE-text.html +++ b/docs/dev/LICENSE-text.html @@ -8,7 +8,7 @@ keras3 - 1.0.0.9001 + 1.1.0.9000 + + + + + +
[Regenerated pkgdown site under docs/dev/ (HTML diff residue condensed): the site version string is bumped from 1.0.0.9001 to 1.1.0.9000 across pages, and a new reference page docs/dev/reference/op_associative_scan.html is added. Its content mirrors the roxygen documentation above: description, usage `op_associative_scan(f, elems, reverse = FALSE, axis = 1L)`, arguments, value, rendered examples with their outputs, and the "See also" indexes of core ops and ops. Existing pages such as op_average.html are rebuilt with the new version string.]
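As a further illustration of the new op (a sketch, assuming keras3 is attached and a backend is configured): any associative binary operation can be scanned, not just addition, and `reverse = TRUE` runs the scan from the end of the axis.

```r
library(keras3)

xs <- op_array(c(3, 1, 4, 1, 5, 9, 2, 6))

# Running maximum: op_maximum() is associative, so it can be scanned in parallel.
op_associative_scan(\(a, b) op_maximum(a, b), xs)

# Cumulative sum taken from the right-hand end of the axis.
op_associative_scan(\(a, b) a + b, xs, reverse = TRUE)
```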
[New reference page docs/dev/reference/op_searchsorted.html added, mirroring the roxygen documentation above: description (binary search returning the insertion indices of `values` into `sorted_sequence` that preserve the sort order), usage `op_searchsorted(sorted_sequence, values, side = "left")`, arguments, value, and the "See also" indexes. op_segment_max.html is rebuilt with the new version string.]
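A small usage sketch for the other new op; the numbers are illustrative, and the returned indices are 0-based since the R wrapper passes the backend tensor through unchanged.

```r
library(keras3)

sorted_sequence <- op_array(c(1, 3, 5, 7, 9))
values          <- op_array(c(0, 4, 9, 10))

op_searchsorted(sorted_sequence, values)                  # expected: 0 2 4 5
op_searchsorted(sorted_sequence, values, side = "right")  # ties break right: 0 2 5 5
```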
[New reference page docs/dev/reference/optimizer_lamb.html added, mirroring the roxygen documentation above: description of the layer-wise adaptive moments (Lamb) method, the full `optimizer_lamb()` usage signature with defaults, the argument list, value, and the Yang et al. reference. optimizer_lion.html is rebuilt with the new version string 1.1.0.9000.]
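Finally, a minimal sketch of wiring the new optimizer into a model; the architecture and hyper-parameters below are placeholders, not recommendations.

```r
library(keras3)

model <- keras_model_sequential(input_shape = 784) |>
  layer_dense(256, activation = "relu") |>
  layer_dense(10, activation = "softmax")

model |> compile(
  optimizer = optimizer_lamb(learning_rate = 0.001, weight_decay = 0.01),
  loss = "sparse_categorical_crossentropy",
  metrics = "accuracy"
)
```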