Bump version to v0.24.0 + deprecations (#3570)
snarayan21 committed Aug 23, 2024
1 parent efa3a38 commit 020b0ef
Showing 11 changed files with 15 additions and 247 deletions.
2 changes: 1 addition & 1 deletion composer/_version.py
@@ -3,4 +3,4 @@

 """The Composer Version."""

-__version__ = '0.24.0.dev0'
+__version__ = '0.24.0'
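
With the `.dev0` suffix dropped, installed builds now report the release version. A quick sanity check (a minimal sketch, assuming the package built from this commit is installed):

```python
import composer

# The release build of this commit should report the final version string.
print(composer.__version__)  # expected: '0.24.0'
```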
2 changes: 0 additions & 2 deletions composer/callbacks/__init__.py
@@ -9,7 +9,6 @@
 from composer.callbacks.activation_monitor import ActivationMonitor
 from composer.callbacks.checkpoint_saver import CheckpointSaver
 from composer.callbacks.early_stopper import EarlyStopper
-from composer.callbacks.eval_output_logging_callback import EvalOutputLogging
 from composer.callbacks.export_for_inference import ExportForInferenceCallback
 from composer.callbacks.free_outputs import FreeOutputs
 from composer.callbacks.generate import Generate
@@ -36,7 +35,6 @@
     'CheckpointSaver',
     'MLPerfCallback',
     'EarlyStopper',
-    'EvalOutputLogging',
     'ExportForInferenceCallback',
     'ThresholdStopper',
     'ImageVisualizer',
129 changes: 0 additions & 129 deletions composer/callbacks/eval_output_logging_callback.py

This file was deleted.
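
With the callback module deleted and its export removed from `composer.callbacks`, any user code that still imports `EvalOutputLogging` will now raise `ImportError`. A defensive import sketch (hypothetical handling; adapt to your codebase):

```python
try:
    # Importable through v0.23.x; removed in v0.24.0.
    from composer.callbacks import EvalOutputLogging
except ImportError:
    # The callback no longer ships with Composer; fall back or drop it.
    EvalOutputLogging = None
```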

18 changes: 1 addition & 17 deletions composer/core/state.py
@@ -47,7 +47,6 @@
     ParallelismConfig,
     ParallelismType,
     TPConfig,
-    VersionedDeprecationWarning,
     batch_get,
     batch_set,
     dist,
@@ -617,7 +616,7 @@ def _validate_parallelism_configs(self):
         # Load monolith rank0 only
         if self.load_monolith_rank0_only:
             if self.tp_config is not None:
-                raise ValueError('load_fsdp_monolith_rank0_only is not compatible with tensor parallelism (TP).')
+                raise ValueError('load_monolith_rank0_only is not compatible with tensor parallelism (TP).')
             assert self.fsdp_config is not None
             error_message = ''
             if self.fsdp_config.sync_module_states == False:
@@ -900,21 +899,6 @@ def fsdp_state_dict_type(self):
     def fsdp_sharded_state_dict_enabled(self):
         return self.fsdp_config is not None and self.fsdp_enabled and self.fsdp_state_dict_type == 'sharded'

-    @property
-    def fsdp_device_mesh(self):
-        warnings.warn(VersionedDeprecationWarning('fsdp_device_mesh is deprecated. Use device_mesh instead.', '0.24'))
-        return self.device_mesh
-
-    @property
-    def load_fsdp_monolith_rank0_only(self):
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'load_fsdp_monolith_rank0_only is deprecated. Use load_monolith_rank0_only instead.',
-                '0.24',
-            ),
-        )
-        return self.load_monolith_rank0_only
-
     @property
     def load_monolith_rank0_only(self):
         return (
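
With these deprecated accessors removed, callers must switch to the replacement properties that the warning messages pointed to. A migration sketch, assuming `state` is a `composer.core.State` instance:

```python
# Removed in v0.24.0 (formerly emitted VersionedDeprecationWarning):
#   mesh = state.fsdp_device_mesh
#   monolith = state.load_fsdp_monolith_rank0_only

# Replacements, per the warning messages above:
mesh = state.device_mesh
monolith = state.load_monolith_rank0_only
```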
3 changes: 1 addition & 2 deletions composer/trainer/trainer.py
@@ -114,7 +114,6 @@
     Transform,
     VersionedDeprecationWarning,
     checkpoint,
-    create_fsdp_config,
     dist,
     ensure_tuple,
     export_with_logger,
@@ -1323,7 +1322,7 @@ def __init__(
                 if isinstance(parallelism_config['fsdp'], FSDPConfig):
                     parallelism_config_args['fsdp'] = parallelism_config['fsdp']
                 else:
-                    parallelism_config_args['fsdp'] = create_fsdp_config(parallelism_config['fsdp'])
+                    parallelism_config_args['fsdp'] = FSDPConfig(**parallelism_config['fsdp'])
             if 'tp' in parallelism_config and parallelism_config['tp'] is not None:
                 if isinstance(parallelism_config['tp'], TPConfig):
                     parallelism_config_args['tp'] = parallelism_config['tp']
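
Since `create_fsdp_config` no longer mediates dict inputs, a plain dict under `parallelism_config['fsdp']` is expanded straight into `FSDPConfig(**...)`, so its keys must be valid `FSDPConfig` fields. A sketch of both accepted forms (assumes `model` is a `ComposerModel` and that `sharding_strategy` is an `FSDPConfig` field):

```python
from composer import Trainer
from composer.utils import FSDPConfig

# Dict form: keys are forwarded verbatim to FSDPConfig(**...).
trainer = Trainer(
    model=model,
    parallelism_config={'fsdp': {'sharding_strategy': 'FULL_SHARD'}},
)

# Dataclass form: passed through unchanged.
trainer = Trainer(
    model=model,
    parallelism_config={'fsdp': FSDPConfig(sharding_strategy='FULL_SHARD')},
)
```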
3 changes: 1 addition & 2 deletions composer/utils/__init__.py
@@ -76,7 +76,7 @@
     UCObjectStore,
     build_remote_backend,
 )
-from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig, create_fsdp_config
+from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig
 from composer.utils.remote_uploader import RemoteFilesExistingCheckStatus, RemoteUploader
 from composer.utils.retrying import retry
 from composer.utils.string_enum import StringEnum
@@ -153,7 +153,6 @@
     'KNOWN_COMPRESSORS',
     'STR_TO_DTYPE',
     'ParallelismType',
-    'create_fsdp_config',
     'FSDPConfig',
     'TPConfig',
     'ParallelismConfig',
39 changes: 0 additions & 39 deletions composer/utils/parallelism.py
@@ -3,14 +3,11 @@

 """Parallelism configs."""

-import warnings
 from dataclasses import dataclass
 from typing import Any, Optional

 from torch.distributed._tensor.device_mesh import DeviceMesh

-from composer.utils.warnings import VersionedDeprecationWarning
-

 @dataclass
 class FSDPConfig:
@@ -45,42 +42,6 @@ class FSDPConfig:
     verbose: bool = False


-def create_fsdp_config(fsdp_config: dict[str, Any]):
-    """Modify fsdp_config to set default values for missing keys."""
-    fsdp_config = {**fsdp_config}  # Shallow copy to avoid modifying input
-    if 'process_group' in fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'process_group is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
-                remove_version='0.24',
-            ),
-        )
-
-    if 'device_mesh' in fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'device_mesh is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
-                remove_version='0.24',
-            ),
-        )
-        if 'data_parallel_shard_degree' in fsdp_config or 'data_parallel_replicate_degree' in fsdp_config:
-            raise ValueError(
-                'Cannot specify both `device_mesh` and `data_parallel_shard_degree` or `data_parallel_replicate_degree`. Please remove `device_mesh`.',
-            )
-        device_mesh = fsdp_config.pop('device_mesh')
-        if len(device_mesh) == 1:
-            fsdp_config['data_parallel_shard_degree'] = device_mesh[0]
-        elif len(device_mesh) == 2:
-            fsdp_config['data_parallel_replicate_degree'] = device_mesh[0]
-            fsdp_config['data_parallel_shard_degree'] = device_mesh[1]
-        else:
-            raise ValueError(
-                f'device_mesh must be of length 1 or 2 but received length {len(device_mesh)} with device mesh {device_mesh}.',
-            )
-
-    return FSDPConfig(**fsdp_config)
-
-
 @dataclass
 class TPConfig:
     """Configuration for tensor parallelism (TP)."""
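
With this translation shim deleted, the `device_mesh` and `process_group` keys are no longer accepted; mesh dimensions must be spelled out as explicit degrees. Following the removed mapping (first mesh entry → replicate degree, second → shard degree), a migration sketch:

```python
from composer.utils import FSDPConfig

# Formerly: create_fsdp_config({'device_mesh': [2, 4], ...})
# The removed shim mapped device_mesh=[2, 4] to these explicit fields:
fsdp_config = FSDPConfig(
    data_parallel_replicate_degree=2,  # device_mesh[0]
    data_parallel_shard_degree=4,      # device_mesh[1]
)

# A 1-element mesh, device_mesh=[8], mapped to the shard degree only:
fsdp_config_1d = FSDPConfig(data_parallel_shard_degree=8)
```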
4 changes: 2 additions & 2 deletions docker/README.md
@@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. They are built on top of the
 <!-- BEGIN_COMPOSER_BUILD_MATRIX -->
 | Composer Version | CUDA Support | Docker Tag |
 |------------------|--------------|------------|
-| 0.23.5           | Yes          | `mosaicml/composer:latest`, `mosaicml/composer:0.23.5` |
-| 0.23.5           | No           | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.23.5_cpu` |
+| 0.24.0           | Yes          | `mosaicml/composer:latest`, `mosaicml/composer:0.24.0` |
+| 0.24.0           | No           | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.24.0_cpu` |
 <!-- END_COMPOSER_BUILD_MATRIX -->

 **Note**: For a lightweight installation, we recommend using a [MosaicML PyTorch Image](#pytorch-images) and manually
16 changes: 8 additions & 8 deletions docker/build_matrix.yaml
@@ -194,36 +194,36 @@
   TORCHVISION_VERSION: 0.17.2
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
   CUDA_VERSION: 12.4.1
-  IMAGE_NAME: composer-0-23-5
+  IMAGE_NAME: composer-0-24-0
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.11'
   PYTORCH_NIGHTLY_URL: ''
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.4.0
   TAGS:
-  - mosaicml/composer:0.23.5
-  - ghcr.io/databricks-mosaic/composer:0.23.5
+  - mosaicml/composer:0.24.0
+  - ghcr.io/databricks-mosaic/composer:0.24.0
   - mosaicml/composer:latest
   - ghcr.io/databricks-mosaic/composer:latest
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.19.0
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: ubuntu:20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
   CUDA_VERSION: ''
-  IMAGE_NAME: composer-0-23-5-cpu
+  IMAGE_NAME: composer-0-24-0-cpu
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.11'
   PYTORCH_NIGHTLY_URL: ''
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.4.0
   TAGS:
-  - mosaicml/composer:0.23.5_cpu
-  - ghcr.io/databricks-mosaic/composer:0.23.5_cpu
+  - mosaicml/composer:0.24.0_cpu
+  - ghcr.io/databricks-mosaic/composer:0.24.0_cpu
   - mosaicml/composer:latest_cpu
   - ghcr.io/databricks-mosaic/composer:latest_cpu
   TARGET: composer_stage
2 changes: 1 addition & 1 deletion docker/generate_build_matrix.py
@@ -244,7 +244,7 @@ def _main():
     composer_entries = []

     # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images
-    composer_versions = ['0.23.5']  # Only build images for the latest composer version
+    composer_versions = ['0.24.0']  # Only build images for the latest composer version
     composer_python_versions = [PRODUCTION_PYTHON_VERSION]  # just build composer against the latest

     for product in itertools.product(composer_python_versions, composer_versions, cuda_options):
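
The version string appears to fan out mechanically into the image names, tags, and install commands seen in `build_matrix.yaml` above. A sketch of the apparent derivation (hypothetical helper, not the script's actual code):

```python
version = '0.24.0'

image_name = 'composer-' + version.replace('.', '-')  # 'composer-0-24-0'
install_command = f'mosaicml[all]=={version}'         # 'mosaicml[all]==0.24.0'
tags = [f'mosaicml/composer:{version}', 'mosaicml/composer:latest']
```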
(1 of the 11 changed files is not shown.)
