diff --git a/diffusion/callbacks/log_diffusion_images.py b/diffusion/callbacks/log_diffusion_images.py
index 402ffd5d..c36b75d9 100644
--- a/diffusion/callbacks/log_diffusion_images.py
+++ b/diffusion/callbacks/log_diffusion_images.py
@@ -98,7 +98,7 @@ def __init__(self,
             latent_batch = {}
             tokenized_t5 = t5_tokenizer(batch,
                                         padding='max_length',
-                                        max_length=t5_tokenizer.model.max_length,
+                                        max_length=t5_tokenizer.model_max_length,
                                         truncation=True,
                                         return_tensors='pt')
             t5_attention_mask = tokenized_t5['attention_mask'].to(torch.bool).cuda()
@@ -108,7 +108,7 @@ def __init__(self,
 
             tokenized_clip = clip_tokenizer(batch,
                                             padding='max_length',
-                                            max_length=t5_tokenizer.model.max_length,
+                                            max_length=clip_tokenizer.model_max_length,
                                             truncation=True,
                                             return_tensors='pt')
             clip_attention_mask = tokenized_clip['attention_mask'].cuda()
diff --git a/diffusion/train.py b/diffusion/train.py
index 9a9dbaf5..becff0f1 100644
--- a/diffusion/train.py
+++ b/diffusion/train.py
@@ -88,17 +88,20 @@ def train(config: DictConfig) -> None:
 
     model: ComposerModel = hydra.utils.instantiate(config.model)
 
+    # If the model has a tokenizer, we'll need it for the dataset
+    if hasattr(model, 'tokenizer'):
+        tokenizer = model.tokenizer
+    else:
+        tokenizer = None
+
    if hasattr(model, 'autoencoder_loss'):
         # Check if this is training an autoencoder. If so, the optimizer needs different param groups
         optimizer = make_autoencoder_optimizer(config, model)
-        tokenizer = None
     elif isinstance(model, ComposerTextToImageMMDiT):
         # Check if this is training a transformer. If so, the optimizer needs different param groups
         optimizer = make_transformer_optimizer(config, model)
-        tokenizer = model.tokenizer
     else:
         optimizer = hydra.utils.instantiate(config.optimizer, params=model.parameters())
-        tokenizer = model.tokenizer
 
     # Load train dataset. Currently this expects to load according to the datasetHparam method.
     # This means adding external datasets is currently not super easy. Will refactor or check for