reduce decoder model size to make eager runnable on v2-8 (#7288)

pytorch · Jun 18, 2024 · a0b76fc · a0b76fc
1 parent 1492eba
commit a0b76fc
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/examples/decoder_only_model.py b/examples/decoder_only_model.py
@@ -7,13 +7,14 @@
 from torch import nn
 
 
+# The default config is intentionally kept small so that it is runnable on a single TPU v2-8 core.
 @dataclass
 class DecoderOnlyConfig:
-  hidden_size: int = 1024
+  hidden_size: int = 512
   num_hidden_layers: int = 2
   num_attention_heads: int = 8
   num_key_value_heads: int = 4
-  intermediate_size = 32 * 1024
+  intermediate_size = 32 * 512
   vocab_size = 3200
   use_flash_attention = False