add gpt2 training config

2025-12-16 21:28:29 +00:00 · 2023-02-03 22:14:37 +00:00
parent e170e40872
commit f9348f3f18
1 changed files with 23 additions and 0 deletions
--- a/config/train_gpt2.py
+++ b/config/train_gpt2.py
@@ -0,0 +1,23 @@
 # Training config for GPT-2 (124M): targets a loss of roughly 2.85
 # when run on a single node with 8x A100 40GB GPUs.
 # --- wandb logging settings ---
 wandb_log = True
 wandb_project = 'owt'
 wandb_run_name = 'gpt2-124M'
 # --- batch geometry ---
 # Tokens consumed per iteration across the whole node (~0.5M):
 #   12 sequences * 1024 tokens * 5 accumulation steps * 8 GPUs = 491,520
 block_size = 1024
 batch_size = 12
 gradient_accumulation_steps = 5
 # --- schedule length ---
 # 600,000 iterations * 491,520 tokens/iteration ~= 295B tokens (~300B).
 # The decay horizon is set to the same iteration count as the run length.
 max_iters = 600_000
 lr_decay_iters = 600_000
 # --- evaluation and logging cadence ---
 # (consumed by the training script, which is not part of this file)
 eval_interval = 1_000
 eval_iters = 200
 log_interval = 10
 # --- regularization ---
 weight_decay = 0.1