Stay true to the README and set gradient_accumulation_steps to 5, so that the default batch size is about 0.5M and the default configuration reproduces GPT-2.

2025-10-21 18:47:39 +00:00 · 2023-01-27 20:51:50 +00:00
parent 79dbe0086d
commit 001c1e7be7
1 changed files with 1 additions and 1 deletions
--- a/train.py
+++ b/train.py
@@ -45,7 +45,7 @@ wandb_project = 'owt'
 wandb_run_name = 'gpt2' # 'run' + str(time.time())
 # data
 dataset = 'openwebtext'
-gradient_accumulation_steps = 1 # used to simulate larger batch sizes
+gradient_accumulation_steps = 5 # used to simulate larger batch sizes
 batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
 block_size = 1024
 # model