From 001c1e7be708cb505b3db76d6c7b3468bd1e471a Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Fri, 27 Jan 2023 20:51:50 +0000
Subject: [PATCH] stay true to the README file and set grad accum to 5, so the default batch size is about 0.5M and is reproducing gpt2

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 7433546..9831c2d 100644
--- a/train.py
+++ b/train.py
@@ -45,7 +45,7 @@ wandb_project = 'owt'
 wandb_run_name = 'gpt2' # 'run' + str(time.time())
 # data
 dataset = 'openwebtext'
-gradient_accumulation_steps = 1 # used to simulate larger batch sizes
+gradient_accumulation_steps = 5 # used to simulate larger batch sizes
 batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
 block_size = 1024
 # model