mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-11-11 04:19:57 +00:00

fix for training stability on single GPU
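In short: the default gradient_accumulation_steps rises from 1 to 5 (sized for an 8-GPU DDP run), and a single-GPU run now multiplies it by 8 so its effective batch size matches the multi-GPU setup. With batch_size = 12 and block_size = 1024, both configurations then process 5 * 8 * 12 * 1024 = 491,520 tokens per optimizer step; the previous single-GPU default of 1 * 12 * 1024 ≈ 12k tokens gave much noisier gradients, which is the likely source of the instability the title refers to.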

Otavio Good 2023-02-13 10:42:44 -08:00
parent e58f0cfa94
commit 086ebe1822

train.py

@@ -45,7 +45,7 @@ wandb_project = 'owt'
 wandb_run_name = 'gpt2' # 'run' + str(time.time())
 # data
 dataset = 'openwebtext'
-gradient_accumulation_steps = 1 # used to simulate larger batch sizes
+gradient_accumulation_steps = 5 # used to simulate larger batch sizes
 batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
 block_size = 1024
 # model
@@ -92,6 +92,7 @@ else:
     # if not ddp, we are running on a single gpu, and one process
     master_process = True
     seed_offset = 0
+    gradient_accumulation_steps *= 8 # simulate 8 gpus
 if master_process:
     os.makedirs(out_dir, exist_ok=True)
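
For illustration, here is a minimal sketch of the gradient-accumulation pattern the new *= 8 line relies on. This is not nanoGPT's actual training loop: the toy linear model, random data, and learning rate below are stand-ins for the real GPT model and get_batch().

import torch
import torch.nn.functional as F

# Hypothetical stand-ins for nanoGPT's model, optimizer, and data loader.
torch.manual_seed(0)
model = torch.nn.Linear(16, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

gradient_accumulation_steps = 5 * 8  # 5 (per process) * 8 (simulated GPUs)
micro_batch_size = 12                # nanoGPT's batch_size

optimizer.zero_grad(set_to_none=True)
for micro_step in range(gradient_accumulation_steps):
    X = torch.randn(micro_batch_size, 16)  # stand-in for get_batch('train')
    Y = torch.randn(micro_batch_size, 1)
    loss = F.mse_loss(model(X), Y)
    # Scale the loss so the accumulated gradient is the mean over the full
    # effective batch, matching what 8 DDP processes (each doing 5
    # accumulation steps) would average via all-reduce.
    (loss / gradient_accumulation_steps).backward()
optimizer.step()

The key point is that gradients from all 40 micro-batches are summed into .grad before a single optimizer.step(), so one GPU reproduces the gradient statistics (though not the wall-clock speed) of the 8-GPU run.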