Mirror of https://github.com/osmarks/nanogpt-experiments.git

Merge pull request #145 from otaviogood/gradAccumStability

fix for training stability on single GPU
commit ae3a8d5fdd
Andrej 2023-02-14 18:48:54 -08:00, committed by GitHub


@@ -45,7 +45,7 @@ wandb_project = 'owt'
 wandb_run_name = 'gpt2' # 'run' + str(time.time())
 # data
 dataset = 'openwebtext'
-gradient_accumulation_steps = 1 # used to simulate larger batch sizes
+gradient_accumulation_steps = 5 # used to simulate larger batch sizes
 batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
 block_size = 1024
 # model
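For scale (derived directly from the values in the diff): under 8-GPU DDP each rank accumulates 5 micro-batches, so one optimizer step sees 8 × 5 × 12 × 1024 = 491,520 tokens (gradient_accumulation_steps × batch_size × block_size per GPU). The second hunk below makes a single-GPU run accumulate 5 × 8 = 40 micro-batches so it sees the same effective batch.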
@@ -92,6 +92,7 @@ else:
     # if not ddp, we are running on a single gpu, and one process
     master_process = True
     seed_offset = 0
+    gradient_accumulation_steps *= 8 # simulate 8 gpus
 if master_process:
     os.makedirs(out_dir, exist_ok=True)
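The effect of the new *= 8 line is easiest to see against the usual gradient-accumulation pattern. The sketch below is illustrative only, not the repository's actual training loop: train_step and get_batch are hypothetical names, and the model is assumed to return a (logits, loss) pair as nanoGPT's GPT.forward does. Gradients from several micro-batches are summed before one optimizer step, so the update behaves like a single large batch of gradient_accumulation_steps × batch_size sequences.

def train_step(model, optimizer, get_batch, gradient_accumulation_steps):
    # Hypothetical helper for illustration; not nanoGPT's actual loop.
    optimizer.zero_grad(set_to_none=True)
    for _ in range(gradient_accumulation_steps):
        X, Y = get_batch()            # one micro-batch of (input, target) token ids
        logits, loss = model(X, Y)    # assumes the model returns (logits, loss)
        # Scale before backward so the summed gradients equal the mean
        # gradient over the whole simulated batch.
        (loss / gradient_accumulation_steps).backward()
    optimizer.step()                  # one weight update per simulated batch

With the config above, a single-GPU run would call this with gradient_accumulation_steps = 5 * 8 = 40; under 8-GPU DDP each rank uses 5, and the gradient all-reduce across ranks supplies the remaining factor of 8.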