mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-11-11 04:19:57 +00:00
fix for training stability on single GPU
This commit is contained in:
parent
e58f0cfa94
commit
086ebe1822
3
train.py
3
train.py
@ -45,7 +45,7 @@ wandb_project = 'owt'
|
|||||||
wandb_run_name = 'gpt2' # 'run' + str(time.time())
|
wandb_run_name = 'gpt2' # 'run' + str(time.time())
|
||||||
# data
|
# data
|
||||||
dataset = 'openwebtext'
|
dataset = 'openwebtext'
|
||||||
gradient_accumulation_steps = 1 # used to simulate larger batch sizes
|
gradient_accumulation_steps = 5 # used to simulate larger batch sizes
|
||||||
batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
|
batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
|
||||||
block_size = 1024
|
block_size = 1024
|
||||||
# model
|
# model
|
||||||
@ -92,6 +92,7 @@ else:
|
|||||||
# if not ddp, we are running on a single gpu, and one process
|
# if not ddp, we are running on a single gpu, and one process
|
||||||
master_process = True
|
master_process = True
|
||||||
seed_offset = 0
|
seed_offset = 0
|
||||||
|
gradient_accumulation_steps *= 8 # simulate 8 gpus
|
||||||
|
|
||||||
if master_process:
|
if master_process:
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
os.makedirs(out_dir, exist_ok=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user