mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-12-18 14:10:28 +00:00
change grad accum to default off because i think it just confuses everyone
This commit is contained in:
parent
d01863ef01
commit
d8b1a94519
2
train.py
2
train.py
@@ -45,7 +45,7 @@ wandb_project = 'owt'
|
||||
wandb_run_name = 'gpt2' # 'run' + str(time.time())
|
||||
# data
|
||||
dataset = 'openwebtext'
|
||||
-gradient_accumulation_steps = 5 # used to simulate larger batch sizes
|
||||
+gradient_accumulation_steps = 1 # used to simulate larger batch sizes
|
||||
batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
|
||||
block_size = 1024
|
||||
# model
|
||||
|
Loading…
Reference in New Issue
Block a user