mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-11-14 05:44:51 +00:00

change grad accum to default off because i think it just confuses everyone

Andrej Karpathy 2023-02-02 18:38:49 +00:00
parent d01863ef01
commit d8b1a94519


@@ -45,7 +45,7 @@ wandb_project = 'owt'
 wandb_run_name = 'gpt2' # 'run' + str(time.time())
 # data
 dataset = 'openwebtext'
-gradient_accumulation_steps = 5 # used to simulate larger batch sizes
+gradient_accumulation_steps = 1 # used to simulate larger batch sizes
 batch_size = 12 # if gradient_accumulation_steps > 1, this is the micro-batch size
 block_size = 1024
 # model
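For context, gradient accumulation runs several forward/backward passes and sums their gradients before a single optimizer step, simulating an effective batch of batch_size * gradient_accumulation_steps. The sketch below is a minimal, self-contained illustration of that mechanic under assumed toy inputs; the linear model and random data are hypothetical stand-ins, not nanoGPT's actual training loop.

# Minimal sketch of gradient accumulation: several backward() calls
# accumulate into .grad before one optimizer step. Toy model and data
# are stand-ins for illustration only.
import torch

gradient_accumulation_steps = 5   # effective batch = 5 * 12 = 60 samples
batch_size = 12                   # micro-batch size per forward/backward pass

model = torch.nn.Linear(16, 1)    # toy model standing in for the GPT
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

for step in range(3):             # a few optimizer updates
    for micro_step in range(gradient_accumulation_steps):
        x = torch.randn(batch_size, 16)   # stand-in for a training batch
        y = torch.randn(batch_size, 1)
        loss = torch.nn.functional.mse_loss(model(x), y)
        # scale so the accumulated gradient averages over all micro-batches
        (loss / gradient_accumulation_steps).backward()
    optimizer.step()              # one weight update per effective batch
    optimizer.zero_grad(set_to_none=True)

With gradient_accumulation_steps = 1, the inner loop collapses to a single pass per update, which is the simpler default this commit adopts.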