1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-12-18 14:10:28 +00:00

let me set bias=True until I validate it properly, but this should be ok to merge to master for now, is equivalent to previous functionality

This commit is contained in:
Andrej Karpathy 2023-01-27 20:45:28 +00:00
parent e808a67149
commit 79dbe0086d

View File

@@ -53,7 +53,7 @@ n_layer = 12
 n_head = 12
 n_embd = 768
 dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
-bias = False # do we use bias inside LayerNorm and Linear layers?
+bias = True # do we use bias inside LayerNorm and Linear layers?
 # adamw optimizer
 learning_rate = 6e-4 # max learning rate
 max_iters = 600000 # total number of training iterations