mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-12-18 14:10:28 +00:00
Set bias=True for now, until I validate bias=False properly. This should be OK to merge to master, since it is equivalent to the previous functionality.
This commit is contained in:
parent
e808a67149
commit
79dbe0086d
2
train.py
2
train.py
@@ -53,7 +53,7 @@ n_layer = 12
|
|||||||
n_head = 12
|
n_head = 12
|
||||||
n_embd = 768
|
n_embd = 768
|
||||||
dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
|
dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
|
||||||
bias = False # do we use bias inside LayerNorm and Linear layers?
|
bias = True # do we use bias inside LayerNorm and Linear layers?
|
||||||
# adamw optimizer
|
# adamw optimizer
|
||||||
learning_rate = 6e-4 # max learning rate
|
learning_rate = 6e-4 # max learning rate
|
||||||
max_iters = 600000 # total number of training iterations
|
max_iters = 600000 # total number of training iterations
|
||||||
|
Loading…
Reference in New Issue
Block a user