mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-11-10 20:09:58 +00:00
Let me set bias=True until I validate it properly; this should be OK to merge to master for now, since it is equivalent to the previous functionality.
This commit is contained in:
parent
e808a67149
commit
79dbe0086d
2
train.py
2
train.py
@@ -53,7 +53,7 @@ n_layer = 12
|
||||
n_head = 12
|
||||
n_embd = 768
|
||||
dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
|
||||
-bias = False # do we use bias inside LayerNorm and Linear layers?
|
||||
+bias = True # do we use bias inside LayerNorm and Linear layers?
|
||||
# adamw optimizer
|
||||
learning_rate = 6e-4 # max learning rate
|
||||
max_iters = 600000 # total number of training iterations
|
||||
|
Loading…
Reference in New Issue
Block a user