mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2025-08-30 09:27:57 +00:00
adding a lightweight configurator that may be a terrible mistake lol. also adding configs to evaluate the baseline GPT2 versions released by OpenAI on OWT. we have some ways to go to match those numbers atm
This commit is contained in:
config/eval_gpt2.py (new file, 8 lines)
@@ -0,0 +1,8 @@
# Evaluate the baseline OpenAI GPT-2 checkpoint (124M parameters).
# For reference, this model's dimensions are: n_layer=12, n_head=12, n_embd=768.
batch_size = 8
eval_iters = 500  # use more iterations to get a lower-variance loss estimate
eval_only = True  # run the evaluation loop once and exit; no training
wandb_log = False  # no experiment logging for a one-off eval
init_from = 'gpt2'  # initialize from the pretrained GPT-2 weights
config/eval_gpt2_large.py (new file, 8 lines)
@@ -0,0 +1,8 @@
# Evaluate the baseline OpenAI GPT-2 Large checkpoint (774M parameters).
# For reference, this model's dimensions are: n_layer=36, n_head=20, n_embd=1280.
batch_size = 8
eval_iters = 500  # use more iterations to get a lower-variance loss estimate
eval_only = True  # run the evaluation loop once and exit; no training
wandb_log = False  # no experiment logging for a one-off eval
init_from = 'gpt2-large'  # initialize from the pretrained GPT-2 Large weights
config/eval_gpt2_medium.py (new file, 8 lines)
@@ -0,0 +1,8 @@
# Evaluate the baseline OpenAI GPT-2 Medium checkpoint (350M parameters).
# For reference, this model's dimensions are: n_layer=24, n_head=16, n_embd=1024.
batch_size = 8
eval_iters = 500  # use more iterations to get a lower-variance loss estimate
eval_only = True  # run the evaluation loop once and exit; no training
wandb_log = False  # no experiment logging for a one-off eval
init_from = 'gpt2-medium'  # initialize from the pretrained GPT-2 Medium weights
config/eval_gpt2_xl.py (new file, 8 lines)
@@ -0,0 +1,8 @@
# Evaluate the baseline OpenAI GPT-2 XL checkpoint (1558M parameters).
# For reference, this model's dimensions are: n_layer=48, n_head=25, n_embd=1600.
batch_size = 8
eval_iters = 500  # use more iterations to get a lower-variance loss estimate
eval_only = True  # run the evaluation loop once and exit; no training
wandb_log = False  # no experiment logging for a one-off eval
init_from = 'gpt2-xl'  # initialize from the pretrained GPT-2 XL weights
Reference in New Issue
Block a user