1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2025-08-30 09:27:57 +00:00

adding a lightweight configurator that may be a terrible mistake lol. also adding configs to evaluate the baseline GPT2 versions released by OpenAI on OWT. we have some ways to go to match those numbers atm

This commit is contained in:
Andrej Karpathy
2022-12-28 23:31:23 +00:00
parent c9fe00c0e9
commit 5d2b4807bf
6 changed files with 96 additions and 2 deletions

8
config/eval_gpt2.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 base checkpoint (no training).
# Model shape for reference: 12 layers, 12 heads, 768-dim embeddings (~124M params).
init_from = 'gpt2'
eval_only = True      # skip training entirely; just run the eval loop
wandb_log = False     # no experiment tracking for a one-off evaluation
batch_size = 8
eval_iters = 500      # averaged over many batches for a low-variance loss estimate

8
config/eval_gpt2_large.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 Large checkpoint (no training).
# Model shape for reference: 36 layers, 20 heads, 1280-dim embeddings (~774M params).
init_from = 'gpt2-large'
eval_only = True      # evaluation run only; never enter the training loop
wandb_log = False     # tracking disabled for this one-shot measurement
batch_size = 8
eval_iters = 500      # extra iterations keep the loss estimate stable

8
config/eval_gpt2_medium.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 Medium checkpoint (no training).
# Model shape for reference: 24 layers, 16 heads, 1024-dim embeddings (~350M params).
init_from = 'gpt2-medium'
eval_only = True      # run evaluation and exit; no optimizer steps
wandb_log = False     # no logging service needed for a baseline number
batch_size = 8
eval_iters = 500      # more batches -> tighter estimate of the mean loss

8
config/eval_gpt2_xl.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 XL checkpoint (no training).
# Model shape for reference: 48 layers, 25 heads, 1600-dim embeddings (~1558M params).
init_from = 'gpt2-xl'
eval_only = True      # evaluation pass only; training is skipped
wandb_log = False     # baseline measurement, nothing to track
batch_size = 8
eval_iters = 500      # averaging over many iterations reduces estimate noise