1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2025-08-30 09:27:57 +00:00

adding a lightweight configurator that may be a terrible mistake lol. also adding configs to evaluate the baseline GPT2 versions released by OpenAI on OWT. we have some ways to go to match those numbers atm

This commit is contained in:
Andrej Karpathy
2022-12-28 23:31:23 +00:00
parent c9fe00c0e9
commit 5d2b4807bf
6 changed files with 96 additions and 2 deletions

8
config/eval_gpt2.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 base checkpoint (no training).
# Model shape for reference: 12 layers, 12 heads, 768-dim embeddings (~124M params).
init_from = 'gpt2'
eval_only = True      # skip training entirely; just run the eval loop
wandb_log = False     # no experiment tracking for a one-off evaluation
batch_size = 8
eval_iters = 500      # averaged over many batches for a low-variance loss estimate

8
config/eval_gpt2_large.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 Large checkpoint (no training).
# Model shape for reference: 36 layers, 20 heads, 1280-dim embeddings (~774M params).
init_from = 'gpt2-large'
eval_only = True      # evaluation run only; never enter the training loop
wandb_log = False     # tracking disabled for this one-shot measurement
batch_size = 8
eval_iters = 500      # extra iterations keep the loss estimate stable

8
config/eval_gpt2_medium.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 Medium checkpoint (no training).
# Model shape for reference: 24 layers, 16 heads, 1024-dim embeddings (~350M params).
init_from = 'gpt2-medium'
eval_only = True      # run evaluation and exit; no optimizer steps
wandb_log = False     # no logging service needed for a baseline number
batch_size = 8
eval_iters = 500      # more batches -> tighter estimate of the mean loss

8
config/eval_gpt2_xl.py Normal file
View File

@@ -0,0 +1,8 @@
# Config: evaluate the pretrained GPT-2 XL checkpoint (no training).
# Model shape for reference: 48 layers, 25 heads, 1600-dim embeddings (~1558M params).
init_from = 'gpt2-xl'
eval_only = True      # evaluation pass only; training is skipped
wandb_log = False     # baseline measurement, nothing to track
batch_size = 8
eval_iters = 500      # averaging over many iterations reduces estimate noise