# nanogpt-experiments/config/finetune_shakespeare.py

import time

# --- output location and evaluation cadence ---
out_dir = 'out-shakespeare'
eval_interval = 5
eval_iters = 40

# --- wandb logging (disabled here; set wandb_log = True to turn it on) ---
wandb_log = False
wandb_project = 'shakespeare'
wandb_run_name = f'ft-{time.time()}'  # suffix is the current Unix timestamp

# --- data and starting weights ---
dataset = 'shakespeare'
init_from = 'gpt2-xl'  # the largest GPT-2 model

# write a checkpoint only when the validation loss improves
always_save_checkpoint = False

# tokens processed per iteration:
#   1 (batch_size) * 32 (grad accum steps) * 1024 tokens = 32,768 tokens/iter
# shakespeare has 301,966 tokens, so one epoch is ~9.2 iters
batch_size = 1
gradient_accumulation_steps = 32
max_iters = 20

# fine-tune with a constant learning rate (no decay)
learning_rate = 3e-5
decay_lr = False
# --- appears to be git-blame residue (commit message, timestamp, source line); kept as comments ---
# candidate changes to apis, have to think through more 2023-01-01 01:29:48 +00:00			`import time`

# 			`out_dir = 'out-shakespeare'`
# tune the hyperparams a bit, in configs 2023-02-05 19:31:18 +00:00			`eval_interval = 5`
# 			`eval_iters = 40`
# candidate changes to apis, have to think through more 2023-01-01 01:29:48 +00:00			`wandb_log = False # feel free to turn on`
# 			`wandb_project = 'shakespeare'`
# 			`wandb_run_name = 'ft-' + str(time.time())`

# 			`dataset = 'shakespeare'`
# tune the hyperparams a bit, in configs 2023-02-05 19:31:18 +00:00			`init_from = 'gpt2-xl' # this is the largest GPT-2 model`

# 			`# only save checkpoints if the validation loss improves`
# 			`always_save_checkpoint = False`

# 			`# the number of examples per iter:`
# 			`# 1 batch_size * 32 grad_accum * 1024 tokens = 32,768 tokens/iter`
# 			`# shakespeare has 301,966 tokens, so 1 epoch ~= 9.2 iters`
# candidate changes to apis, have to think through more 2023-01-01 01:29:48 +00:00			`batch_size = 1`
# tune the hyperparams a bit, in configs 2023-02-05 19:31:18 +00:00			`gradient_accumulation_steps = 32`
# 			`max_iters = 20`
# candidate changes to apis, have to think through more 2023-01-01 01:29:48 +00:00
# tune the hyperparams a bit, in configs 2023-02-05 19:31:18 +00:00			`# finetune at constant LR`
# 			`learning_rate = 3e-5`
# candidate changes to apis, have to think through more 2023-01-01 01:29:48 +00:00			`decay_lr = False`