mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2025-08-31 09:57:57 +00:00
Add support for character-level language models: a new character-level Shakespeare dataset, a new config file that shows how to train a character-level baby GPT on it, and an adjusted sample function that figures out whether it should decode with characters or GPT-2 BPE tokens. The current implementation is a bit hacky and basically assumes just these two possibilities. In the future we may want to support more general encoders or decoders.
This commit is contained in:
36
config/train_shakespeare_char.py
Normal file
36
config/train_shakespeare_char.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# train a miniature character-level shakespeare model
|
||||
# good for debugging and playing on macbooks and such
|
||||
|
||||
out_dir = 'out-shakespeare-char'
|
||||
eval_interval = 250 # keep frequent because we'll overfit
|
||||
eval_iters = 200
|
||||
log_interval = 10 # don't print too often
|
||||
|
||||
# we expect to overfit on this small dataset, so only save when val improves
|
||||
always_save_checkpoint = False # overfitting is expected here, so checkpoint only when val loss improves
|
||||
|
||||
wandb_log = False # override via command line if you like
|
||||
wandb_project = 'shakespeare-char'
|
||||
wandb_run_name = 'mini-gpt'
|
||||
|
||||
dataset = 'shakespeare_char'
|
||||
batch_size = 64
|
||||
block_size = 128 # context of up to 128 previous characters
|
||||
|
||||
# baby GPT model :)
|
||||
n_layer = 4
|
||||
n_head = 4
|
||||
n_embd = 128
|
||||
dropout = 0.0
|
||||
|
||||
learning_rate = 1e-3 # with baby networks can afford to go a bit higher
|
||||
max_iters = 5000
|
||||
lr_decay_iters = 5000 # make equal to max_iters usually
|
||||
min_lr = 1e-4 # learning_rate / 10 usually
|
||||
beta2 = 0.99 # make a bit bigger because number of tokens per iter is small
|
||||
|
||||
warmup_iters = 100 # warmup is probably not strictly necessary
|
||||
|
||||
# on macbook also add
|
||||
# device = 'cpu' # run on cpu only
|
||||
# compile = False # do not torch compile the model
|
Reference in New Issue
Block a user