mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-12-18 14:10:28 +00:00

a bit better settings... for a single gpu at least. these settings would fry a simple cpu though i think

This commit is contained in:
Andrej Karpathy 2023-01-14 03:59:53 +00:00
parent 91d02510ce
commit 7d7ded25ce


@@ -15,13 +15,13 @@ wandb_run_name = 'mini-gpt'
 
 dataset = 'shakespeare_char'
 batch_size = 64
-block_size = 128 # context of up to 128 previous characters
+block_size = 256 # context of up to 256 previous characters
 
 # baby GPT model :)
-n_layer = 4
-n_head = 4
-n_embd = 128
-dropout = 0.0
+n_layer = 6
+n_head = 6
+n_embd = 384
+dropout = 0.2
 
 learning_rate = 1e-3 # with baby networks can afford to go a bit higher
 max_iters = 5000
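
For scale, here is a back-of-the-envelope parameter count comparing the old and new settings. This sketch is not part of the commit: it assumes the usual GPT-2-style block cost of roughly 12 * n_embd^2 parameters per layer plus token/position embeddings, and the 65-character vocabulary of the character-level Shakespeare dataset; the approx_params helper is hypothetical.

# Rough size estimate for the old vs. new "baby GPT" settings.
# Assumption: ~12 * n_embd^2 params per transformer block
# (attention ~4*n_embd^2 + MLP ~8*n_embd^2), plus embeddings.
def approx_params(n_layer: int, n_embd: int, block_size: int, vocab_size: int = 65) -> int:
    embeddings = vocab_size * n_embd + block_size * n_embd  # token + position tables
    blocks = 12 * n_layer * n_embd ** 2
    return embeddings + blocks

old = approx_params(n_layer=4, n_embd=128, block_size=128)
new = approx_params(n_layer=6, n_embd=384, block_size=256)
print(f"old: ~{old/1e6:.2f}M params, new: ~{new/1e6:.2f}M params")
# old: ~0.81M params, new: ~10.74M params -- roughly a 13x larger model,
# before counting the doubled context length.

That ~13x jump is what the commit message is pointing at: these defaults are sized for a single GPU. On a plain CPU one would presumably not run this config as-is, but dial it back via nanoGPT's command-line overrides (something like python train.py config/train_shakespeare_char.py --device=cpu --compile=False together with smaller model settings).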