From 7d7ded25ce400388f7e6084ba829de9637e840f7 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Sat, 14 Jan 2023 03:59:53 +0000 Subject: [PATCH] a bit better settings... for a single gpu at least. these settings would fry a simple cpu though i think --- config/train_shakespeare_char.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/train_shakespeare_char.py b/config/train_shakespeare_char.py index d2b6e83..c50e4dd 100644 --- a/config/train_shakespeare_char.py +++ b/config/train_shakespeare_char.py @@ -15,13 +15,13 @@ wandb_run_name = 'mini-gpt' dataset = 'shakespeare_char' batch_size = 64 -block_size = 128 # context of up to 128 previous characters +block_size = 256 # context of up to 256 previous characters # baby GPT model :) -n_layer = 4 -n_head = 4 -n_embd = 128 -dropout = 0.0 +n_layer = 6 +n_head = 6 +n_embd = 384 +dropout = 0.2 learning_rate = 1e-3 # with baby networks can afford to go a bit higher max_iters = 5000