mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2025-01-05 15:00:28 +00:00
Minor rearranging of args and removal of some spurious ones, such as the wandb entity. Thanks, @tcapelle.
This commit is contained in:
parent
529c967a65
commit
9629093e53
@ -8,7 +8,7 @@ from model import GPTConfig, GPT
|
|||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
out_dir = 'out'
|
out_dir = 'out'
|
||||||
device = 'cuda:2'
|
device = 'cuda'
|
||||||
compile = False
|
compile = False
|
||||||
start = "\n" # or "<|endoftext|>" or whatever you like
|
start = "\n" # or "<|endoftext|>" or whatever you like
|
||||||
num_samples = 10 # number of samples to draw
|
num_samples = 10 # number of samples to draw
|
||||||
|
15
train.py
15
train.py
@ -10,7 +10,6 @@ $ torchrun --standalone --nproc_per_node=4 train.py
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
|
|
||||||
@ -31,9 +30,9 @@ log_interval = 1
|
|||||||
eval_iters = 200
|
eval_iters = 200
|
||||||
eval_only = False # if True, script exits right after the first eval
|
eval_only = False # if True, script exits right after the first eval
|
||||||
always_save_checkpoint = True # if True, always save a checkpoint after each eval
|
always_save_checkpoint = True # if True, always save a checkpoint after each eval
|
||||||
|
init_from = 'scratch' # 'scratch' or 'resume' or 'gpt2*'
|
||||||
# wandb logging
|
# wandb logging
|
||||||
wandb_log = False # disabled by default
|
wandb_log = False # disabled by default
|
||||||
wandb_entity = 'karpathy'
|
|
||||||
wandb_project = 'owt'
|
wandb_project = 'owt'
|
||||||
wandb_run_name = 'gpt2' # 'run' + str(time.time())
|
wandb_run_name = 'gpt2' # 'run' + str(time.time())
|
||||||
# data
|
# data
|
||||||
@ -41,24 +40,24 @@ dataset = 'openwebtext'
|
|||||||
batch_size = 12
|
batch_size = 12
|
||||||
block_size = 1024
|
block_size = 1024
|
||||||
# model
|
# model
|
||||||
device = 'cuda:0'
|
|
||||||
init_from = 'scratch' # 'scratch' or 'resume' or 'gpt2*'
|
|
||||||
dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
|
|
||||||
n_layer = 12
|
n_layer = 12
|
||||||
n_head = 12
|
n_head = 12
|
||||||
n_embd = 768
|
n_embd = 768
|
||||||
|
dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
|
||||||
# adamw optimizer
|
# adamw optimizer
|
||||||
learning_rate = 6e-4 # max learning rate
|
learning_rate = 6e-4 # max learning rate
|
||||||
max_iters = 400000 # total number of training iterations
|
max_iters = 600000 # total number of training iterations
|
||||||
weight_decay = 1e-2
|
weight_decay = 1e-2
|
||||||
betas = (0.9, 0.95)
|
betas = (0.9, 0.95)
|
||||||
# learning rate decay settings
|
# learning rate decay settings
|
||||||
decay_lr = True # whether to decay the learning rate
|
decay_lr = True # whether to decay the learning rate
|
||||||
warmup_iters = 2000 # how many steps to warm up for
|
warmup_iters = 2000 # how many steps to warm up for
|
||||||
lr_decay_iters = 400000 # should be ~= max_iters per Chinchilla
|
lr_decay_iters = 600000 # should be ~= max_iters per Chinchilla
|
||||||
min_lr = 6e-5 # minimum learning rate, should be ~= learning_rate/10 per Chinchilla
|
min_lr = 6e-5 # minimum learning rate, should be ~= learning_rate/10 per Chinchilla
|
||||||
# DDP settings
|
# DDP settings
|
||||||
backend = 'nccl' # 'nccl', 'gloo', etc.
|
backend = 'nccl' # 'nccl', 'gloo', etc.
|
||||||
|
# system
|
||||||
|
device = 'cuda'
|
||||||
compile = True # use PyTorch 2.0 to compile the model to be faster
|
compile = True # use PyTorch 2.0 to compile the model to be faster
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
exec(open('configurator.py').read()) # overrides from command line or config file
|
exec(open('configurator.py').read()) # overrides from command line or config file
|
||||||
@ -181,7 +180,7 @@ def get_lr(iter):
|
|||||||
|
|
||||||
# logging
|
# logging
|
||||||
if wandb_log and gpu_id == 0:
|
if wandb_log and gpu_id == 0:
|
||||||
wandb.init(project=wandb_project, entity=wandb_entity, name=wandb_run_name)
|
wandb.init(project=wandb_project, name=wandb_run_name)
|
||||||
wandb.config = {
|
wandb.config = {
|
||||||
"batch_size": batch_size,
|
"batch_size": batch_size,
|
||||||
"block_size": block_size,
|
"block_size": block_size,
|
||||||
|
Loading…
Reference in New Issue
Block a user