Mirror of https://github.com/osmarks/nanogpt-experiments.git (synced 2025-10-26 21:07:40 +00:00)
Small README clarification and changes to the training script defaults
This commit is contained in:
8
train.py
8
train.py
@@ -19,14 +19,14 @@ out_dir = 'out'
|
||||
eval_interval = 500
|
||||
log_interval = 1
|
||||
# wandb logging
|
||||
-wandb_log = False
+wandb_log = False # disabled by default
|
||||
wandb_entity = 'karpathy'
|
||||
wandb_project = 'owt'
|
||||
-wandb_run_name = 'owt1' # 'run' + str(time.time())
+wandb_run_name = 'gpt2' # 'run' + str(time.time())
|
||||
# data
|
||||
dataset = 'openwebtext'
|
||||
-batch_size = 32
-block_size = 512
+batch_size = 8
+block_size = 1024
|
||||
# model
|
||||
device = 'cuda:0'
|
||||
init_from = 'scratch' # 'scratch' or 'resume' or 'gpt2*'
|
||||
|
||||
Reference in New Issue
Block a user