
add torch.compile by default, shows almost 1.8X improvement in throughput nice

This commit is contained in:
Andrej Karpathy
2022-12-30 00:07:13 +00:00
parent fb52554ca8
commit 5a725d9098
4 changed files with 19 additions and 3 deletions
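
The ~1.8X throughput figure could in principle be sanity-checked with a quick before/after timing loop. The sketch below is a hypothetical micro-benchmark, not part of this commit: the model shape, batch size, and step count are arbitrary, and on CPU (or on GPU without `torch.cuda.synchronize()` around the timers) the measured speedup will differ from the figure above.

import time
import torch
import torch.nn as nn

def steps_per_second(model, n_steps=50, batch=32, dim=256):
    opt = torch.optim.AdamW(model.parameters())
    x = torch.randn(batch, dim)
    # one warmup step so torch.compile's one-time tracing cost is not timed
    model(x).sum().backward()
    opt.step(); opt.zero_grad()
    t0 = time.time()
    for _ in range(n_steps):
        loss = model(x).sum()
        loss.backward()
        opt.step()
        opt.zero_grad()
    return n_steps / (time.time() - t0)

net = nn.Sequential(nn.Linear(256, 1024), nn.GELU(), nn.Linear(1024, 256))
eager_ips = steps_per_second(net)
compiled_ips = steps_per_second(torch.compile(net))  # requires PyTorch 2.0
print(f"eager: {eager_ips:.1f} it/s, compiled: {compiled_ips:.1f} it/s, "
      f"speedup: {compiled_ips / eager_ips:.2f}x")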


@@ -59,6 +59,7 @@ lr_decay_iters = 320000 # how many steps to decay the learning rate for
 min_lr = 1e-5 # minimum learning rate
 # DDP settings
 backend = 'nccl' # 'nccl', 'gloo', etc.
+compile_model = True # use PyTorch 2.0 to compile the model to be faster
 # -----------------------------------------------------------------------------
 # poor man's Configurator. Potentially a bad idea. Example usage:
 # $ python train.py override_file --batch_size=32
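
For context, the override mechanism referenced in the comment above might look roughly like the following. This is a sketch of the general `--key=value` pattern, not the repository's actual configurator code, and the defaults shown are hypothetical.

import sys
from ast import literal_eval

# hypothetical defaults mirroring the config block above
overridable = {'batch_size': 32, 'compile_model': True}

for arg in sys.argv[1:]:
    if arg.startswith('--') and '=' in arg:
        key, val = arg[2:].split('=', 1)
        if key in overridable:
            try:
                overridable[key] = literal_eval(val)  # e.g. False, 32, 1e-5
            except (ValueError, SyntaxError):
                overridable[key] = val  # keep the raw string if not a literal

Under that pattern, the new flag could presumably be disabled from the command line with `python train.py --compile_model=False`.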
@@ -156,6 +157,12 @@ optimizer = model.configure_optimizers(weight_decay, learning_rate, betas)
 if init_from == 'resume':
     optimizer.load_state_dict(checkpoint['optimizer'])
+
+# compile the model
+if compile_model:
+    print("compiling the model... (takes a ~minute)")
+    unoptimized_model = model
+    model = torch.compile(model) # requires PyTorch 2.0
 
 # wrap model into DDP container
 if ddp:
     model = DDP(model, device_ids=[gpu_id])
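
A self-contained sketch of the pattern in the hunk above, runnable outside the training script; the toy module here is purely illustrative, and only the `torch.compile` call itself (PyTorch 2.0+) is from the commit.

import torch
import torch.nn as nn

# toy stand-in for the GPT model (illustrative only)
model = nn.Sequential(nn.Linear(128, 256), nn.GELU(), nn.Linear(256, 128))

if hasattr(torch, 'compile'):    # torch.compile exists only in PyTorch >= 2.0
    unoptimized_model = model    # keep a handle to the eager-mode module
    model = torch.compile(model) # returns an optimized wrapper module

x = torch.randn(8, 128)
y = model(x)  # first call triggers compilation; later calls reuse the result

Keeping `unoptimized_model` around presumably leaves the original eager module available, e.g. for saving a checkpoint whose state_dict keys are free of the compiled wrapper's `_orig_mod.` prefix.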