mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2025-10-19 17:47:39 +00:00
Add torch.compile by default; shows an almost 1.8x improvement in throughput. Nice.
This commit is contained in:
7
train.py
7
train.py
@@ -59,6 +59,7 @@ lr_decay_iters = 320000 # how many steps to decay the learning rate for
|
||||
min_lr = 1e-5 # minimum learning rate
|
||||
# DDP settings
|
||||
backend = 'nccl' # 'nccl', 'gloo', etc.
|
||||
compile_model = True # use PyTorch 2.0 to compile the model to be faster
|
||||
# -----------------------------------------------------------------------------
|
||||
# poor man's Configurator. Potentially a bad idea. Example usage:
|
||||
# $ python train.py override_file --batch_size=32
|
||||
@@ -156,6 +157,12 @@ optimizer = model.configure_optimizers(weight_decay, learning_rate, betas)
|
||||
if init_from == 'resume':
    optimizer.load_state_dict(checkpoint['optimizer'])
|
||||
|
||||
# compile the model
|
||||
if compile_model:
    print("compiling the model... (takes a ~minute)")
    unoptimized_model = model
    model = torch.compile(model) # requires PyTorch 2.0
|
||||
|
||||
# wrap model into DDP container
|
||||
if ddp:
    model = DDP(model, device_ids=[gpu_id])
|
||||
|
Reference in New Issue
Block a user