mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-12-18 06:00:29 +00:00
Merge pull request #236 from ymurenko/master
fix "cuda out of memory" when resuming training
This commit is contained in:
commit
2457471c9c
1
train.py
1
train.py
@ -190,6 +190,7 @@ scaler = torch.cuda.amp.GradScaler(enabled=(dtype == 'float16'))
|
||||
optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
|
||||
if init_from == 'resume':
|
||||
optimizer.load_state_dict(checkpoint['optimizer'])
|
||||
checkpoint = None # free up memory
|
||||
|
||||
# compile the model
|
||||
if compile:
|
||||
|
Loading…
Reference in New Issue
Block a user