mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-12-18 06:00:29 +00:00
fix "cuda out of memory" when resuming training
This commit is contained in:
parent
a82b33b525
commit
4ac2e8ce3a
1
train.py
1
train.py
@@ -189,6 +189,7 @@ scaler = torch.cuda.amp.GradScaler(enabled=(dtype == 'float16'))
|
|||||||
optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
|
optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
|
||||||
if init_from == 'resume':
|
if init_from == 'resume':
|
||||||
optimizer.load_state_dict(checkpoint['optimizer'])
|
optimizer.load_state_dict(checkpoint['optimizer'])
|
||||||
|
checkpoint = None # free up memory
|
||||||
|
|
||||||
# compile the model
|
# compile the model
|
||||||
if compile:
|
if compile:
|
||||||
|
Loading…
Reference in New Issue
Block a user