fix "cuda out of memory" when resuming training

2025-07-30 06:12:52 +00:00 · 2023-04-05 17:28:55 -04:00 · 2023-04-05 17:28:55 -04:00 · 4ac2e8ce3a
commit 4ac2e8ce3a
parent a82b33b525
1 changed file with 1 addition and 0 deletions
--- a/train.py
+++ b/train.py
@@ -189,6 +189,7 @@ scaler = torch.cuda.amp.GradScaler(enabled=(dtype == 'float16'))
 optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
 if init_from == 'resume':
    optimizer.load_state_dict(checkpoint['optimizer'])
+checkpoint = None # free up memory

 # compile the model
 if compile: