1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-09-21 03:39:44 +00:00

Merge pull request #236 from ymurenko/master

fix "cuda out of memory" when resuming training
This commit is contained in:
Andrej 2023-04-12 22:09:42 -07:00 committed by GitHub
commit 2457471c9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -190,6 +190,7 @@ scaler = torch.cuda.amp.GradScaler(enabled=(dtype == 'float16'))
 optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
 if init_from == 'resume':
     optimizer.load_state_dict(checkpoint['optimizer'])
+checkpoint = None # free up memory
 # compile the model
 if compile: