From 9da1627c7f46948d6b133f496178187e8f16860f Mon Sep 17 00:00:00 2001
From: Ramtin Gharleghi <1287898+ramtingh@users.noreply.github.com>
Date: Sat, 4 Feb 2023 15:07:36 +1100
Subject: [PATCH] Explicitly set ddp device

---
 train.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/train.py b/train.py
index a66fa91..32a2eff 100644
--- a/train.py
+++ b/train.py
@@ -85,6 +85,7 @@ if ddp:
     ddp_rank = int(os.environ['RANK'])
     ddp_local_rank = int(os.environ['LOCAL_RANK'])
     device = f'cuda:{ddp_local_rank}'
+    torch.cuda.set_device(device)
     master_process = ddp_rank == 0 # this process will do logging, checkpointing etc.
     seed_offset = ddp_rank # each process gets a different seed
 else: