Merge pull request #120 from nynyg/remove_cpu_pin_mem

Pin memory only when training on GPU
2025-06-25 22:52:51 +00:00 · 2023-02-04 11:28:08 -08:00 · 2023-02-04 11:28:08 -08:00 · dc149891b6
commit dc149891b6
parent 77e7e04c26 b8286f343e
1 changed file with 6 additions and 1 deletion
--- a/train.py
+++ b/train.py
@@ -113,7 +113,12 @@ def get_batch(split):
    x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
    y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
    # pin arrays x,y, which allows us to move them to GPU asynchronously (non_blocking=True)
+    if "cuda" in device:
+        # GPU training
        x, y = x.pin_memory().to(device, non_blocking=True), y.pin_memory().to(device, non_blocking=True)
+    else:
+        # CPU or MPS training
+        x, y = x.to(device), y.to(device)
    return x, y

 # init these up here, can override if init_from='resume' (i.e. from a checkpoint)