Add note about fix

2025-07-07 04:12:50 +00:00 · 2024-06-24 20:13:10 +01:00 · 2024-06-24 20:13:10 +01:00 · f3118fe74d
commit f3118fe74d
parent 0194d45e43
1 changed file with 1 addition and 1 deletion
--- a/train.py
+++ b/train.py
@@ -140,7 +140,7 @@ def get_batch(split, step):
    else:
        data = np.memmap(os.path.join(data_dir, 'val.bin'), dtype=np.uint16, mode='r')
    d_rng = random.Random(f"{split}-{step}-{seed}")
-    ix = [ d_rng.randint(0, len(data) - block_size) for _ in range(batch_size) ]
+    ix = [ d_rng.randint(0, len(data) - block_size) for _ in range(batch_size) ] # TODO: I think this needs to be len(data) - block_size - 1 but changing it breaks determinism badly
    x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
    y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
    if device_type == 'cuda':