1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-09-21 03:39:44 +00:00

Add note about fix

This commit is contained in:
osmarks 2024-06-24 20:13:10 +01:00
parent 0194d45e43
commit f3118fe74d

View File

@@ -140,7 +140,7 @@ def get_batch(split, step):
else: else:
data = np.memmap(os.path.join(data_dir, 'val.bin'), dtype=np.uint16, mode='r') data = np.memmap(os.path.join(data_dir, 'val.bin'), dtype=np.uint16, mode='r')
d_rng = random.Random(f"{split}-{step}-{seed}") d_rng = random.Random(f"{split}-{step}-{seed}")
ix = [ d_rng.randint(0, len(data) - block_size) for _ in range(batch_size) ] ix = [ d_rng.randint(0, len(data) - block_size) for _ in range(batch_size) ] # TODO: I think this needs to be len(data) - block_size - 1 but changing it breaks determinism badly
x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix]) x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix]) y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
if device_type == 'cuda': if device_type == 'cuda':