mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-11-10 20:09:58 +00:00
Merge pull request #270 from LaihoE/master
fix np.sum overflows on windows
This commit is contained in:
commit
ed7887c888
@@ -50,7 +50,7 @@ tokenized = split_dataset.map(
|
||||
|
||||
# concatenate all the ids in each dataset into one large file we can use for training
|
||||
for split, dset in tokenized.items():
|
||||
- arr_len = np.sum(dset['len'])
|
||||
+ arr_len = np.sum(dset['len'], dtype=np.uint64)
|
||||
filename = os.path.join(os.path.dirname(__file__), f'{split}.bin')
|
||||
dtype = np.uint16 # (can do since enc.max_token_value == 50256 is < 2**16)
|
||||
arr = np.memmap(filename, dtype=dtype, mode='w+', shape=(arr_len,))
|
||||
|
Loading…
Reference in New Issue
Block a user