mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-11-13 05:19:58 +00:00
np.sum overflows on Windows; accumulate the total length with dtype=np.uint64
This commit is contained in:
parent
7fe4a099ad
commit
6649b299eb
@@ -50,7 +50,7 @@ tokenized = split_dataset.map(
|
||||
|
||||
# concatenate all the ids in each dataset into one large file we can use for training
|
||||
for split, dset in tokenized.items():
|
||||
arr_len = np.sum(dset['len'])
|
||||
arr_len = np.sum(dset['len'], dtype=np.uint64)
|
||||
filename = os.path.join(os.path.dirname(__file__), f'{split}.bin')
|
||||
dtype = np.uint16 # (can do since enc.max_token_value == 50256 is < 2**16)
|
||||
arr = np.memmap(filename, dtype=dtype, mode='w+', shape=(arr_len,))
|
||||
|
Loading…
Reference in New Issue
Block a user