mirror of
				https://github.com/osmarks/nanogpt-experiments.git
				synced 2025-10-31 07:13:01 +00:00 
			
		
		
		
	Fix np.sum overflow on Windows by summing with dtype=np.uint64
This commit is contained in:
		| @@ -50,7 +50,7 @@ tokenized = split_dataset.map( | ||||
|  | ||||
| # concatenate all the ids in each dataset into one large file we can use for training | ||||
| for split, dset in tokenized.items(): | ||||
|     arr_len = np.sum(dset['len']) | ||||
|     arr_len = np.sum(dset['len'], dtype=np.uint64) | ||||
|     filename = os.path.join(os.path.dirname(__file__), f'{split}.bin') | ||||
|     dtype = np.uint16 # (can do since enc.max_token_value == 50256 is < 2**16) | ||||
|     arr = np.memmap(filename, dtype=dtype, mode='w+', shape=(arr_len,)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Laiho
					Laiho