1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2025-11-11 21:03:01 +00:00

Merge pull request #305 from okuvshynov/fix_osx_dataload

nanogpt: fix multiprocessing in load_dataset on OS X
This commit is contained in:
Andrej
2023-06-17 20:26:35 -07:00
committed by GitHub

View File

@@ -16,6 +16,7 @@ num_proc = 8
# it is better than 1 usually though
num_proc_load_dataset = num_proc
if __name__ == '__main__':
# takes 54GB in huggingface .cache dir, about 8M documents (8,013,769)
dataset = load_dataset("openwebtext", num_proc=num_proc_load_dataset)