1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2025-09-06 21:07:57 +00:00

use relative paths so that running the data prep scripts always creates files in the local folder, no matter where they are run from

This commit is contained in:
DG
2023-01-20 10:39:45 -08:00
parent 2c7806db6e
commit edb7a7eab0
3 changed files with 7 additions and 6 deletions

View File

@@ -47,8 +47,8 @@ print(f"val has {len(val_ids):,} tokens")
# export to bin files
train_ids = np.array(train_ids, dtype=np.uint16)
val_ids = np.array(val_ids, dtype=np.uint16)
train_ids.tofile('train.bin')
val_ids.tofile('val.bin')
train_ids.tofile(os.path.join(os.path.dirname(__file__), 'train.bin'))
val_ids.tofile(os.path.join(os.path.dirname(__file__), 'val.bin'))
# save the meta information as well, to help us encode/decode later
meta = {
@@ -56,7 +56,7 @@ meta = {
'itos': itos,
'stoi': stoi,
}
with open('meta.pkl', 'wb') as f:
with open(os.path.join(os.path.dirname(__file__), 'meta.pkl'), 'wb') as f:
pickle.dump(meta, f)
# length of dataset in characters: 1115394