mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2025-09-03 11:27:57 +00:00
Use paths relative to each script's own directory so that running the data prep scripts always creates files in the script's folder, no matter where they are run from
This commit is contained in:
@@ -25,8 +25,8 @@ print(f"val has {len(val_ids):,} tokens")
 # export to bin files
 train_ids = np.array(train_ids, dtype=np.uint16)
 val_ids = np.array(val_ids, dtype=np.uint16)
-train_ids.tofile('train.bin')
-val_ids.tofile('val.bin')
+train_ids.tofile(os.path.join(os.path.dirname(__file__), 'train.bin'))
+val_ids.tofile(os.path.join(os.path.dirname(__file__), 'val.bin'))
 
 # train.bin has 301,966 tokens
 # val.bin has 36,059 tokens
Reference in New Issue
Block a user