mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2025-09-01 10:27:58 +00:00
candidate changes to apis, have to think through more
This commit is contained in:
@@ -35,6 +35,7 @@ enc = tiktoken.get_encoding("gpt2")
|
||||
def process(example):
    """Tokenize one example's text and terminate it with the end-of-text token.

    Reads ``example['text']``, encodes it with the module-level ``enc``
    encoder, appends ``enc.eot_token``, and returns a dict holding the
    token ids under ``'ids'`` and their count under ``'len'``.
    """
    # encode_ordinary ignores any special tokens
    tokens = enc.encode_ordinary(example['text'])
    # add the end of text token, e.g. 50256 for gpt2 bpe
    # note: arguably eot should be prepended rather than appended, even
    # though it is called "eot" -- left as an open question, behavior unchanged
    tokens.append(enc.eot_token)
    return {'ids': tokens, 'len': len(tokens)}
|
||||
|
||||
|
Reference in New Issue
Block a user