Mirror of https://github.com/mii443/tokenizers.git, synced 2025-12-08 13:48:19 +00:00
Python - Update bindings
@@ -60,10 +60,8 @@ if args.type == "gpt2":
 
     # Create a Tokenizer using BPE
     tok_r = Tokenizer(BPE.from_files(args.vocab, args.merges))
-    # Use ByteLevel Normalizer
-    tok_r.normalizer = normalizers.ByteLevel(add_prefix_space=False)
     # Use ByteLevel PreTokenizer
-    tok_r.pre_tokenizer = pre_tokenizers.ByteLevel()
+    tok_r.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
     # Use ByteLevel Decoder
     tok_r.decoder = decoders.ByteLevel()
 elif args.type == "bert":
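
For context, below is a minimal sketch of the gpt2 branch as it reads after this patch. Only the tokenizer-setup lines come from the hunk above; the imports, the argument parsing, and the encode/decode round trip at the end are assumptions added for illustration, and BPE.from_files reflects the bindings' API as used in this commit (later tokenizers releases construct the BPE model differently).

# Minimal sketch of the patched gpt2 branch (imports and CLI scaffolding are assumed,
# not part of the diff above).
from argparse import ArgumentParser

from tokenizers import Tokenizer, decoders, pre_tokenizers
from tokenizers.models import BPE

parser = ArgumentParser()
parser.add_argument("--type", default="gpt2")
parser.add_argument("--vocab", required=True)
parser.add_argument("--merges", required=True)
args = parser.parse_args()

if args.type == "gpt2":
    # Create a Tokenizer using BPE (BPE.from_files matches the call shown in the diff;
    # newer releases expose a different constructor).
    tok_r = Tokenizer(BPE.from_files(args.vocab, args.merges))
    # Use ByteLevel PreTokenizer; add_prefix_space now lives here rather than on a
    # separate ByteLevel normalizer, which this commit removes.
    tok_r.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
    # Use ByteLevel Decoder
    tok_r.decoder = decoders.ByteLevel()

    # Illustrative round trip (assumed usage, not part of the diff).
    encoding = tok_r.encode("Hello, world!")
    print(encoding.tokens)
    print(tok_r.decode(encoding.ids))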