Mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-22 16:25:30 +00:00
Fixing doc. (#1499)
* Fixing doc.
* SentencePieceUnigram and Convert.py still used sentencepiece
* stub

---------

Co-authored-by: Arthur Zucker <arthur.zucker@gmail.com>
@@ -102,9 +102,9 @@ class SpmConverter(Converter):
         tokenizer.normalizer = self.normalizer(self.proto)
 
         replacement = "▁"
-        add_prefix_space = True
-        tokenizer.pre_tokenizer = Metaspace(replacement=replacement, add_prefix_space=add_prefix_space)
-        tokenizer.decoder = decoders.Metaspace(replacement=replacement, add_prefix_space=add_prefix_space)
+        prepend_scheme = "always"
+        tokenizer.pre_tokenizer = Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
+        tokenizer.decoder = decoders.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
         post_processor = self.post_processor(tokenizer)
         if post_processor:
             tokenizer.post_processor = post_processor
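For context, here is a minimal standalone sketch (not part of the commit) of how the updated Metaspace arguments are used through the tokenizers Python bindings. It assumes a release where `prepend_scheme` has replaced the removed `add_prefix_space` flag; the sample string and the printed offsets are illustrative only.

# Minimal sketch, assuming a tokenizers version where Metaspace takes
# `prepend_scheme` ("always" / "first" / "never") instead of `add_prefix_space`.
from tokenizers import decoders, pre_tokenizers

replacement = "▁"
prepend_scheme = "always"

# Pre-tokenizer: replaces spaces with the replacement character and, with
# "always", also prepends it to the start of the text.
pre_tok = pre_tokenizers.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
print(pre_tok.pre_tokenize_str("Hello world"))
# e.g. [('▁Hello', (0, 5)), ('▁world', (5, 11))]

# Decoder: the inverse mapping, turning the replacement character back into spaces.
decoder = decoders.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
print(decoder.decode(["▁Hello", "▁world"]))
# "Hello world"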