Fixing doc. (#1499)

* Fixing doc.

* SentencePieceUnigram and Convert.py still used sentencepiece

* stub

---------

Co-authored-by: Arthur Zucker <arthur.zucker@gmail.com>
This commit is contained in:
Nicolas Patry
2024-04-17 09:32:40 +02:00
committed by GitHub
parent 949d9e3e0e
commit 91393ef75e
7 changed files with 20 additions and 11 deletions

View File

@@ -102,9 +102,9 @@ class SpmConverter(Converter):
tokenizer.normalizer = self.normalizer(self.proto)
replacement = "▁"
-add_prefix_space = True
-tokenizer.pre_tokenizer = Metaspace(replacement=replacement, add_prefix_space=add_prefix_space)
-tokenizer.decoder = decoders.Metaspace(replacement=replacement, add_prefix_space=add_prefix_space)
+prepend_scheme = "always"
+tokenizer.pre_tokenizer = Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
+tokenizer.decoder = decoders.Metaspace(replacement=replacement, prepend_scheme=prepend_scheme)
post_processor = self.post_processor(tokenizer)
if post_processor:
tokenizer.post_processor = post_processor