Merge pull request #93 from Tomarchelone/patch-1

Fix indexing bug in add_tokens()
This commit is contained in:
MOI Anthony
2020-01-22 15:47:33 -05:00
committed by GitHub

View File

@ -587,7 +587,7 @@ impl Tokenizer {
continue;
}
-                let new_id = (self.model.get_vocab_size() - 1 + self.added_tokens.len()) as u32;
+                let new_id = (self.model.get_vocab_size() + self.added_tokens.len()) as u32;
let id = self
.added_tokens
.entry(token.clone())