Add WordPiece decoder for Bert

This commit is contained in:
Anthony MOI
2019-12-06 19:30:42 -05:00
parent 3abdfaf852
commit ea9b75d6cd
2 changed files with 10 additions and 0 deletions

View File

@ -1,2 +1,3 @@
// Re-export this as a decoder
pub use super::pre_tokenizers::byte_level;
pub mod wordpiece;

View File

@ -0,0 +1,9 @@
use crate::tokenizer::Decoder;
/// WordPiece decoder.
///
/// This is a field-less unit type: it carries no configuration or state, and
/// its behavior comes from its `Decoder` implementation. The standard traits
/// are derived so the value can be debug-printed, freely copied, and created
/// through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct WordPiece;
impl Decoder for WordPiece {
    /// Reassembles a sequence of tokens into a single string.
    ///
    /// The tokens are first laid out separated by single spaces. Every
    /// occurrence of a space immediately followed by `##` is then removed,
    /// which glues a `##`-prefixed piece onto the text that precedes it.
    /// A `##` at the very start of the output is kept, because no space
    /// comes before it.
    fn decode(&self, tokens: Vec<String>) -> String {
        let spaced = tokens.join(" ");
        // Cutting the text at each " ##" marker and concatenating the
        // remaining pieces drops exactly those markers.
        spaced.split(" ##").collect()
    }
}