Mirror of https://github.com/mii443/tokenizers.git (synced 2025-08-22 16:25:30 +00:00)
Add WordPiece decoder for Bert
tokenizers/src/decoders/mod.rs
@@ -1,2 +1,3 @@
 // Re-export this as a decoder
 pub use super::pre_tokenizers::byte_level;
+pub mod wordpiece;
tokenizers/src/decoders/wordpiece.rs (new file, 9 lines)
@@ -0,0 +1,9 @@
+use crate::tokenizer::Decoder;
+
+pub struct WordPiece;
+
+impl Decoder for WordPiece {
+    fn decode(&self, tokens: Vec<String>) -> String {
+        tokens.join(" ").replace(" ##", "")
+    }
+}
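
For context (not part of the commit): a minimal usage sketch of this decoder, assuming the crate exposes the modules at the paths implied by the diff (tokenizers::decoders::wordpiece and tokenizers::tokenizer::Decoder). It shows how joining on spaces and then stripping " ##" merges WordPiece continuation pieces back into whole words.

    // NOTE: import paths are assumed from the diff layout, not verified against the crate root.
    use tokenizers::decoders::wordpiece::WordPiece;
    use tokenizers::tokenizer::Decoder; // trait must be in scope to call .decode()

    fn main() {
        // WordPiece prefixes continuation sub-tokens with "##".
        let tokens = vec![
            "un".to_string(),
            "##believ".to_string(),
            "##able".to_string(),
            "day".to_string(),
        ];
        // join(" ")          -> "un ##believ ##able day"
        // replace(" ##", "") -> "unbelievable day"
        assert_eq!(WordPiece.decode(tokens), "unbelievable day");
    }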