[Fix #17] BPE & WordPiece models saving

This commit is contained in:
Anthony MOI
2019-12-31 13:56:28 -05:00
parent 2125e4d422
commit f28ca58fd9
5 changed files with 144 additions and 1 deletions

View File

@ -35,6 +35,14 @@ name = "bitflags"
version = "1.2.1" version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "c2-chacha"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "0.1.10" version = "0.1.10"
@ -109,6 +117,16 @@ name = "either"
version = "1.5.3" version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "getrandom"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)",
"wasi 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "ghost" name = "ghost"
version = "0.1.1" version = "0.1.1"
@ -233,6 +251,11 @@ dependencies = [
"syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", "syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "ppv-lite86"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "proc-macro-hack" name = "proc-macro-hack"
version = "0.5.11" version = "0.5.11"
@ -300,6 +323,43 @@ dependencies = [
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "rand"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_chacha"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.3.0" version = "1.3.0"
@ -452,6 +512,7 @@ version = "0.0.11"
dependencies = [ dependencies = [
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
@ -498,6 +559,11 @@ name = "version_check"
version = "0.9.1" version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "wasi"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.8" version = "0.3.8"
@ -523,6 +589,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" "checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" "checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
"checksum crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3aa945d63861bfe624b55d153a39684da1e8c0bc8fba932f7ee3a3c16cea3ca" "checksum crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3aa945d63861bfe624b55d153a39684da1e8c0bc8fba932f7ee3a3c16cea3ca"
@ -531,6 +598,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4" "checksum crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4"
"checksum ctor 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd8ce37ad4184ab2ce004c33bf6379185d3b1c95801cab51026bd271bf68eedc" "checksum ctor 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd8ce37ad4184ab2ce004c33bf6379185d3b1c95801cab51026bd271bf68eedc"
"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
"checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407"
"checksum ghost 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2a36606a68532b5640dc86bb1f33c64b45c4682aad4c50f3937b317ea387f3d6" "checksum ghost 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2a36606a68532b5640dc86bb1f33c64b45c4682aad4c50f3937b317ea387f3d6"
"checksum hermit-abi 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f629dc602392d3ec14bfc8a09b5e644d7ffd725102b48b81e59f90f2633621d7" "checksum hermit-abi 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f629dc602392d3ec14bfc8a09b5e644d7ffd725102b48b81e59f90f2633621d7"
"checksum indoc 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3f9553c1e16c114b8b77ebeb329e5f2876eed62a8d51178c8bc6bff0d65f98f8" "checksum indoc 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3f9553c1e16c114b8b77ebeb329e5f2876eed62a8d51178c8bc6bff0d65f98f8"
@ -546,12 +614,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72" "checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72"
"checksum paste 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "423a519e1c6e828f1e73b720f9d9ed2fa643dce8a7737fb43235ce0b41eeaa49" "checksum paste 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "423a519e1c6e828f1e73b720f9d9ed2fa643dce8a7737fb43235ce0b41eeaa49"
"checksum paste-impl 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "4214c9e912ef61bf42b81ba9a47e8aad1b2ffaf739ab162bf96d1e011f54e6c5" "checksum paste-impl 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "4214c9e912ef61bf42b81ba9a47e8aad1b2ffaf739ab162bf96d1e011f54e6c5"
"checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
"checksum proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)" = "ecd45702f76d6d3c75a80564378ae228a85f0b59d2f3ed43c91b4a69eb2ebfc5" "checksum proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)" = "ecd45702f76d6d3c75a80564378ae228a85f0b59d2f3ed43c91b4a69eb2ebfc5"
"checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27" "checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27"
"checksum pyo3 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f9df1468dddf8a59ec799cf3b930bb75ec09deabe875ba953e06c51d1077136" "checksum pyo3 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f9df1468dddf8a59ec799cf3b930bb75ec09deabe875ba953e06c51d1077136"
"checksum pyo3-derive-backend 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f6e56fb3e97b344a8f87d036f94578399402c6b75949de6270cd07928f790b1" "checksum pyo3-derive-backend 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f6e56fb3e97b344a8f87d036f94578399402c6b75949de6270cd07928f790b1"
"checksum pyo3cls 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "97452dcdf5941627ebc5c06664a07821fc7fc88d7515f02178193a8ebe316468" "checksum pyo3cls 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "97452dcdf5941627ebc5c06664a07821fc7fc88d7515f02178193a8ebe316468"
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
"checksum rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412"
"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853"
"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
"checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" "checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
"checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" "checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
"checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd" "checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd"
@ -577,6 +650,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum unindent 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "63f18aa3b0e35fed5a0048f029558b1518095ffe2a0a31fb87c93dece93a4993" "checksum unindent 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "63f18aa3b0e35fed5a0048f029558b1518095ffe2a0a31fb87c93dece93a4993"
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
"checksum version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" "checksum version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce"
"checksum wasi 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d"
"checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@ -1,10 +1,11 @@
extern crate tokenizers as tk; extern crate tokenizers as tk;
use super::error::ToPyResult;
use super::utils::Container; use super::utils::Container;
use pyo3::exceptions; use pyo3::exceptions;
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::*; use pyo3::types::*;
use std::path::{Path, PathBuf};
/// A Model represents some tokenization algorithm like BPE or Word /// A Model represents some tokenization algorithm like BPE or Word
/// This class cannot be constructed directly. Please use one of the concrete models. /// This class cannot be constructed directly. Please use one of the concrete models.
@ -21,6 +22,19 @@ impl Model {
"Cannot create a Model directly. Use a concrete subclass", "Cannot create a Model directly. Use a concrete subclass",
)) ))
} }
fn save(&self, folder: &str, name: &str) -> PyResult<Vec<String>> {
let saved: PyResult<Vec<_>> = ToPyResult(
self.model
.execute(|model| model.save(Path::new(folder), name)),
)
.into();
Ok(saved?
.into_iter()
.map(|path| path.to_string_lossy().into_owned())
.collect())
}
} }
/// BPE Model /// BPE Model

View File

@ -7,6 +7,7 @@ use std::{
fs::File, fs::File,
io::prelude::*, io::prelude::*,
io::{BufRead, BufReader}, io::{BufRead, BufReader},
path::{Path, PathBuf},
}; };
pub struct BPE { pub struct BPE {
@ -260,6 +261,37 @@ impl Model for BPE {
fn id_to_token(&self, id: u32) -> Option<String> { fn id_to_token(&self, id: u32) -> Option<String> {
self.vocab_r.get(&id).cloned() self.vocab_r.get(&id).cloned()
} }
fn save(&self, folder: &Path, name: &str) -> Result<Vec<PathBuf>> {
// Write vocab.json
let vocab_path: PathBuf = [folder, Path::new(&format!("{}-vocab.json", name))]
.iter()
.collect();
let mut vocab_file = File::create(&vocab_path)?;
let serialized = serde_json::to_string(&self.vocab)?;
vocab_file.write_all(&serialized.as_bytes())?;
// Write merges.txt
let merges_path: PathBuf = [folder, Path::new(&format!("{}-merges.txt", name))]
.iter()
.collect();
let mut merges_file = File::create(&merges_path)?;
let mut merges: Vec<(Pair, u32)> = self
.merges
.iter()
.map(|(pair, (rank, _))| (*pair, *rank))
.collect();
merges.sort_unstable_by_key(|k| k.1);
merges_file.write_all(
&merges
.into_iter()
.map(|(pair, _)| format!("{} {}\n", pair.0, pair.1).into_bytes())
.flatten()
.collect::<Vec<_>>()[..],
)?;
Ok(vec![vocab_path, merges_path])
}
} }
#[cfg(test)] #[cfg(test)]

View File

@ -3,7 +3,9 @@ use std::{
collections::HashMap, collections::HashMap,
fmt, fmt,
fs::File, fs::File,
io::prelude::*,
io::{BufRead, BufReader}, io::{BufRead, BufReader},
path::{Path, PathBuf},
}; };
#[derive(Debug)] #[derive(Debug)]
@ -145,6 +147,25 @@ impl Model for WordPiece {
fn id_to_token(&self, id: u32) -> Option<String> { fn id_to_token(&self, id: u32) -> Option<String> {
self.vocab_r.get(&id).cloned() self.vocab_r.get(&id).cloned()
} }
fn save(&self, folder: &Path, name: &str) -> Result<Vec<PathBuf>> {
// Write vocab.txt
let vocab_path: PathBuf = [folder, Path::new(&format!("{}-vocab.txt", name))]
.iter()
.collect();
let mut vocab_file = File::create(&vocab_path)?;
let mut vocab: Vec<(&String, &u32)> = self.vocab.iter().collect();
vocab.sort_unstable_by_key(|k| *k.1);
vocab_file.write_all(
&vocab
.into_iter()
.map(|(token, _)| token.as_bytes().to_owned())
.flatten()
.collect::<Vec<_>>()[..],
)?;
Ok(vec![vocab_path])
}
} }
#[cfg(test)] #[cfg(test)]

View File

@ -18,6 +18,7 @@ use std::{
collections::HashMap, collections::HashMap,
fs::File, fs::File,
io::{BufRead, BufReader}, io::{BufRead, BufReader},
path::{Path, PathBuf},
}; };
mod encoding; mod encoding;
@ -40,6 +41,7 @@ pub trait Model {
fn token_to_id(&self, token: &str) -> Option<u32>; fn token_to_id(&self, token: &str) -> Option<u32>;
fn id_to_token(&self, id: u32) -> Option<String>; fn id_to_token(&self, id: u32) -> Option<String>;
fn get_vocab_size(&self) -> usize; fn get_vocab_size(&self) -> usize;
fn save(&self, folder: &Path, name: &str) -> Result<Vec<PathBuf>>;
} }
/// A `PostProcessor` has the responsibility to post process an encoded output of the `Tokenizer`. /// A `PostProcessor` has the responsibility to post process an encoded output of the `Tokenizer`.