Fix clippy warnings

Author:    Anthony MOI
Date:      2021-03-10 20:26:39 -05:00
Committer: Anthony MOI
Commit:    56a9196030
Parent:    ee95e7f0cd

9 changed files with 106 additions and 117 deletions
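Nearly every hunk below removes the same lint target: a function whose `Result`/`PyResult` return type wraps a value that can never fail. Clippy flags this pattern (most plausibly as `clippy::unnecessary_wraps`; the lint name is an inference, the commit message does not record it), and the fix is mechanical: return the value directly, drop the `Ok(...)` at every return, and drop the `?`/`.unwrap()` at every call site. A minimal sketch of the before/after shape, with made-up names:

// Before: every path returns Ok, so the Result in the signature is noise.
fn vocab_size_before(vocab: &std::collections::HashMap<String, u32>) -> Result<usize, String> {
    Ok(vocab.len())
}

// After: the same function with the spurious fallibility removed; callers
// no longer need `?` or `.unwrap()`.
fn vocab_size_after(vocab: &std::collections::HashMap<String, u32>) -> usize {
    vocab.len()
}

A run of `cargo clippy --all-targets` over the workspace surfaces warnings like these; the exact command used for this commit is not part of the page.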

View File

@@ -57,11 +57,9 @@ impl Decoder for PyDecoder {
 #[pymethods]
 impl PyDecoder {
     #[staticmethod]
-    fn custom(decoder: PyObject) -> PyResult<Self> {
-        let decoder = PyDecoderWrapper::Custom(
-            CustomDecoder::new(decoder).map(|d| Arc::new(RwLock::new(d)))?,
-        );
-        Ok(PyDecoder::new(decoder))
+    fn custom(decoder: PyObject) -> Self {
+        let decoder = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(decoder))));
+        PyDecoder::new(decoder)
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -147,8 +145,8 @@ pub struct PyByteLevelDec {}
 #[pymethods]
 impl PyByteLevelDec {
     #[new]
-    fn new() -> PyResult<(Self, PyDecoder)> {
-        Ok((PyByteLevelDec {}, ByteLevel::default().into()))
+    fn new() -> (Self, PyDecoder) {
+        (PyByteLevelDec {}, ByteLevel::default().into())
     }
 }
@@ -188,8 +186,8 @@ impl PyWordPieceDec {
     #[new]
     #[args(prefix = "String::from(\"##\")", cleanup = "true")]
-    fn new(prefix: String, cleanup: bool) -> PyResult<(Self, PyDecoder)> {
-        Ok((PyWordPieceDec {}, WordPiece::new(prefix, cleanup).into()))
+    fn new(prefix: String, cleanup: bool) -> (Self, PyDecoder) {
+        (PyWordPieceDec {}, WordPiece::new(prefix, cleanup).into())
     }
 }
@@ -230,11 +228,11 @@ impl PyMetaspaceDec {
     #[new]
     #[args(replacement = "PyChar('▁')", add_prefix_space = "true")]
-    fn new(replacement: PyChar, add_prefix_space: bool) -> PyResult<(Self, PyDecoder)> {
-        Ok((
+    fn new(replacement: PyChar, add_prefix_space: bool) -> (Self, PyDecoder) {
+        (
             PyMetaspaceDec {},
             Metaspace::new(replacement.0, add_prefix_space).into(),
-        ))
+        )
     }
 }
@@ -261,8 +259,8 @@ impl PyBPEDecoder {
     #[new]
     #[args(suffix = "String::from(\"</w>\")")]
-    fn new(suffix: String) -> PyResult<(Self, PyDecoder)> {
-        Ok((PyBPEDecoder {}, BPEDecoder::new(suffix).into()))
+    fn new(suffix: String) -> (Self, PyDecoder) {
+        (PyBPEDecoder {}, BPEDecoder::new(suffix).into())
     }
 }
@@ -272,8 +270,8 @@ pub(crate) struct CustomDecoder {
 }
 
 impl CustomDecoder {
-    pub(crate) fn new(inner: PyObject) -> PyResult<Self> {
-        Ok(CustomDecoder { inner })
+    pub(crate) fn new(inner: PyObject) -> Self {
+        CustomDecoder { inner }
     }
 }
@@ -387,8 +385,7 @@ mod test {
             let obj: PyObject = Py::new(py, py_msp).unwrap().into_py(py);
             obj
         });
-        let py_seq =
-            PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj).unwrap())));
+        let py_seq = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj))));
         assert!(serde_json::to_string(&py_seq).is_err());
     }
 }

View File

@@ -41,10 +41,10 @@ impl PySequenceProtocol for PyEncoding {
 #[pymethods]
 impl PyEncoding {
     #[new]
-    fn new() -> PyResult<Self> {
-        Ok(Self {
-            encoding: tk::tokenizer::Encoding::default(),
-        })
+    fn new() -> Self {
+        Self {
+            encoding: tk::tokenizer::Encoding::default(),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -441,8 +441,7 @@ impl PyEncoding {
     /// The length of previous content to be included in each overflowing piece
     #[args(stride = "0")]
     #[text_signature = "(self, max_length, stride=0)"]
-    fn truncate(&mut self, max_length: usize, stride: usize) -> PyResult<()> {
+    fn truncate(&mut self, max_length: usize, stride: usize) {
         self.encoding.truncate(max_length, stride);
-        Ok(())
     }
 }

View File

@@ -91,12 +91,12 @@ where
 #[pymethods]
 impl PyModel {
     #[new]
-    fn __new__() -> PyResult<Self> {
+    fn __new__() -> Self {
         // Instantiate a default empty model. This doesn't really make sense, but we need
         // to be able to instantiate an empty model for pickle capabilities.
-        Ok(PyModel {
-            model: Arc::new(RwLock::new(BPE::default().into())),
-        })
+        PyModel {
+            model: Arc::new(RwLock::new(BPE::default().into())),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {

View File

@@ -102,10 +102,10 @@ impl Normalizer for PyNormalizer {
 #[pymethods]
 impl PyNormalizer {
     #[staticmethod]
-    fn custom(obj: PyObject) -> PyResult<Self> {
-        Ok(Self {
-            normalizer: PyNormalizerWrapper::Custom(CustomNormalizer::new(obj)).into(),
-        })
+    fn custom(obj: PyObject) -> Self {
+        Self {
+            normalizer: PyNormalizerWrapper::Custom(CustomNormalizer::new(obj)).into(),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -279,10 +279,10 @@ impl PyBertNormalizer {
         handle_chinese_chars: bool,
         strip_accents: Option<bool>,
         lowercase: bool,
-    ) -> PyResult<(Self, PyNormalizer)> {
+    ) -> (Self, PyNormalizer) {
         let normalizer =
             BertNormalizer::new(clean_text, handle_chinese_chars, strip_accents, lowercase);
-        Ok((PyBertNormalizer {}, normalizer.into()))
+        (PyBertNormalizer {}, normalizer.into())
     }
 }
@@ -293,8 +293,8 @@ pub struct PyNFD {}
 #[pymethods]
 impl PyNFD {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFD {}, PyNormalizer::new(NFD.into())))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFD {}, PyNormalizer::new(NFD.into()))
     }
 }
@@ -305,8 +305,8 @@ pub struct PyNFKD {}
 #[pymethods]
 impl PyNFKD {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFKD {}, NFKD.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFKD {}, NFKD.into())
     }
 }
@@ -317,8 +317,8 @@ pub struct PyNFC {}
 #[pymethods]
 impl PyNFC {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFC {}, NFC.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFC {}, NFC.into())
     }
 }
@@ -329,8 +329,8 @@ pub struct PyNFKC {}
 #[pymethods]
 impl PyNFKC {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFKC {}, NFKC.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFKC {}, NFKC.into())
     }
 }
@@ -360,8 +360,8 @@ impl PySequence {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[PyList::empty(py)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[PyList::empty(py)])
     }
 }
@@ -379,8 +379,8 @@ pub struct PyLowercase {}
 #[pymethods]
 impl PyLowercase {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyLowercase {}, Lowercase.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyLowercase {}, Lowercase.into())
     }
 }
@@ -412,8 +412,8 @@ impl PyStrip {
     #[new]
     #[args(left = "true", right = "true")]
-    fn new(left: bool, right: bool) -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyStrip {}, Strip::new(left, right).into()))
+    fn new(left: bool, right: bool) -> (Self, PyNormalizer) {
+        (PyStrip {}, Strip::new(left, right).into())
     }
 }
@@ -424,8 +424,8 @@ pub struct PyStripAccents {}
 #[pymethods]
 impl PyStripAccents {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyStripAccents {}, StripAccents.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyStripAccents {}, StripAccents.into())
     }
 }
@@ -436,8 +436,8 @@ pub struct PyNmt {}
 #[pymethods]
 impl PyNmt {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNmt {}, Nmt.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNmt {}, Nmt.into())
     }
 }

View File

@@ -101,10 +101,10 @@ impl PreTokenizer for PyPreTokenizer {
 #[pymethods]
 impl PyPreTokenizer {
     #[staticmethod]
-    fn custom(pretok: PyObject) -> PyResult<Self> {
-        Ok(PyPreTokenizer {
-            pretok: PyPreTokenizerWrapper::Custom(CustomPreTokenizer::new(pretok)).into(),
-        })
+    fn custom(pretok: PyObject) -> Self {
+        PyPreTokenizer {
+            pretok: PyPreTokenizerWrapper::Custom(CustomPreTokenizer::new(pretok)).into(),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -244,13 +244,13 @@ impl PyByteLevel {
     #[new]
     #[args(add_prefix_space = "true", _kwargs = "**")]
-    fn new(add_prefix_space: bool, _kwargs: Option<&PyDict>) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((
+    fn new(add_prefix_space: bool, _kwargs: Option<&PyDict>) -> (Self, PyPreTokenizer) {
+        (
             PyByteLevel {},
             ByteLevel::default()
                 .add_prefix_space(add_prefix_space)
                 .into(),
-        ))
+        )
     }
 
     /// Returns the alphabet used by this PreTokenizer.
@@ -278,8 +278,8 @@ pub struct PyWhitespace {}
 #[pymethods]
 impl PyWhitespace {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyWhitespace {}, Whitespace::default().into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyWhitespace {}, Whitespace::default().into())
     }
 }
@@ -290,8 +290,8 @@ pub struct PyWhitespaceSplit {}
 #[pymethods]
 impl PyWhitespaceSplit {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyWhitespaceSplit {}, WhitespaceSplit.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyWhitespaceSplit {}, WhitespaceSplit.into())
    }
 }
@@ -332,8 +332,8 @@ impl PySplit {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[" ", "removed"]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[" ", "removed"])
     }
 }
@@ -364,8 +364,8 @@ impl PyCharDelimiterSplit {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[" "]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[" "])
     }
 }
@@ -379,8 +379,8 @@ pub struct PyBertPreTokenizer {}
 #[pymethods]
 impl PyBertPreTokenizer {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyBertPreTokenizer {}, BertPreTokenizer.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyBertPreTokenizer {}, BertPreTokenizer.into())
     }
 }
@@ -391,8 +391,8 @@ pub struct PyPunctuation {}
 #[pymethods]
 impl PyPunctuation {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyPunctuation {}, Punctuation.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyPunctuation {}, Punctuation.into())
     }
 }
@@ -420,8 +420,8 @@ impl PySequence {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[PyList::empty(py)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[PyList::empty(py)])
     }
 }
@@ -465,11 +465,11 @@ impl PyMetaspace {
     #[new]
     #[args(replacement = "PyChar('▁')", add_prefix_space = "true")]
-    fn new(replacement: PyChar, add_prefix_space: bool) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((
+    fn new(replacement: PyChar, add_prefix_space: bool) -> (Self, PyPreTokenizer) {
+        (
             PyMetaspace {},
             Metaspace::new(replacement.0, add_prefix_space).into(),
-        ))
+        )
     }
 }
@@ -501,8 +501,8 @@ impl PyDigits {
     #[new]
     #[args(individual_digits = false)]
-    fn new(individual_digits: bool) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyDigits {}, Digits::new(individual_digits).into()))
+    fn new(individual_digits: bool) -> (Self, PyPreTokenizer) {
+        (PyDigits {}, Digits::new(individual_digits).into())
     }
 }
@@ -516,8 +516,8 @@ pub struct PyUnicodeScripts {}
 #[pymethods]
 impl PyUnicodeScripts {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyUnicodeScripts {}, UnicodeScripts::new().into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyUnicodeScripts {}, UnicodeScripts::new().into())
     }
 }

View File

@@ -155,15 +155,15 @@ pub struct PyBertProcessing {}
 #[pymethods]
 impl PyBertProcessing {
     #[new]
-    fn new(sep: (String, u32), cls: (String, u32)) -> PyResult<(Self, PyPostProcessor)> {
-        Ok((
+    fn new(sep: (String, u32), cls: (String, u32)) -> (Self, PyPostProcessor) {
+        (
             PyBertProcessing {},
             PyPostProcessor::new(Arc::new(BertProcessing::new(sep, cls).into())),
-        ))
+        )
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[("", 0), ("", 0)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[("", 0), ("", 0)])
     }
 }
@@ -203,18 +203,18 @@ impl PyRobertaProcessing {
         cls: (String, u32),
         trim_offsets: bool,
         add_prefix_space: bool,
-    ) -> PyResult<(Self, PyPostProcessor)> {
+    ) -> (Self, PyPostProcessor) {
         let proc = RobertaProcessing::new(sep, cls)
             .trim_offsets(trim_offsets)
             .add_prefix_space(add_prefix_space);
-        Ok((
+        (
             PyRobertaProcessing {},
             PyPostProcessor::new(Arc::new(proc.into())),
-        ))
+        )
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[("", 0), ("", 0)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[("", 0), ("", 0)])
     }
 }
@@ -233,20 +233,17 @@ pub struct PyByteLevel {}
 impl PyByteLevel {
     #[new]
     #[args(trim_offsets = "None", _kwargs = "**")]
-    fn new(
-        trim_offsets: Option<bool>,
-        _kwargs: Option<&PyDict>,
-    ) -> PyResult<(Self, PyPostProcessor)> {
+    fn new(trim_offsets: Option<bool>, _kwargs: Option<&PyDict>) -> (Self, PyPostProcessor) {
         let mut byte_level = ByteLevel::default();
 
         if let Some(to) = trim_offsets {
             byte_level = byte_level.trim_offsets(to);
         }
 
-        Ok((
+        (
             PyByteLevel {},
             PyPostProcessor::new(Arc::new(byte_level.into())),
-        ))
+        )
     }
 }

View File

@@ -25,21 +25,21 @@ impl PyToken {
     }
 
     #[getter]
-    fn get_id(&self) -> PyResult<u32> {
-        Ok(self.token.id)
+    fn get_id(&self) -> u32 {
+        self.token.id
     }
 
     #[getter]
-    fn get_value(&self) -> PyResult<&str> {
-        Ok(&self.token.value)
+    fn get_value(&self) -> &str {
+        &self.token.value
     }
 
     #[getter]
-    fn get_offsets(&self) -> PyResult<(usize, usize)> {
-        Ok(self.token.offsets)
+    fn get_offsets(&self) -> (usize, usize) {
+        self.token.offsets
     }
 
-    fn as_tuple(&self) -> PyResult<(u32, &str, (usize, usize))> {
-        Ok((self.token.id, &self.token.value, self.token.offsets))
+    fn as_tuple(&self) -> (u32, &str, (usize, usize)) {
+        (self.token.id, &self.token.value, self.token.offsets)
     }
 }

View File

@@ -487,10 +487,9 @@ impl PyTokenizer {
         }
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
         let model = PyModel::from(BPE::default()).into_py(py);
-        let args = PyTuple::new(py, vec![model]);
-        Ok(args)
+        PyTuple::new(py, vec![model])
     }
 
     /// Instantiate a new :class:`~tokenizers.Tokenizer` from the given JSON string.
@@ -577,11 +576,10 @@ impl PyTokenizer {
     /// :param is_pair: Boolean indicating if the input would be a single sentence or a pair
     /// :return:
     #[text_signature = "(self, is_pair)"]
-    fn num_special_tokens_to_add(&self, is_pair: bool) -> PyResult<usize> {
-        Ok(self
-            .tokenizer
+    fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
+        self.tokenizer
             .get_post_processor()
-            .map_or(0, |p| p.added_tokens(is_pair)))
+            .map_or(0, |p| p.added_tokens(is_pair))
     }
 
     /// Get the underlying vocabulary
@@ -594,8 +592,8 @@ impl PyTokenizer {
     /// :obj:`Dict[str, int]`: The vocabulary
     #[args(with_added_tokens = true)]
     #[text_signature = "(self, with_added_tokens=True)"]
-    fn get_vocab(&self, with_added_tokens: bool) -> PyResult<HashMap<String, u32>> {
-        Ok(self.tokenizer.get_vocab(with_added_tokens))
+    fn get_vocab(&self, with_added_tokens: bool) -> HashMap<String, u32> {
+        self.tokenizer.get_vocab(with_added_tokens)
     }
 
     /// Get the size of the underlying vocabulary
@@ -608,8 +606,8 @@ impl PyTokenizer {
     /// :obj:`int`: The size of the vocabulary
     #[args(with_added_tokens = true)]
     #[text_signature = "(self, with_added_tokens=True)"]
-    fn get_vocab_size(&self, with_added_tokens: bool) -> PyResult<usize> {
-        Ok(self.tokenizer.get_vocab_size(with_added_tokens))
+    fn get_vocab_size(&self, with_added_tokens: bool) -> usize {
+        self.tokenizer.get_vocab_size(with_added_tokens)
    }
 
     /// Enable truncation

View File

@@ -177,7 +177,7 @@ impl UnigramTrainer {
         &self,
         sentences: &[Sentence],
         _progress: &Option<ProgressBar>,
-    ) -> Result<Vec<SentencePiece>> {
+    ) -> Vec<SentencePiece> {
         // Put all sentences in a string, separated by \0
         let total: usize = sentences
             .iter()
@@ -245,7 +245,7 @@
             }
         }
         to_log_prob(&mut seed_sentencepieces);
-        Ok(seed_sentencepieces)
+        seed_sentencepieces
     }
 
     fn prune_sentence_pieces(
         &self,
@@ -469,7 +469,7 @@
         // We use a UNK token when training, whatever the `self.unk_token`
         pieces.push(("<UNK>".into(), f64::NAN));
-        pieces.extend(self.make_seed_sentence_pieces(&sentences, &progress)?);
+        pieces.extend(self.make_seed_sentence_pieces(&sentences, &progress));
 
         self.finalize_progress(&progress, sentences.len());
 
         // Useful to check compatibility with spm.
@@ -604,9 +604,7 @@ mod tests {
         assert_eq!(required_chars.len(), 13);
 
         let progress = None;
-        let table = trainer
-            .make_seed_sentence_pieces(&sentences, &progress)
-            .unwrap();
+        let table = trainer.make_seed_sentence_pieces(&sentences, &progress);
 
         let target_strings = vec![
             "s", "i", " ", "", "", "", "", "", "", "", "h", "a", "T", "is ", "s ",