Mirror of https://github.com/mii443/tokenizers.git (synced 2025-12-05 12:18:20 +00:00)
Fix clippy warnings
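Nearly every hunk in this commit applies the same fix: a method that can never fail stops returning `PyResult<T>` and returns `T` directly, dropping the `Ok(...)` wrapper. That is the shape flagged by clippy's `unnecessary_wraps` lint (naming the exact lint is an assumption; the commit message only says "clippy warnings"). A minimal, self-contained sketch of the pattern, using a stand-in `Token` type rather than anything from this repository:

```rust
// Stand-in type for illustration only.
struct Token {
    id: u32,
}

// Before: no code path can return Err, so the Result wrapper only forces
// callers to write `?` or `.unwrap()`. Clippy flags this shape.
#[allow(clippy::unnecessary_wraps)]
fn get_id_wrapped(token: &Token) -> Result<u32, String> {
    Ok(token.id)
}

// After: return the value directly; callers use it as-is.
fn get_id(token: &Token) -> u32 {
    token.id
}

fn main() {
    let t = Token { id: 42 };
    assert_eq!(get_id_wrapped(&t).unwrap(), get_id(&t));
}
```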
@@ -57,11 +57,9 @@ impl Decoder for PyDecoder {
 #[pymethods]
 impl PyDecoder {
     #[staticmethod]
-    fn custom(decoder: PyObject) -> PyResult<Self> {
-        let decoder = PyDecoderWrapper::Custom(
-            CustomDecoder::new(decoder).map(|d| Arc::new(RwLock::new(d)))?,
-        );
-        Ok(PyDecoder::new(decoder))
+    fn custom(decoder: PyObject) -> Self {
+        let decoder = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(decoder))));
+        PyDecoder::new(decoder)
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -147,8 +145,8 @@ pub struct PyByteLevelDec {}
 #[pymethods]
 impl PyByteLevelDec {
     #[new]
-    fn new() -> PyResult<(Self, PyDecoder)> {
-        Ok((PyByteLevelDec {}, ByteLevel::default().into()))
+    fn new() -> (Self, PyDecoder) {
+        (PyByteLevelDec {}, ByteLevel::default().into())
     }
 }
 
@@ -188,8 +186,8 @@ impl PyWordPieceDec {
 
     #[new]
     #[args(prefix = "String::from(\"##\")", cleanup = "true")]
-    fn new(prefix: String, cleanup: bool) -> PyResult<(Self, PyDecoder)> {
-        Ok((PyWordPieceDec {}, WordPiece::new(prefix, cleanup).into()))
+    fn new(prefix: String, cleanup: bool) -> (Self, PyDecoder) {
+        (PyWordPieceDec {}, WordPiece::new(prefix, cleanup).into())
     }
 }
 
@@ -230,11 +228,11 @@ impl PyMetaspaceDec {
 
     #[new]
     #[args(replacement = "PyChar('▁')", add_prefix_space = "true")]
-    fn new(replacement: PyChar, add_prefix_space: bool) -> PyResult<(Self, PyDecoder)> {
-        Ok((
+    fn new(replacement: PyChar, add_prefix_space: bool) -> (Self, PyDecoder) {
+        (
             PyMetaspaceDec {},
             Metaspace::new(replacement.0, add_prefix_space).into(),
-        ))
+        )
     }
 }
 
@@ -261,8 +259,8 @@ impl PyBPEDecoder {
 
     #[new]
     #[args(suffix = "String::from(\"</w>\")")]
-    fn new(suffix: String) -> PyResult<(Self, PyDecoder)> {
-        Ok((PyBPEDecoder {}, BPEDecoder::new(suffix).into()))
+    fn new(suffix: String) -> (Self, PyDecoder) {
+        (PyBPEDecoder {}, BPEDecoder::new(suffix).into())
     }
 }
 
@@ -272,8 +270,8 @@ pub(crate) struct CustomDecoder {
 }
 
 impl CustomDecoder {
-    pub(crate) fn new(inner: PyObject) -> PyResult<Self> {
-        Ok(CustomDecoder { inner })
+    pub(crate) fn new(inner: PyObject) -> Self {
+        CustomDecoder { inner }
     }
 }
 
@@ -387,8 +385,7 @@ mod test {
             let obj: PyObject = Py::new(py, py_msp).unwrap().into_py(py);
            obj
         });
-        let py_seq =
-            PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj).unwrap())));
+        let py_seq = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj))));
         assert!(serde_json::to_string(&py_seq).is_err());
     }
 }
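Note the knock-on effect in the hunks above: once `CustomDecoder::new` is infallible, its call sites simplify as well. The `map`/`?` plumbing in `custom` collapses into a plain nested call, and the test drops its `.unwrap()`. A hedged sketch of that ripple, with stand-in types in place of the real PyO3-backed ones:

```rust
use std::sync::{Arc, RwLock};

// Stand-ins for the types in the diff; not the real definitions.
struct CustomDecoder {
    inner: String,
}

impl CustomDecoder {
    // Infallible: it only stores the value, so no Result is needed.
    fn new(inner: String) -> Self {
        CustomDecoder { inner }
    }
}

enum PyDecoderWrapper {
    Custom(Arc<RwLock<CustomDecoder>>),
}

fn main() {
    // The call site composes directly, with no error plumbing in between.
    let wrapper = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(
        String::from("demo"),
    ))));
    // Irrefutable pattern: the enum has a single variant.
    let PyDecoderWrapper::Custom(decoder) = wrapper;
    println!("inner = {}", decoder.read().unwrap().inner);
}
```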
@@ -41,10 +41,10 @@ impl PySequenceProtocol for PyEncoding {
 #[pymethods]
 impl PyEncoding {
     #[new]
-    fn new() -> PyResult<Self> {
-        Ok(Self {
+    fn new() -> Self {
+        Self {
             encoding: tk::tokenizer::Encoding::default(),
-        })
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -441,8 +441,7 @@ impl PyEncoding {
     /// The length of previous content to be included in each overflowing piece
     #[args(stride = "0")]
     #[text_signature = "(self, max_length, stride=0)"]
-    fn truncate(&mut self, max_length: usize, stride: usize) -> PyResult<()> {
+    fn truncate(&mut self, max_length: usize, stride: usize) {
         self.encoding.truncate(max_length, stride);
-        Ok(())
     }
 }
@@ -91,12 +91,12 @@ where
 #[pymethods]
 impl PyModel {
     #[new]
-    fn __new__() -> PyResult<Self> {
+    fn __new__() -> Self {
         // Instantiate a default empty model. This doesn't really make sense, but we need
         // to be able to instantiate an empty model for pickle capabilities.
-        Ok(PyModel {
+        PyModel {
             model: Arc::new(RwLock::new(BPE::default().into())),
-        })
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
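The comment kept in this hunk states the real constraint: Python's pickle protocol re-creates an instance through `__new__` with no arguments and only then restores the contents via `__setstate__`, so an otherwise meaningless empty constructor has to exist. A hedged sketch of that shape (stand-in `Model` type; assumes the PyO3 0.x API these bindings use):

```rust
use pyo3::prelude::*;
use pyo3::types::PyBytes;

#[pyclass]
struct Model {
    vocab: Vec<String>,
}

#[pymethods]
impl Model {
    #[new]
    fn new() -> Self {
        // Placeholder contents; pickle overwrites them via __setstate__.
        Model { vocab: Vec::new() }
    }

    fn __getstate__(&self, py: Python) -> PyObject {
        // Serialize the state into a bytes object for pickle.
        PyBytes::new(py, self.vocab.join("\n").as_bytes()).to_object(py)
    }

    fn __setstate__(&mut self, state: &PyBytes) -> PyResult<()> {
        // Restore the real contents over the placeholder from __new__.
        let text = std::str::from_utf8(state.as_bytes())
            .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?;
        self.vocab = text.lines().map(String::from).collect();
        Ok(())
    }
}
```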
@@ -102,10 +102,10 @@ impl Normalizer for PyNormalizer {
 #[pymethods]
 impl PyNormalizer {
     #[staticmethod]
-    fn custom(obj: PyObject) -> PyResult<Self> {
-        Ok(Self {
+    fn custom(obj: PyObject) -> Self {
+        Self {
             normalizer: PyNormalizerWrapper::Custom(CustomNormalizer::new(obj)).into(),
-        })
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -279,10 +279,10 @@ impl PyBertNormalizer {
         handle_chinese_chars: bool,
         strip_accents: Option<bool>,
         lowercase: bool,
-    ) -> PyResult<(Self, PyNormalizer)> {
+    ) -> (Self, PyNormalizer) {
         let normalizer =
             BertNormalizer::new(clean_text, handle_chinese_chars, strip_accents, lowercase);
-        Ok((PyBertNormalizer {}, normalizer.into()))
+        (PyBertNormalizer {}, normalizer.into())
     }
 }
 
@@ -293,8 +293,8 @@ pub struct PyNFD {}
 #[pymethods]
 impl PyNFD {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFD {}, PyNormalizer::new(NFD.into())))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFD {}, PyNormalizer::new(NFD.into()))
     }
 }
 
@@ -305,8 +305,8 @@ pub struct PyNFKD {}
 #[pymethods]
 impl PyNFKD {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFKD {}, NFKD.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFKD {}, NFKD.into())
     }
 }
 
@@ -317,8 +317,8 @@ pub struct PyNFC {}
 #[pymethods]
 impl PyNFC {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFC {}, NFC.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFC {}, NFC.into())
     }
 }
 
@@ -329,8 +329,8 @@ pub struct PyNFKC {}
 #[pymethods]
 impl PyNFKC {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFKC {}, NFKC.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFKC {}, NFKC.into())
     }
 }
 
@@ -360,8 +360,8 @@ impl PySequence {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[PyList::empty(py)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[PyList::empty(py)])
     }
 }
 
@@ -379,8 +379,8 @@ pub struct PyLowercase {}
 #[pymethods]
 impl PyLowercase {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyLowercase {}, Lowercase.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyLowercase {}, Lowercase.into())
     }
 }
 
@@ -412,8 +412,8 @@ impl PyStrip {
 
     #[new]
     #[args(left = "true", right = "true")]
-    fn new(left: bool, right: bool) -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyStrip {}, Strip::new(left, right).into()))
+    fn new(left: bool, right: bool) -> (Self, PyNormalizer) {
+        (PyStrip {}, Strip::new(left, right).into())
     }
 }
 
@@ -424,8 +424,8 @@ pub struct PyStripAccents {}
 #[pymethods]
 impl PyStripAccents {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyStripAccents {}, StripAccents.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyStripAccents {}, StripAccents.into())
     }
 }
 
@@ -436,8 +436,8 @@ pub struct PyNmt {}
 #[pymethods]
 impl PyNmt {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNmt {}, Nmt.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNmt {}, Nmt.into())
     }
 }
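A side note on an attribute that appears throughout this diff: `#[args(...)]` is the PyO3 0.x syntax for Python-visible default argument values, written as stringified Rust expressions (later PyO3 releases replaced it with `#[pyo3(signature = ...)]`). A hedged sketch with a stand-in type:

```rust
use pyo3::prelude::*;

#[pyclass]
struct Strip {
    left: bool,
    right: bool,
}

#[pymethods]
impl Strip {
    // The defaults apply when the Python caller omits an argument:
    // Strip(), Strip(left=False), and Strip(False, False) all work.
    #[new]
    #[args(left = "true", right = "true")]
    fn new(left: bool, right: bool) -> Self {
        Strip { left, right }
    }
}
```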
@@ -101,10 +101,10 @@ impl PreTokenizer for PyPreTokenizer {
 #[pymethods]
 impl PyPreTokenizer {
     #[staticmethod]
-    fn custom(pretok: PyObject) -> PyResult<Self> {
-        Ok(PyPreTokenizer {
+    fn custom(pretok: PyObject) -> Self {
+        PyPreTokenizer {
             pretok: PyPreTokenizerWrapper::Custom(CustomPreTokenizer::new(pretok)).into(),
-        })
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -244,13 +244,13 @@ impl PyByteLevel {
 
     #[new]
     #[args(add_prefix_space = "true", _kwargs = "**")]
-    fn new(add_prefix_space: bool, _kwargs: Option<&PyDict>) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((
+    fn new(add_prefix_space: bool, _kwargs: Option<&PyDict>) -> (Self, PyPreTokenizer) {
+        (
             PyByteLevel {},
             ByteLevel::default()
                 .add_prefix_space(add_prefix_space)
                 .into(),
-        ))
+        )
     }
 
     /// Returns the alphabet used by this PreTokenizer.
@@ -278,8 +278,8 @@ pub struct PyWhitespace {}
 #[pymethods]
 impl PyWhitespace {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyWhitespace {}, Whitespace::default().into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyWhitespace {}, Whitespace::default().into())
     }
 }
 
@@ -290,8 +290,8 @@ pub struct PyWhitespaceSplit {}
 #[pymethods]
 impl PyWhitespaceSplit {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyWhitespaceSplit {}, WhitespaceSplit.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyWhitespaceSplit {}, WhitespaceSplit.into())
     }
 }
 
@@ -332,8 +332,8 @@ impl PySplit {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[" ", "removed"]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[" ", "removed"])
     }
 }
 
@@ -364,8 +364,8 @@ impl PyCharDelimiterSplit {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[" "]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[" "])
     }
 }
 
@@ -379,8 +379,8 @@ pub struct PyBertPreTokenizer {}
 #[pymethods]
 impl PyBertPreTokenizer {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyBertPreTokenizer {}, BertPreTokenizer.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyBertPreTokenizer {}, BertPreTokenizer.into())
     }
 }
 
@@ -391,8 +391,8 @@ pub struct PyPunctuation {}
 #[pymethods]
 impl PyPunctuation {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyPunctuation {}, Punctuation.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyPunctuation {}, Punctuation.into())
     }
 }
 
@@ -420,8 +420,8 @@ impl PySequence {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[PyList::empty(py)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[PyList::empty(py)])
     }
 }
 
@@ -465,11 +465,11 @@ impl PyMetaspace {
 
     #[new]
     #[args(replacement = "PyChar('▁')", add_prefix_space = "true")]
-    fn new(replacement: PyChar, add_prefix_space: bool) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((
+    fn new(replacement: PyChar, add_prefix_space: bool) -> (Self, PyPreTokenizer) {
+        (
             PyMetaspace {},
             Metaspace::new(replacement.0, add_prefix_space).into(),
-        ))
+        )
     }
 }
 
@@ -501,8 +501,8 @@ impl PyDigits {
 
     #[new]
     #[args(individual_digits = false)]
-    fn new(individual_digits: bool) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyDigits {}, Digits::new(individual_digits).into()))
+    fn new(individual_digits: bool) -> (Self, PyPreTokenizer) {
+        (PyDigits {}, Digits::new(individual_digits).into())
     }
 }
 
@@ -516,8 +516,8 @@ pub struct PyUnicodeScripts {}
 #[pymethods]
 impl PyUnicodeScripts {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyUnicodeScripts {}, UnicodeScripts::new().into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyUnicodeScripts {}, UnicodeScripts::new().into())
    }
 }
@@ -155,15 +155,15 @@ pub struct PyBertProcessing {}
 #[pymethods]
 impl PyBertProcessing {
     #[new]
-    fn new(sep: (String, u32), cls: (String, u32)) -> PyResult<(Self, PyPostProcessor)> {
-        Ok((
+    fn new(sep: (String, u32), cls: (String, u32)) -> (Self, PyPostProcessor) {
+        (
             PyBertProcessing {},
             PyPostProcessor::new(Arc::new(BertProcessing::new(sep, cls).into())),
-        ))
+        )
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[("", 0), ("", 0)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[("", 0), ("", 0)])
     }
 }
 
@@ -203,18 +203,18 @@ impl PyRobertaProcessing {
         cls: (String, u32),
         trim_offsets: bool,
         add_prefix_space: bool,
-    ) -> PyResult<(Self, PyPostProcessor)> {
+    ) -> (Self, PyPostProcessor) {
         let proc = RobertaProcessing::new(sep, cls)
             .trim_offsets(trim_offsets)
             .add_prefix_space(add_prefix_space);
-        Ok((
+        (
             PyRobertaProcessing {},
             PyPostProcessor::new(Arc::new(proc.into())),
-        ))
+        )
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[("", 0), ("", 0)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[("", 0), ("", 0)])
     }
 }
 
@@ -233,20 +233,17 @@ pub struct PyByteLevel {}
 impl PyByteLevel {
     #[new]
     #[args(trim_offsets = "None", _kwargs = "**")]
-    fn new(
-        trim_offsets: Option<bool>,
-        _kwargs: Option<&PyDict>,
-    ) -> PyResult<(Self, PyPostProcessor)> {
+    fn new(trim_offsets: Option<bool>, _kwargs: Option<&PyDict>) -> (Self, PyPostProcessor) {
         let mut byte_level = ByteLevel::default();
 
         if let Some(to) = trim_offsets {
             byte_level = byte_level.trim_offsets(to);
         }
 
-        Ok((
+        (
             PyByteLevel {},
             PyPostProcessor::new(Arc::new(byte_level.into())),
-        ))
+        )
     }
 }
@@ -25,21 +25,21 @@ impl PyToken {
     }
 
     #[getter]
-    fn get_id(&self) -> PyResult<u32> {
-        Ok(self.token.id)
+    fn get_id(&self) -> u32 {
+        self.token.id
     }
 
     #[getter]
-    fn get_value(&self) -> PyResult<&str> {
-        Ok(&self.token.value)
+    fn get_value(&self) -> &str {
+        &self.token.value
     }
 
     #[getter]
-    fn get_offsets(&self) -> PyResult<(usize, usize)> {
-        Ok(self.token.offsets)
+    fn get_offsets(&self) -> (usize, usize) {
+        self.token.offsets
     }
 
-    fn as_tuple(&self) -> PyResult<(u32, &str, (usize, usize))> {
-        Ok((self.token.id, &self.token.value, self.token.offsets))
+    fn as_tuple(&self) -> (u32, &str, (usize, usize)) {
+        (self.token.id, &self.token.value, self.token.offsets)
     }
 }
@@ -487,10 +487,9 @@ impl PyTokenizer {
         }
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
         let model = PyModel::from(BPE::default()).into_py(py);
-        let args = PyTuple::new(py, vec![model]);
-        Ok(args)
+        PyTuple::new(py, vec![model])
     }
 
     /// Instantiate a new :class:`~tokenizers.Tokenizer` from the given JSON string.
@@ -577,11 +576,10 @@ impl PyTokenizer {
     /// :param is_pair: Boolean indicating if the input would be a single sentence or a pair
     /// :return:
     #[text_signature = "(self, is_pair)"]
-    fn num_special_tokens_to_add(&self, is_pair: bool) -> PyResult<usize> {
-        Ok(self
-            .tokenizer
+    fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
+        self.tokenizer
             .get_post_processor()
-            .map_or(0, |p| p.added_tokens(is_pair)))
+            .map_or(0, |p| p.added_tokens(is_pair))
     }
 
     /// Get the underlying vocabulary
@@ -594,8 +592,8 @@ impl PyTokenizer {
     /// :obj:`Dict[str, int]`: The vocabulary
     #[args(with_added_tokens = true)]
     #[text_signature = "(self, with_added_tokens=True)"]
-    fn get_vocab(&self, with_added_tokens: bool) -> PyResult<HashMap<String, u32>> {
-        Ok(self.tokenizer.get_vocab(with_added_tokens))
+    fn get_vocab(&self, with_added_tokens: bool) -> HashMap<String, u32> {
+        self.tokenizer.get_vocab(with_added_tokens)
     }
 
     /// Get the size of the underlying vocabulary
@@ -608,8 +606,8 @@ impl PyTokenizer {
     /// :obj:`int`: The size of the vocabulary
     #[args(with_added_tokens = true)]
     #[text_signature = "(self, with_added_tokens=True)"]
-    fn get_vocab_size(&self, with_added_tokens: bool) -> PyResult<usize> {
-        Ok(self.tokenizer.get_vocab_size(with_added_tokens))
+    fn get_vocab_size(&self, with_added_tokens: bool) -> usize {
+        self.tokenizer.get_vocab_size(with_added_tokens)
     }
 
     /// Enable truncation
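What makes this whole cleanup possible is that PyO3 does not require `#[pymethods]` functions to return `PyResult<T>`: any return type convertible to a Python object is accepted, and `PyResult` is only needed when the method can actually raise. A sketch of both shapes side by side (stand-in `Counter` type; PyO3 assumed as a dependency):

```rust
use pyo3::prelude::*;

#[pyclass]
struct Counter {
    count: u64,
}

#[pymethods]
impl Counter {
    #[new]
    fn new() -> Self {
        Counter { count: 0 }
    }

    // Infallible: a plain return type, no Ok(...) wrapping required.
    fn increment(&mut self) -> u64 {
        self.count += 1;
        self.count
    }

    // Fallible: keep PyResult so the error surfaces as a Python exception.
    fn set(&mut self, value: i64) -> PyResult<()> {
        if value < 0 {
            return Err(pyo3::exceptions::PyValueError::new_err(
                "count cannot be negative",
            ));
        }
        self.count = value as u64;
        Ok(())
    }
}
```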