get_added_tokens_decoder returns BTreeMap

This commit is contained in:
Arthur Zucker
2023-09-06 12:24:30 +00:00
parent a7ace4480d
commit efec086f35

View File

@ -25,6 +25,7 @@ use super::pre_tokenizers::PyPreTokenizer;
use super::trainers::PyTrainer; use super::trainers::PyTrainer;
use crate::processors::PyPostProcessor; use crate::processors::PyPostProcessor;
use crate::utils::{MaybeSizedIterator, PyBufferedIterator}; use crate::utils::{MaybeSizedIterator, PyBufferedIterator};
use std::collections::BTreeMap;
/// Represents a token that can be added to a :class:`~tokenizers.Tokenizer`. /// Represents a token that can be added to a :class:`~tokenizers.Tokenizer`.
/// It can have special options that define the way it should behave. /// It can have special options that define the way it should behave.
@ -668,12 +669,14 @@ impl PyTokenizer {
/// :obj:`Dict[int, AddedToken]`: The vocabulary /// :obj:`Dict[int, AddedToken]`: The vocabulary
#[pyo3(signature = ())]
#[pyo3(text_signature = "(self)")]
fn get_added_tokens_decoder(&self) -> BTreeMap<u32, PyAddedToken> {
    // Collect into a `BTreeMap` rather than a `HashMap` so the returned
    // mapping iterates in ascending key order instead of arbitrary hash order.
    // Each value is converted to `PyAddedToken` via `Into`.
    self.tokenizer
        .get_added_tokens_decoder()
        .into_iter()
        .map(|(id, token)| (id, token.into()))
        .collect()
}
/// Get the size of the underlying vocabulary /// Get the size of the underlying vocabulary