mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-31 12:39:21 +00:00
get_added_tokens_decoder
returns BTreeMap
This commit is contained in:
@ -25,6 +25,7 @@ use super::pre_tokenizers::PyPreTokenizer;
|
|||||||
use super::trainers::PyTrainer;
|
use super::trainers::PyTrainer;
|
||||||
use crate::processors::PyPostProcessor;
|
use crate::processors::PyPostProcessor;
|
||||||
use crate::utils::{MaybeSizedIterator, PyBufferedIterator};
|
use crate::utils::{MaybeSizedIterator, PyBufferedIterator};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
/// Represents a token that can be added to a :class:`~tokenizers.Tokenizer`.
|
/// Represents a token that can be added to a :class:`~tokenizers.Tokenizer`.
|
||||||
/// It can have special options that define the way it should behave.
|
/// It can have special options that define the way it should behave.
|
||||||
@ -668,12 +669,14 @@ impl PyTokenizer {
|
|||||||
/// :obj:`Dict[int, AddedToken]`: The vocabulary
|
/// :obj:`Dict[int, AddedToken]`: The vocabulary
|
||||||
#[pyo3(signature = ())]
|
#[pyo3(signature = ())]
|
||||||
#[pyo3(text_signature = "(self)")]
|
#[pyo3(text_signature = "(self)")]
|
||||||
fn get_added_tokens_decoder(&self) -> HashMap<u32, PyAddedToken> {
|
fn get_added_tokens_decoder(&self) -> BTreeMap<u32, PyAddedToken> {
|
||||||
self.tokenizer
|
let mut sorted_map = BTreeMap::new();
|
||||||
.get_added_tokens_decoder()
|
|
||||||
.into_iter()
|
for (key, value) in self.tokenizer.get_added_tokens_decoder() {
|
||||||
.map(|(key, value)| (key, value.into()))
|
sorted_map.insert(key, value.into());
|
||||||
.collect()
|
}
|
||||||
|
|
||||||
|
sorted_map
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the size of the underlying vocabulary
|
/// Get the size of the underlying vocabulary
|
||||||
|
Reference in New Issue
Block a user