mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Rust - Prepare for release 0.11.0 (#789)
This commit is contained in:
@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## [Unreleased]
|
## [0.11.0]
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- [#236]: Fix a bug with offsets being shifted when there are sub-sequences (Usually with
|
- [#236]: Fix a bug with offsets being shifted when there are sub-sequences (Usually with
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
name = "tokenizers"
|
name = "tokenizers"
|
||||||
version = "0.10.1"
|
version = "0.11.0"
|
||||||
homepage = "https://github.com/huggingface/tokenizers"
|
homepage = "https://github.com/huggingface/tokenizers"
|
||||||
repository = "https://github.com/huggingface/tokenizers"
|
repository = "https://github.com/huggingface/tokenizers"
|
||||||
documentation = "https://docs.rs/tokenizers/"
|
documentation = "https://docs.rs/tokenizers/"
|
||||||
|
@ -1,15 +1,15 @@
|
|||||||
pub mod cache;
|
pub(crate) mod cache;
|
||||||
pub mod from_pretrained;
|
pub(crate) mod from_pretrained;
|
||||||
pub mod iter;
|
pub mod iter;
|
||||||
pub mod padding;
|
pub mod padding;
|
||||||
pub mod parallelism;
|
pub mod parallelism;
|
||||||
pub mod progress;
|
pub(crate) mod progress;
|
||||||
pub mod truncation;
|
pub mod truncation;
|
||||||
|
|
||||||
use serde::{Serialize, Serializer};
|
use serde::{Serialize, Serializer};
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
|
|
||||||
pub fn ordered_map<S, K, V>(
|
pub(crate) fn ordered_map<S, K, V>(
|
||||||
value: &HashMap<K, V>,
|
value: &HashMap<K, V>,
|
||||||
serializer: S,
|
serializer: S,
|
||||||
) -> std::result::Result<S::Ok, S::Error>
|
) -> std::result::Result<S::Ok, S::Error>
|
||||||
|
@ -26,8 +26,6 @@ pub enum TruncationError {
|
|||||||
SecondSequenceNotProvided,
|
SecondSequenceNotProvided,
|
||||||
/// We cannot truncate the target sequence enough to respect the provided max length.
|
/// We cannot truncate the target sequence enough to respect the provided max length.
|
||||||
SequenceTooShort,
|
SequenceTooShort,
|
||||||
/// We cannot truncate with the given constraints.
|
|
||||||
MaxLengthTooLow,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for TruncationError {
|
impl std::fmt::Display for TruncationError {
|
||||||
@ -41,10 +39,6 @@ impl std::fmt::Display for TruncationError {
|
|||||||
fmt,
|
fmt,
|
||||||
"Truncation error: Sequence to truncate too short to respect the provided max_length"
|
"Truncation error: Sequence to truncate too short to respect the provided max_length"
|
||||||
),
|
),
|
||||||
MaxLengthTooLow => write!(
|
|
||||||
fmt,
|
|
||||||
"Truncation error: Specified max length is too low \
|
|
||||||
to respect the various constraints"),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user