mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Fix off-by-one error in tokenizer::normalizer::Range::len (#1638)
This commit is contained in:
@ -45,8 +45,8 @@ where
|
|||||||
|
|
||||||
match range.start_bound() {
|
match range.start_bound() {
|
||||||
Bound::Unbounded => Some(end),
|
Bound::Unbounded => Some(end),
|
||||||
Bound::Included(i) => Some(end - (*i + 1)),
|
Bound::Included(i) => Some(end - *i),
|
||||||
Bound::Excluded(i) => Some(end - *i),
|
Bound::Excluded(i) => Some(end - (*i + 1)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1013,6 +1013,20 @@ mod tests {
|
|||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use unicode_categories::UnicodeCategories;
|
use unicode_categories::UnicodeCategories;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_len_range_inclusive() {
|
||||||
|
let range = Range::Original(3..=7);
|
||||||
|
let len = range.len();
|
||||||
|
assert_eq!(len, Some(5)); // 7 - 3 + 1 = 5
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_len_range_exclusive() {
|
||||||
|
let range = Range::Original(3..7);
|
||||||
|
let len = range.len();
|
||||||
|
assert_eq!(len, Some(4)); // 7 - 3 = 4
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn nfd_adds_new_chars() {
|
fn nfd_adds_new_chars() {
|
||||||
let mut n = NormalizedString::from("élégant");
|
let mut n = NormalizedString::from("élégant");
|
||||||
|
Reference in New Issue
Block a user