mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Fix off-by-one error in tokenizer::normalizer::Range::len (#1638)
This commit is contained in:
@@ -45,8 +45,8 @@ where
|
||||
|
||||
match range.start_bound() {
|
||||
Bound::Unbounded => Some(end),
|
||||
-Bound::Included(i) => Some(end - (*i + 1)),
|
||||
-Bound::Excluded(i) => Some(end - *i),
|
||||
+Bound::Included(i) => Some(end - *i),
|
||||
+Bound::Excluded(i) => Some(end - (*i + 1)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1013,6 +1013,20 @@ mod tests {
|
||||
use regex::Regex;
|
||||
use unicode_categories::UnicodeCategories;
|
||||
|
||||
+#[test]
|
||||
+fn test_len_range_inclusive() {
|
||||
+let range = Range::Original(3..=7);
|
||||
+let len = range.len();
|
||||
+assert_eq!(len, Some(5)); // 7 - 3 + 1 = 5
|
||||
+}
|
||||
|
||||
+#[test]
|
||||
+fn test_len_range_exclusive() {
|
||||
+let range = Range::Original(3..7);
|
||||
+let len = range.len();
|
||||
+assert_eq!(len, Some(4)); // 7 - 3 = 4
|
||||
+}
|
||||
|
||||
#[test]
|
||||
fn nfd_adds_new_chars() {
|
||||
let mut n = NormalizedString::from("élégant");
|
||||
|
Reference in New Issue
Block a user