Fix off-by-one error in tokenizer::normalizer::Range::len (#1638)

Author: Ryan Landay
Date: 2024-10-14 02:40:17 -04:00
Committed by: GitHub
Parent: bce68a60cb
Commit: 9b77c054ef


@@ -45,8 +45,8 @@ where
         match range.start_bound() {
             Bound::Unbounded => Some(end),
-            Bound::Included(i) => Some(end - (*i + 1)),
-            Bound::Excluded(i) => Some(end - *i),
+            Bound::Included(i) => Some(end - *i),
+            Bound::Excluded(i) => Some(end - (*i + 1)),
         }
     }
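
The fix swaps the two start-bound arms: judging from the `Bound::Unbounded => Some(end)` arm, `end` already holds the exclusive end index of the range, so an included start `i` spans `end - i` elements, while an excluded start begins at `i + 1` and spans `end - (i + 1)`. Below is a minimal standalone sketch of that arithmetic; the helper `start_len` is hypothetical and not part of the patch.

use std::ops::{Bound, RangeBounds};

// Hypothetical helper mirroring the corrected match arms; `end` is assumed
// to be the exclusive end index derived from the range's end bound.
fn start_len(range: impl RangeBounds<usize>, end: usize) -> usize {
    match range.start_bound() {
        Bound::Unbounded => end,
        // Included start i: the range covers [i, end), i.e. end - i elements.
        Bound::Included(i) => end - *i,
        // Excluded start i: the range covers (i, end) == [i + 1, end).
        Bound::Excluded(i) => end - (*i + 1),
    }
}

fn main() {
    assert_eq!(start_len(3..=7, 8), 5); // {3, 4, 5, 6, 7}
    assert_eq!(start_len(3..7, 7), 4);  // {3, 4, 5, 6}
}

The second hunk below adds regression tests covering the inclusive and exclusive cases.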
@@ -1013,6 +1013,20 @@ mod tests {
     use regex::Regex;
     use unicode_categories::UnicodeCategories;
 
+    #[test]
+    fn test_len_range_inclusive() {
+        let range = Range::Original(3..=7);
+        let len = range.len();
+        assert_eq!(len, Some(5)); // 7 - 3 + 1 = 5
+    }
+
+    #[test]
+    fn test_len_range_exclusive() {
+        let range = Range::Original(3..7);
+        let len = range.len();
+        assert_eq!(len, Some(4)); // 7 - 3 = 4
+    }
+
     #[test]
     fn nfd_adds_new_chars() {
         let mut n = NormalizedString::from("élégant");