mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 08:15:49 +00:00
Fixing NormalizedString append when normalized is empty. (#1717)
Co-authored-by: Anantha Kandrapu <anantkan@amazon.com>
This commit is contained in:
@ -517,6 +517,9 @@ impl NormalizedString {
|
||||
if let Some((b, prev)) = self.normalized.char_indices().last() {
|
||||
let transformations = std::iter::once((prev, 0)).chain(s.chars().map(|c| (c, 1)));
|
||||
self.transform_range(Range::Normalized(b..), transformations, 0);
|
||||
} else {
|
||||
let transformations = s.chars().map(|c| (c, 1));
|
||||
self.transform_range(Range::Normalized(..), transformations, 0);
|
||||
}
|
||||
self
|
||||
}
|
||||
@ -2284,4 +2287,24 @@ mod tests {
|
||||
s.lowercase();
|
||||
assert_eq!(s.get(), "a...");
|
||||
}
|
||||
|
||||
/// Regression test: `append` must work on a `NormalizedString` whose
/// normalized text has been emptied with `clear`, and the original text must
/// still be retrievable afterwards.
#[test]
fn test_append_after_clear() {
    let mut normalized = NormalizedString::from("Hello");
    assert_eq!(normalized.get(), "Hello");

    // Empty the normalized text; the assertions further down expect the
    // original "Hello" to remain available.
    normalized.clear();
    assert_eq!(normalized.get(), "");

    // Append while the normalized text is empty.
    normalized.append(" World");
    assert_eq!(normalized.get(), " World");

    // The original keeps its 5 bytes; the normalized text now has 6.
    assert_eq!(normalized.len_original(), 5);
    assert_eq!(normalized.len(), 6);

    // The original is still readable, and the appended normalized range is
    // expected to map back to an empty slice of the original.
    assert_eq!(
        normalized.get_range_original(Range::Original(0..5)),
        Some("Hello")
    );
    assert_eq!(
        normalized.get_range_original(Range::Normalized(0..6)),
        Some("")
    );

    // The full normalized range yields exactly the appended text.
    assert_eq!(
        normalized.get_range(Range::Normalized(0..6)),
        Some(" World")
    );
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user