mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
NormalizedString - Fix added chars at beginning
This commit is contained in:
@ -91,10 +91,10 @@ impl NormalizedString {
|
||||
// This is a newly inserted character, so we use the alignment from the
|
||||
// previous one
|
||||
Ordering::Greater => {
|
||||
offset += 1;
|
||||
if idx < 1 {
|
||||
Some((0, 0))
|
||||
} else {
|
||||
offset += 1;
|
||||
self.alignments.get(idx - 1).copied()
|
||||
}
|
||||
}
|
||||
@ -330,4 +330,25 @@ mod tests {
|
||||
assert_eq!(world_n, "world");
|
||||
assert_eq!(world_o, "World");
|
||||
}
|
||||
|
||||
// Regression test for characters added at both edges of the string:
// "Hello" is transformed into " Hello " and the full normalized range is
// expected to map back to the original "Hello".
#[test]
|
||||
fn added_around_edges() {
|
||||
let mut n = NormalizedString::from("Hello");
|
||||
n.transform(
|
||||
vec![
|
||||
// NOTE(review): the second tuple element is presumably the change in
// character count (1 = newly inserted, 0 = kept as-is) — confirm
// against the `transform` documentation.
// Leading space: a character added before the first original character.
(' ', 1),
|
||||
('H', 0),
|
||||
('e', 0),
|
||||
('l', 0),
|
||||
('l', 0),
|
||||
('o', 0),
|
||||
// Trailing space: a character added after the last original character.
(' ', 1),
|
||||
]
|
||||
.into_iter(),
|
||||
// NOTE(review): presumably the initial offset passed to `transform` — confirm.
0,
|
||||
);
|
||||
|
||||
// The normalized text now carries the two added spaces.
assert_eq!(&n.normalized, " Hello ");
|
||||
// Looking up the whole normalized range must yield the original text,
// without the added spaces.
assert_eq!(n.get_range_original(0..n.normalized.len()), Some("Hello"));
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user