mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Cleans up a few pattern matches by converting them into their Option/Result combinator equivalents
This commit is contained in:
committed by
Anthony MOI
parent
cca5d43038
commit
7bee825238
@ -439,10 +439,7 @@ impl Model for Unigram {
|
||||
}
|
||||
|
||||
fn id_to_token(&self, id: u32) -> Option<String> {
|
||||
match self.vocab.get(id as usize) {
|
||||
Some(item) => Some(item.0.clone()),
|
||||
None => None,
|
||||
}
|
||||
self.vocab.get(id as usize).map(|item| item.0.clone())
|
||||
}
|
||||
|
||||
fn save(&self, folder: &Path, name: Option<&str>) -> Result<Vec<PathBuf>> {
|
||||
|
@ -247,14 +247,13 @@ impl Encoding {
|
||||
/// Get the offsets of the word at the given index in the input sequence.
|
||||
pub fn word_to_chars(&self, word: u32, sequence_id: usize) -> Option<Offsets> {
|
||||
self.word_to_tokens(word, sequence_id)
|
||||
.map(|(start, end)| {
|
||||
.and_then(|(start, end)| {
|
||||
if end == 0 {
|
||||
None
|
||||
} else {
|
||||
Some((self.offsets[start].0, self.offsets[end - 1].1))
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
}
|
||||
|
||||
/// Get the offsets of the token at the given index.
|
||||
@ -288,8 +287,7 @@ impl Encoding {
|
||||
pub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32> {
|
||||
Some(
|
||||
self.char_to_token(pos, sequence_id)
|
||||
.map(|token| self.token_to_word(token))
|
||||
.flatten()?
|
||||
.and_then(|token| self.token_to_word(token))?
|
||||
.1,
|
||||
)
|
||||
}
|
||||
|
@ -694,10 +694,9 @@ where
|
||||
|
||||
// Encode each sequence
|
||||
let encoding = self.encode_single_sequence(sequence, 0, OffsetType::Byte)?;
|
||||
let pair_encoding = match pair {
|
||||
Some(sequence) => Some(self.encode_single_sequence(sequence, 1, OffsetType::Byte)?),
|
||||
None => None,
|
||||
};
|
||||
let pair_encoding = pair
|
||||
.map(|sequence| self.encode_single_sequence(sequence, 1, OffsetType::Byte))
|
||||
.transpose()?;
|
||||
|
||||
// And finally post process
|
||||
self.post_process(encoding, pair_encoding, add_special_tokens)
|
||||
@ -738,10 +737,9 @@ where
|
||||
|
||||
// Encode each sequence
|
||||
let encoding = self.encode_single_sequence(sequence, 0, OffsetType::Char)?;
|
||||
let pair_encoding = match pair {
|
||||
Some(sequence) => Some(self.encode_single_sequence(sequence, 1, OffsetType::Char)?),
|
||||
None => None,
|
||||
};
|
||||
let pair_encoding = pair
|
||||
.map(|sequence| self.encode_single_sequence(sequence, 1, OffsetType::Char))
|
||||
.transpose()?;
|
||||
|
||||
// And finally post process
|
||||
self.post_process(encoding, pair_encoding, add_special_tokens)
|
||||
|
Reference in New Issue
Block a user