mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Fix Clippy warnings for Rust 1.51
This commit is contained in:
@ -89,10 +89,9 @@ impl<'c, T: neon::object::This> Extract for CallContext<'c, T> {
|
||||
self.argument_opt(pos)
|
||||
.map(|v| {
|
||||
let vec = v.downcast::<JsArray>()?.to_vec(self)?;
|
||||
Ok(vec
|
||||
.into_iter()
|
||||
vec.into_iter()
|
||||
.map(|v| E::from_value(v, self))
|
||||
.collect::<LibResult<Vec<_>>>()?)
|
||||
.collect::<LibResult<Vec<_>>>()
|
||||
})
|
||||
.map_or(Ok(None), |v| v.map(Some))
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#![warn(clippy::all)]
|
||||
// We need to allow these to use !declare_types
|
||||
#![allow(clippy::unnecessary_wraps)]
|
||||
#![allow(clippy::upper_case_acronyms)]
|
||||
|
||||
extern crate neon;
|
||||
extern crate neon_serde;
|
||||
|
@ -23,10 +23,9 @@ impl Display for PyError {
|
||||
impl std::error::Error for PyError {}
|
||||
|
||||
pub struct ToPyResult<T>(pub Result<T>);
|
||||
impl<T> std::convert::Into<PyResult<T>> for ToPyResult<T> {
|
||||
fn into(self) -> PyResult<T> {
|
||||
self.0
|
||||
.map_err(|e| exceptions::PyException::new_err(format!("{}", e)))
|
||||
impl<T> From<ToPyResult<T>> for PyResult<T> {
|
||||
fn from(v: ToPyResult<T>) -> Self {
|
||||
v.0.map_err(|e| exceptions::PyException::new_err(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
impl<T> ToPyResult<T> {
|
||||
|
@ -1,4 +1,5 @@
|
||||
#![warn(clippy::all)]
|
||||
#![allow(clippy::upper_case_acronyms)]
|
||||
|
||||
extern crate tokenizers as tk;
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#![warn(clippy::all)]
|
||||
#![allow(clippy::upper_case_acronyms)]
|
||||
#![doc(html_favicon_url = "https://huggingface.co/favicon.ico")]
|
||||
#![doc(html_logo_url = "https://huggingface.co/landing/assets/huggingface_logo.svg")]
|
||||
|
||||
|
@ -252,7 +252,7 @@ impl Unigram {
|
||||
/// The starting position (in utf-8) of this node. The entire best
|
||||
/// path can be constructed by backtracking along this link.
|
||||
starts_at: Option<usize>,
|
||||
};
|
||||
}
|
||||
impl Default for BestPathNode {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
|
@ -44,16 +44,16 @@ impl PostProcessor for BertProcessing {
|
||||
return PostProcessor::default_process(encoding, pair_encoding, add_special_tokens);
|
||||
}
|
||||
|
||||
let ids = [&[self.cls.1], &encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], &encoding.get_type_ids()[..], &[0]].concat();
|
||||
let ids = [&[self.cls.1], encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], encoding.get_type_ids(), &[0]].concat();
|
||||
let tokens = [
|
||||
&[self.cls.0.clone()],
|
||||
&encoding.get_tokens()[..],
|
||||
encoding.get_tokens(),
|
||||
&[self.sep.0.clone()],
|
||||
]
|
||||
.concat();
|
||||
let words = [&[None], &encoding.get_word_ids()[..], &[None]].concat();
|
||||
let offsets = [&[(0, 0)], &encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let words = [&[None], encoding.get_word_ids(), &[None]].concat();
|
||||
let offsets = [&[(0, 0)], encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let special_tokens = [&[1u32], &vec![0; encoding.get_ids().len()][..], &[1]].concat();
|
||||
let attention_mask = vec![1; ids.len()];
|
||||
|
||||
@ -72,16 +72,16 @@ impl PostProcessor for BertProcessing {
|
||||
.take_overflowing()
|
||||
.into_iter()
|
||||
.map(|encoding| {
|
||||
let ids = [&[self.cls.1], &encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], &encoding.get_type_ids()[..], &[0]].concat();
|
||||
let ids = [&[self.cls.1], encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], encoding.get_type_ids(), &[0]].concat();
|
||||
let tokens = [
|
||||
&[self.cls.0.clone()],
|
||||
&encoding.get_tokens()[..],
|
||||
encoding.get_tokens(),
|
||||
&[self.sep.0.clone()],
|
||||
]
|
||||
.concat();
|
||||
let words = [&[None], &encoding.get_word_ids()[..], &[None]].concat();
|
||||
let offsets = [&[(0, 0)], &encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let words = [&[None], encoding.get_word_ids(), &[None]].concat();
|
||||
let offsets = [&[(0, 0)], encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let special_tokens =
|
||||
[&[1u32], &vec![0; encoding.get_ids().len()][..], &[1]].concat();
|
||||
let attention_mask = vec![1; ids.len()];
|
||||
@ -106,11 +106,11 @@ impl PostProcessor for BertProcessing {
|
||||
);
|
||||
|
||||
if let Some(mut encoding) = pair_encoding {
|
||||
let pair_ids = [&encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let pair_type_ids = [&encoding.get_type_ids()[..], &[1]].concat();
|
||||
let pair_tokens = [&encoding.get_tokens()[..], &[self.sep.0.clone()]].concat();
|
||||
let pair_words = [&encoding.get_word_ids()[..], &[None]].concat();
|
||||
let pair_offsets = [&encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let pair_ids = [encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let pair_type_ids = [encoding.get_type_ids(), &[1]].concat();
|
||||
let pair_tokens = [encoding.get_tokens(), &[self.sep.0.clone()]].concat();
|
||||
let pair_words = [encoding.get_word_ids(), &[None]].concat();
|
||||
let pair_offsets = [encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let pair_special_tokens =
|
||||
[&vec![0u32; encoding.get_type_ids().len()][..], &[1]].concat();
|
||||
let pair_attention_mask = vec![1; pair_ids.len()];
|
||||
@ -130,12 +130,11 @@ impl PostProcessor for BertProcessing {
|
||||
.take_overflowing()
|
||||
.into_iter()
|
||||
.map(|encoding| {
|
||||
let pair_ids = [&encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let pair_type_ids = [&encoding.get_type_ids()[..], &[1]].concat();
|
||||
let pair_tokens =
|
||||
[&encoding.get_tokens()[..], &[self.sep.0.clone()]].concat();
|
||||
let pair_words = [&encoding.get_word_ids()[..], &[None]].concat();
|
||||
let pair_offsets = [&encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let pair_ids = [encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let pair_type_ids = [encoding.get_type_ids(), &[1]].concat();
|
||||
let pair_tokens = [encoding.get_tokens(), &[self.sep.0.clone()]].concat();
|
||||
let pair_words = [encoding.get_word_ids(), &[None]].concat();
|
||||
let pair_offsets = [encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let pair_special_tokens =
|
||||
[&vec![0u32; encoding.get_type_ids().len()][..], &[1]].concat();
|
||||
let pair_attention_mask = vec![1; pair_ids.len()];
|
||||
|
@ -77,16 +77,16 @@ impl PostProcessor for RobertaProcessing {
|
||||
return PostProcessor::default_process(encoding, pair_encoding, add_special_tokens);
|
||||
}
|
||||
|
||||
let ids = [&[self.cls.1], &encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], &encoding.get_type_ids()[..], &[0]].concat();
|
||||
let ids = [&[self.cls.1], encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], encoding.get_type_ids(), &[0]].concat();
|
||||
let tokens = [
|
||||
&[self.cls.0.clone()],
|
||||
&encoding.get_tokens()[..],
|
||||
encoding.get_tokens(),
|
||||
&[self.sep.0.clone()],
|
||||
]
|
||||
.concat();
|
||||
let words = [&[None], &encoding.get_word_ids()[..], &[None]].concat();
|
||||
let offsets = [&[(0, 0)], &encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let words = [&[None], encoding.get_word_ids(), &[None]].concat();
|
||||
let offsets = [&[(0, 0)], encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let special_tokens = [&[1u32], &vec![0; encoding.get_ids().len()][..], &[1]].concat();
|
||||
let attention_mask = vec![1; ids.len()];
|
||||
|
||||
@ -105,16 +105,16 @@ impl PostProcessor for RobertaProcessing {
|
||||
.take_overflowing()
|
||||
.into_iter()
|
||||
.map(|encoding| {
|
||||
let ids = [&[self.cls.1], &encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], &encoding.get_type_ids()[..], &[0]].concat();
|
||||
let ids = [&[self.cls.1], encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let type_ids = [&[0], encoding.get_type_ids(), &[0]].concat();
|
||||
let tokens = [
|
||||
&[self.cls.0.clone()],
|
||||
&encoding.get_tokens()[..],
|
||||
encoding.get_tokens(),
|
||||
&[self.sep.0.clone()],
|
||||
]
|
||||
.concat();
|
||||
let words = [&[None], &encoding.get_word_ids()[..], &[None]].concat();
|
||||
let offsets = [&[(0, 0)], &encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let words = [&[None], encoding.get_word_ids(), &[None]].concat();
|
||||
let offsets = [&[(0, 0)], encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let special_tokens =
|
||||
[&[1u32], &vec![0; encoding.get_ids().len()][..], &[1]].concat();
|
||||
let attention_mask = vec![1; ids.len()];
|
||||
@ -139,16 +139,16 @@ impl PostProcessor for RobertaProcessing {
|
||||
);
|
||||
|
||||
if let Some(mut encoding) = pair_encoding {
|
||||
let pair_ids = [&[self.sep.1], &encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let pair_ids = [&[self.sep.1], encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let pair_type_ids = vec![0; encoding.get_ids().len() + 2];
|
||||
let pair_tokens = [
|
||||
&[self.sep.0.clone()],
|
||||
&encoding.get_tokens()[..],
|
||||
encoding.get_tokens(),
|
||||
&[self.sep.0.clone()],
|
||||
]
|
||||
.concat();
|
||||
let pair_words = [&[None], &encoding.get_word_ids()[..], &[None]].concat();
|
||||
let pair_offsets = [&[(0, 0)], &encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let pair_words = [&[None], encoding.get_word_ids(), &[None]].concat();
|
||||
let pair_offsets = [&[(0, 0)], encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let pair_special_tokens =
|
||||
[&[1], &vec![0u32; encoding.get_type_ids().len()][..], &[1]].concat();
|
||||
let pair_attention_mask = vec![1; pair_ids.len()];
|
||||
@ -168,18 +168,16 @@ impl PostProcessor for RobertaProcessing {
|
||||
.take_overflowing()
|
||||
.into_iter()
|
||||
.map(|encoding| {
|
||||
let pair_ids =
|
||||
[&[self.sep.1], &encoding.get_ids()[..], &[self.sep.1]].concat();
|
||||
let pair_ids = [&[self.sep.1], encoding.get_ids(), &[self.sep.1]].concat();
|
||||
let pair_type_ids = vec![0; encoding.get_ids().len() + 2];
|
||||
let pair_tokens = [
|
||||
&[self.sep.0.clone()],
|
||||
&encoding.get_tokens()[..],
|
||||
encoding.get_tokens(),
|
||||
&[self.sep.0.clone()],
|
||||
]
|
||||
.concat();
|
||||
let pair_words = [&[None], &encoding.get_word_ids()[..], &[None]].concat();
|
||||
let pair_offsets =
|
||||
[&[(0, 0)], &encoding.get_offsets()[..], &[(0, 0)]].concat();
|
||||
let pair_words = [&[None], encoding.get_word_ids(), &[None]].concat();
|
||||
let pair_offsets = [&[(0, 0)], encoding.get_offsets(), &[(0, 0)]].concat();
|
||||
let pair_special_tokens =
|
||||
[&[1], &vec![0u32; encoding.get_type_ids().len()][..], &[1]].concat();
|
||||
let pair_attention_mask = vec![1; pair_ids.len()];
|
||||
|
@ -526,15 +526,13 @@ impl TemplateProcessing {
|
||||
.take_overflowing()
|
||||
.into_iter()
|
||||
.flat_map(|encoding| {
|
||||
let mut overflowings = vec![];
|
||||
|
||||
// 1. The pair itself
|
||||
overflowings.push(self.apply_template(
|
||||
let mut overflowings = vec![self.apply_template(
|
||||
template,
|
||||
encoding.clone(),
|
||||
pair.clone(),
|
||||
add_special_tokens,
|
||||
));
|
||||
)];
|
||||
|
||||
// 2. Its overflowings
|
||||
for other_o in &pair_overflowing {
|
||||
|
Reference in New Issue
Block a user