Fix clippy + feature test management. (#1580)

* Fix clippy + feature test management.

* That example was local oops.

* Clippy fix.

* Readme indentation.

* README update.
Nicolas Patry
2024-07-26 12:16:30 +02:00
committed by GitHub
parent 4ea2f235b0
commit a3ad85b3e8
7 changed files with 19 additions and 21 deletions

@@ -11,6 +11,7 @@
 //! sequences. The final result looks like this:
 //! - Single sequence: `[CLS] Hello there [SEP]`
 //! - Pair sequences: `[CLS] My name is Anthony [SEP] What is my name? [SEP]`
+//!
 //! With the type ids as following:
 //! ```markdown
 //! [CLS] ... [SEP] ... [SEP]
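The `//!` line added here only separates the example output from the type-id explanation in the rendered docs. For context, the template post-processor being documented can be configured roughly like this (a minimal sketch based on the crate's `TemplateProcessing` builder; the special-token ids below are illustrative placeholders, not real vocabulary ids):

    use tokenizers::processors::template::TemplateProcessing;

    // "[CLS] $A [SEP]" for single sequences, "[CLS] A [SEP] B [SEP]" for pairs,
    // with $A:0 / $B:1 producing the type ids shown above.
    let template = TemplateProcessing::builder()
        .try_single("[CLS] $A [SEP]").unwrap()
        .try_pair("[CLS] $A:0 [SEP] $B:1 [SEP]").unwrap()
        .special_tokens(vec![("[CLS]", 1), ("[SEP]", 2)])
        .build()
        .unwrap();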

@@ -1297,17 +1297,13 @@ where
 #[cfg(test)]
 mod test {
-    use crate::AddedToken;
-    use crate::Tokenizer;
     #[cfg(feature = "http")]
     #[test]
     fn test_decoding_with_added_bpe() {
         use crate::{
             normalizers,
             pre_tokenizers::split::{Split, SplitPattern},
-            NormalizerWrapper, PreTokenizerWrapper, SplitDelimiterBehavior,
+            AddedToken, NormalizerWrapper, PreTokenizerWrapper, SplitDelimiterBehavior, Tokenizer,
         };
         let mut tokenizer = Tokenizer::from_pretrained("meta-llama/Meta-Llama-3-8B", None).unwrap();
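The point of moving these imports: when the `http` feature is disabled, the whole `test_decoding_with_added_bpe` function disappears, so module-level `use crate::AddedToken;` / `use crate::Tokenizer;` would be left unused and trip clippy in a `-D warnings` build. Keeping feature-only imports inside the gated test avoids that. A hedged sketch of the pattern (the model identifier and test name are illustrative):

    #[cfg(test)]
    mod test {
        // No module-level imports that only the gated test needs.
        #[cfg(feature = "http")]
        #[test]
        fn needs_http_feature() {
            // The import is compiled out together with the test when `http`
            // is off, so it can never be reported as unused.
            use crate::Tokenizer;
            let tokenizer = Tokenizer::from_pretrained("gpt2", None).unwrap();
            assert!(tokenizer.get_vocab_size(true) > 0);
        }
    }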

@@ -305,6 +305,7 @@ impl NormalizedString {
     /// - `1` if this is a new char
     /// - `-N` if the char is right before N removed chars
     /// - `0` if the char is replacing the existing one
+    ///
     /// Since it is possible that the normalized string doesn't include some of the characters at
     /// the beginning of the original one, we need an `initial_offset` which represents the number
     /// of removed chars at the very beginning.
@@ -424,6 +425,7 @@ impl NormalizedString {
     /// - `1` if this is a new char
     /// - `-N` if the char is right before N removed chars
     /// - `0` if the char is replacing the existing one
+    ///
     /// Since it is possible that the normalized string doesn't include some of the characters at
     /// the beginning of the original one, we need an `initial_offset` which represents the number
     /// of removed chars at the very beginning.
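These change values drive the alignment tracking between the normalized and original strings. A minimal sketch of how they are meant to be read, assuming the crate's `NormalizedString::transform(dest, initial_offset)` API (the concrete string is just an illustration):

    use tokenizers::NormalizedString;

    // Turn the original "a~b" into "ab!": keep 'a', drop '~', keep 'b', append '!'.
    let mut n = NormalizedString::from("a~b");
    n.transform(
        vec![
            ('a', -1), // -1: 'a' is right before 1 removed char (the '~')
            ('b', 0),  //  0: 'b' replaces the existing char
            ('!', 1),  //  1: '!' is a brand-new char
        ],
        0, // initial_offset: no chars removed at the very beginning
    );
    assert_eq!(n.get(), "ab!");
    assert_eq!(n.get_original(), "a~b");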

@@ -65,9 +65,9 @@ impl PreTokenizedString {
     ///
     /// There are only one constraint that *MUST* be respected:
     /// > The produced `NormalizedString`, if combined back together, must have the
-    /// same `original` string as the original one given to `split_fn`. This concretely
-    /// means that for the offset tracking to work as expected, `split_fn` must produce
-    /// "splits" of the original string.
+    /// > same `original` string as the original one given to `split_fn`. This concretely
+    /// > means that for the offset tracking to work as expected, `split_fn` must produce
+    /// > "splits" of the original string.
     pub fn split<F, U, R>(&mut self, mut split_fn: F) -> Result<()>
     where
         F: FnMut(usize, NormalizedString) -> Result<U>,
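The blockquote markers (`>`) fixed here cover a real constraint: `split_fn` may only slice the `NormalizedString` it receives, never synthesize new text, otherwise the offsets back to the original string break. This is what whitespace splitting in the crate relies on; a hedged sketch of such a `split_fn` (the free-standing helper name is illustrative):

    use tokenizers::tokenizer::{PreTokenizedString, Result, SplitDelimiterBehavior};

    // Split every piece on whitespace. Each returned piece is a slice of the
    // incoming `NormalizedString`, so the offset tracking stays consistent.
    fn whitespace_split(pretok: &mut PreTokenizedString) -> Result<()> {
        pretok.split(|_idx, normalized| {
            normalized.split(char::is_whitespace, SplitDelimiterBehavior::Removed)
        })
    }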

@@ -177,7 +177,6 @@ where
 mod tests {
     use crate::tokenizer::Tokenizer;
     use std::str::FromStr;
-    use tracing_subscriber::fmt;

     #[test]
     fn test_deserialization_serialization_invariant() {
@@ -236,7 +235,7 @@ mod tests {
     #[cfg(feature = "http")]
     #[test]
     fn test_from_pretrained() {
-        fmt()
+        tracing_subscriber::fmt()
            .with_max_level(tracing::Level::DEBUG)
            .with_target(false)
            .init();