Fix clippy + feature test management. (#1580)

* Fix clippy + feature test management.

* That example was local oops.

* Clippy fix.

* Readme indentation.

* README update.
Nicolas Patry
2024-07-26 12:16:30 +02:00
committed by GitHub
parent 4ea2f235b0
commit a3ad85b3e8
7 changed files with 19 additions and 21 deletions

@@ -11,6 +11,7 @@
 //! sequences. The final result looks like this:
 //! - Single sequence: `[CLS] Hello there [SEP]`
 //! - Pair sequences: `[CLS] My name is Anthony [SEP] What is my name? [SEP]`
+//!
 //! With the type ids as following:
 //! ```markdown
 //! [CLS] ... [SEP] ... [SEP]
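The `//!` line added here only separates the example output from the type-id explanation in the rendered docs. For context, the template post-processor being documented can be configured roughly like this (a minimal sketch based on the crate's `TemplateProcessing` builder; the special-token ids below are illustrative placeholders, not real vocabulary ids):

    use tokenizers::processors::template::TemplateProcessing;

    // "[CLS] $A [SEP]" for single sequences, "[CLS] A [SEP] B [SEP]" for pairs,
    // with $A:0 / $B:1 producing the type ids shown above.
    let template = TemplateProcessing::builder()
        .try_single("[CLS] $A [SEP]").unwrap()
        .try_pair("[CLS] $A:0 [SEP] $B:1 [SEP]").unwrap()
        .special_tokens(vec![("[CLS]", 1), ("[SEP]", 2)])
        .build()
        .unwrap();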

@@ -1297,17 +1297,13 @@ where
 #[cfg(test)]
 mod test {
-    use crate::AddedToken;
-    use crate::Tokenizer;
     #[cfg(feature = "http")]
     #[test]
     fn test_decoding_with_added_bpe() {
         use crate::{
             normalizers,
             pre_tokenizers::split::{Split, SplitPattern},
-            NormalizerWrapper, PreTokenizerWrapper, SplitDelimiterBehavior,
+            AddedToken, NormalizerWrapper, PreTokenizerWrapper, SplitDelimiterBehavior, Tokenizer,
         };
         let mut tokenizer = Tokenizer::from_pretrained("meta-llama/Meta-Llama-3-8B", None).unwrap();
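The point of moving these imports: when the `http` feature is disabled, the whole `test_decoding_with_added_bpe` function disappears, so module-level `use crate::AddedToken;` / `use crate::Tokenizer;` would be left unused and trip clippy in a `-D warnings` build. Keeping feature-only imports inside the gated test avoids that. A hedged sketch of the pattern (the model identifier and test name are illustrative):

    #[cfg(test)]
    mod test {
        // No module-level imports that only the gated test needs.
        #[cfg(feature = "http")]
        #[test]
        fn needs_http_feature() {
            // The import is compiled out together with the test when `http`
            // is off, so it can never be reported as unused.
            use crate::Tokenizer;
            let tokenizer = Tokenizer::from_pretrained("gpt2", None).unwrap();
            assert!(tokenizer.get_vocab_size(true) > 0);
        }
    }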

@@ -305,6 +305,7 @@ impl NormalizedString {
     /// - `1` if this is a new char
     /// - `-N` if the char is right before N removed chars
     /// - `0` if the char is replacing the existing one
+    ///
     /// Since it is possible that the normalized string doesn't include some of the characters at
     /// the beginning of the original one, we need an `initial_offset` which represents the number
     /// of removed chars at the very beginning.
@@ -424,6 +425,7 @@ impl NormalizedString {
     /// - `1` if this is a new char
     /// - `-N` if the char is right before N removed chars
     /// - `0` if the char is replacing the existing one
+    ///
     /// Since it is possible that the normalized string doesn't include some of the characters at
     /// the beginning of the original one, we need an `initial_offset` which represents the number
     /// of removed chars at the very beginning.
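These change values drive the alignment tracking between the normalized and original strings. A minimal sketch of how they are meant to be read, assuming the crate's `NormalizedString::transform(dest, initial_offset)` API (the concrete string is just an illustration):

    use tokenizers::NormalizedString;

    // Turn the original "a~b" into "ab!": keep 'a', drop '~', keep 'b', append '!'.
    let mut n = NormalizedString::from("a~b");
    n.transform(
        vec![
            ('a', -1), // -1: 'a' is right before 1 removed char (the '~')
            ('b', 0),  //  0: 'b' replaces the existing char
            ('!', 1),  //  1: '!' is a brand-new char
        ],
        0, // initial_offset: no chars removed at the very beginning
    );
    assert_eq!(n.get(), "ab!");
    assert_eq!(n.get_original(), "a~b");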

@@ -65,9 +65,9 @@ impl PreTokenizedString {
     ///
     /// There are only one constraint that *MUST* be respected:
     /// > The produced `NormalizedString`, if combined back together, must have the
-    /// same `original` string as the original one given to `split_fn`. This concretely
-    /// means that for the offset tracking to work as expected, `split_fn` must produce
-    /// "splits" of the original string.
+    /// > same `original` string as the original one given to `split_fn`. This concretely
+    /// > means that for the offset tracking to work as expected, `split_fn` must produce
+    /// > "splits" of the original string.
     pub fn split<F, U, R>(&mut self, mut split_fn: F) -> Result<()>
     where
         F: FnMut(usize, NormalizedString) -> Result<U>,
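The blockquote markers (`>`) fixed here cover a real constraint: `split_fn` may only slice the `NormalizedString` it receives, never synthesize new text, otherwise the offsets back to the original string break. This is what whitespace splitting in the crate relies on; a hedged sketch of such a `split_fn` (the free-standing helper name is illustrative):

    use tokenizers::tokenizer::{PreTokenizedString, Result, SplitDelimiterBehavior};

    // Split every piece on whitespace. Each returned piece is a slice of the
    // incoming `NormalizedString`, so the offset tracking stays consistent.
    fn whitespace_split(pretok: &mut PreTokenizedString) -> Result<()> {
        pretok.split(|_idx, normalized| {
            normalized.split(char::is_whitespace, SplitDelimiterBehavior::Removed)
        })
    }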

@@ -177,7 +177,6 @@ where
 mod tests {
     use crate::tokenizer::Tokenizer;
     use std::str::FromStr;
-    use tracing_subscriber::fmt;

     #[test]
     fn test_deserialization_serialization_invariant() {
@@ -236,7 +235,7 @@ mod tests {
     #[cfg(feature = "http")]
     #[test]
     fn test_from_pretrained() {
-        fmt()
+        tracing_subscriber::fmt()
            .with_max_level(tracing::Level::DEBUG)
            .with_target(false)
            .init();