From bfa842e06357cb9fe2a91c75f73900d721428f5d Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 19 Dec 2022 13:50:48 +0100
Subject: [PATCH] Adding stale bot ? (#1123)

* Adding stale bot ?

* Clippy.
---
 .github/stale.yml                              | 17 +++++++++++++++++
 tokenizers/src/models/unigram/serialization.rs |  2 +-
 tokenizers/src/models/unigram/trainer.rs       |  2 +-
 tokenizers/src/processors/template.rs          |  2 +-
 tokenizers/src/tokenizer/normalizer.rs         | 14 +++-----------
 5 files changed, 23 insertions(+), 14 deletions(-)
 create mode 100644 .github/stale.yml

diff --git a/.github/stale.yml b/.github/stale.yml
new file mode 100644
index 00000000..dc90e5a1
--- /dev/null
+++ b/.github/stale.yml
@@ -0,0 +1,17 @@
+# Number of days of inactivity before an issue becomes stale
+daysUntilStale: 60
+# Number of days of inactivity before a stale issue is closed
+daysUntilClose: 7
+# Issues with these labels will never be considered stale
+exemptLabels:
+  - pinned
+  - security
+# Label to use when marking an issue as stale
+staleLabel: wontfix
+# Comment to post when marking an issue as stale. Set to `false` to disable
+markComment: >
+  This issue has been automatically marked as stale because it has not had
+  recent activity. It will be closed if no further activity occurs. Thank you
+  for your contributions.
+# Comment to post when closing a stale issue. Set to `false` to disable
+closeComment: false
diff --git a/tokenizers/src/models/unigram/serialization.rs b/tokenizers/src/models/unigram/serialization.rs
index 5ebc8154..a04fc17f 100644
--- a/tokenizers/src/models/unigram/serialization.rs
+++ b/tokenizers/src/models/unigram/serialization.rs
@@ -63,7 +63,7 @@ impl<'de> Visitor<'de> for UnigramVisitor {
         }
         match (vocab, unk_id) {
             (Some(vocab), unk_id) => Ok(Unigram::from(vocab, unk_id)
-                .map_err(|err| Error::custom(&format!("Unable to load vocab {:?}", err)))?),
+                .map_err(|err| Error::custom(format!("Unable to load vocab {:?}", err)))?),
             (None, _) => Err(Error::custom("Missing vocab")),
         }
     }
diff --git a/tokenizers/src/models/unigram/trainer.rs b/tokenizers/src/models/unigram/trainer.rs
index 444133fb..d4562b28 100644
--- a/tokenizers/src/models/unigram/trainer.rs
+++ b/tokenizers/src/models/unigram/trainer.rs
@@ -501,7 +501,7 @@ impl UnigramTrainer {
         let expected_loops = (((desired_vocab_size as f64).ln() - (pieces.len() as f64).ln())
             / self.shrinking_factor.ln()) as usize
             + 1;
-        let expected_updates = expected_loops as usize * self.n_sub_iterations as usize;
+        let expected_updates = expected_loops * self.n_sub_iterations as usize;
         self.update_progress(&progress, expected_updates, "EM training");
         let required_chars = self.required_chars(&sentences);
         let mut new_model = Unigram::from(pieces.clone(), Some(0))?;
diff --git a/tokenizers/src/processors/template.rs b/tokenizers/src/processors/template.rs
index 13bc91e5..313b4911 100644
--- a/tokenizers/src/processors/template.rs
+++ b/tokenizers/src/processors/template.rs
@@ -487,7 +487,7 @@ impl TemplateProcessing {
             .flat_map(|piece| {
                 match piece {
                     Piece::Sequence { id, type_id } => {
-                        let i = if *id == Sequence::A { 0 } else { 1 };
+                        let i = usize::from(*id != Sequence::A);
                         let encoding = &mut encodings[i];
                         encoding.set_type_ids(vec![*type_id; encoding.len()]);
                         encoding.set_sequence_id(i);
diff --git a/tokenizers/src/tokenizer/normalizer.rs b/tokenizers/src/tokenizer/normalizer.rs
index ac16ce92..39c2c85d 100644
--- a/tokenizers/src/tokenizer/normalizer.rs
+++ b/tokenizers/src/tokenizer/normalizer.rs
@@ -507,7 +507,7 @@ impl NormalizedString {
             let transformations = s
                 .chars()
                 .enumerate()
-                .map(|(i, c)| (c, if i == 0 { 0 } else { 1 }))
+                .map(|(i, c)| (c, isize::from(i != 0)))
                 .chain(std::iter::once((next, 1)));
 
             self.transform_range(Range::Normalized(0..next.len_utf8()), transformations, 0);
@@ -853,16 +853,8 @@ pub fn get_range_of<T: RangeBounds<usize>>(s: &str, range: T) -> Option<&str> {
     } else if start >= len || end > len || start >= end {
         None
     } else {
-        let start_b = s
-            .char_indices()
-            .map(|(i, _)| i)
-            .nth(start as usize)
-            .unwrap_or(0);
-        let end_b = s
-            .char_indices()
-            .map(|(i, _)| i)
-            .nth(end as usize)
-            .unwrap_or(s.len());
+        let start_b = s.char_indices().map(|(i, _)| i).nth(start).unwrap_or(0);
+        let end_b = s.char_indices().map(|(i, _)| i).nth(end).unwrap_or(s.len());
         Some(&s[start_b..end_b])
     }
 }