Adding stale bot ? (#1123)

* Adding stale bot ?
* Clippy.
2025-08-22 16:25:30 +00:00 · 2022-12-19 13:50:48 +01:00
parent 1649d74536
commit bfa842e063
5 changed files with 23 additions and 14 deletions
--- a/.github/stale.yml
+++ b/.github/stale.yml
@ -0,0 +1,17 @@
 # Number of days of inactivity before an issue becomes stale
 daysUntilStale: 60
 # Number of days of inactivity before a stale issue is closed
 daysUntilClose: 7
 # Issues with these labels will never be considered stale
 exemptLabels:
  - pinned
  - security
 # Label to use when marking an issue as stale
 staleLabel: wontfix
 # Comment to post when marking an issue as stale. Set to `false` to disable
 markComment: >
  This issue has been automatically marked as stale because it has not had
  recent activity. It will be closed if no further activity occurs. Thank you
  for your contributions.
 # Comment to post when closing a stale issue. Set to `false` to disable
 closeComment: false
--- a/tokenizers/src/models/unigram/serialization.rs
+++ b/tokenizers/src/models/unigram/serialization.rs
@ -63,7 +63,7 @@ impl<'de> Visitor<'de> for UnigramVisitor {
        }
        match (vocab, unk_id) {
            (Some(vocab), unk_id) => Ok(Unigram::from(vocab, unk_id)
-                .map_err(|err| Error::custom(&format!("Unable to load vocab {:?}", err)))?),
+                .map_err(|err| Error::custom(format!("Unable to load vocab {:?}", err)))?),
            (None, _) => Err(Error::custom("Missing vocab")),
        }
    }
--- a/tokenizers/src/models/unigram/trainer.rs
+++ b/tokenizers/src/models/unigram/trainer.rs
@ -501,7 +501,7 @@ impl UnigramTrainer {
        let expected_loops = (((desired_vocab_size as f64).ln() - (pieces.len() as f64).ln())
            / self.shrinking_factor.ln()) as usize
            + 1;
-        let expected_updates = expected_loops as usize * self.n_sub_iterations as usize;
+        let expected_updates = expected_loops * self.n_sub_iterations as usize;
        self.update_progress(&progress, expected_updates, "EM training");
        let required_chars = self.required_chars(&sentences);
        let mut new_model = Unigram::from(pieces.clone(), Some(0))?;
--- a/tokenizers/src/processors/template.rs
+++ b/tokenizers/src/processors/template.rs
@ -487,7 +487,7 @@ impl TemplateProcessing {
            .flat_map(|piece| {
                match piece {
                    Piece::Sequence { id, type_id } => {
-                        let i = if *id == Sequence::A { 0 } else { 1 };
+                        let i = usize::from(*id != Sequence::A);
                        let encoding = &mut encodings[i];
                        encoding.set_type_ids(vec![*type_id; encoding.len()]);
                        encoding.set_sequence_id(i);
--- a/tokenizers/src/tokenizer/normalizer.rs
+++ b/tokenizers/src/tokenizer/normalizer.rs
@ -507,7 +507,7 @@ impl NormalizedString {
            let transformations = s
                .chars()
                .enumerate()
-                .map(|(i, c)| (c, if i == 0 { 0 } else { 1 }))
+                .map(|(i, c)| (c, isize::from(i != 0)))
                .chain(std::iter::once((next, 1)));
            self.transform_range(Range::Normalized(0..next.len_utf8()), transformations, 0);
@ -853,16 +853,8 @@ pub fn get_range_of<T: RangeBounds<usize>>(s: &str, range: T) -> Option<&str> {
    } else if start >= len || end > len || start >= end {
        None
    } else {
-        let start_b = s
-            .char_indices()
-            .map(|(i, _)| i)
-            .nth(start as usize)
-            .unwrap_or(0);
-        let end_b = s
-            .char_indices()
-            .map(|(i, _)| i)
-            .nth(end as usize)
-            .unwrap_or(s.len());
+        let start_b = s.char_indices().map(|(i, _)| i).nth(start).unwrap_or(0);
+        let end_b = s.char_indices().map(|(i, _)| i).nth(end).unwrap_or(s.len());
        Some(&s[start_b..end_b])
    }
 }