Preparing release. (#1355)

* Preparing release.

* Fix new clippy lints
Nicolas Patry
2023-10-06 12:56:36 +02:00
committed by GitHub
parent aed491df8c
commit 4322056e6e
7 changed files with 27 additions and 27 deletions

View File

@@ -2,7 +2,7 @@
 authors = ["Nicolas Patry <nicolas@huggingface.co>"]
 edition = "2021"
 name = "node"
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@@ -175,7 +175,7 @@ export class Encoding {
   getSequenceIds(): Array<number | undefined | null>
   tokenToSequence(token: number): number | null
 }
-export class Model { }
+export class Model {}
 export type Bpe = BPE
 export class BPE {
   static empty(): Model
@@ -204,7 +204,7 @@ export class Normalizer {
 export class PreTokenizer {
   preTokenizeString(sequence: string): [string, [number, number]][]
 }
-export class Processor { }
+export class Processor {}
 export class AddedToken {
   constructor(token: string, isSpecial: boolean, options?: AddedTokenOptions | undefined | null)
   getContent(): string
@@ -229,7 +229,6 @@ export class Tokenizer {
   decodeBatch(ids: Array<Array<number>>, skipSpecialTokens: boolean): Promise<string[]>
   static fromString(s: string): Tokenizer
   static fromFile(file: string): Tokenizer
-  // static fromPretrained(file: string, parameters?: JsFromPretrainedParameters | undefined | null): Tokenizer
   addSpecialTokens(tokens: Array<string>): void
   setTruncation(maxLength: number, options?: TruncationOptions | undefined | null): void
   disableTruncation(): void
@@ -251,4 +250,4 @@ export class Tokenizer {
     addSpecialTokens?: boolean | undefined | null,
   ): Encoding
 }
-export class Trainer { }
+export class Trainer {}

View File

@@ -1,6 +1,6 @@
 [package]
 name = "tokenizers-python"
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
 edition = "2021"
@@ -21,7 +21,7 @@ onig = { version = "6.4", default-features = false }
 itertools = "0.11"
 [dependencies.tokenizers]
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 path = "../../tokenizers"
 [dev-dependencies]

View File

@@ -2,7 +2,7 @@
 authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
 edition = "2018"
 name = "tokenizers"
-version = "0.14.1-dev.0"
+version = "0.14.2-dev.0"
 homepage = "https://github.com/huggingface/tokenizers"
 repository = "https://github.com/huggingface/tokenizers"
 documentation = "https://docs.rs/tokenizers/"

View File

@@ -21,17 +21,17 @@ impl PartialEq for Merge {
 }
 impl PartialOrd for Merge {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        if self.count != other.count {
-            Some(self.count.cmp(&other.count))
-        } else {
-            // Here we want ascending order
-            Some(other.pair.cmp(&self.pair))
-        }
+        Some(self.cmp(other))
     }
 }
 impl Ord for Merge {
     fn cmp(&self, other: &Self) -> Ordering {
-        self.partial_cmp(other).unwrap()
+        if self.count != other.count {
+            self.count.cmp(&other.count)
+        } else {
+            // Here we want ascending order
+            other.pair.cmp(&self.pair)
+        }
     }
 }
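
This is the canonical shape clippy's `non_canonical_partial_ord_impl` lint asks for: when a type implements `Ord`, all of the comparison logic belongs in `Ord::cmp`, and `PartialOrd::partial_cmp` should just delegate with `Some(self.cmp(other))`. The old code had it backwards, with the logic in `partial_cmp` and `cmp` calling `.unwrap()` on it. A self-contained sketch of the pattern, with the struct trimmed to the two fields the comparison uses (the real `Merge` carries more state):

    use std::cmp::Ordering;

    // Reduced Merge: only the fields the ordering touches.
    #[derive(PartialEq, Eq)]
    struct Merge {
        pair: (u32, u32),
        count: u64,
    }

    impl PartialOrd for Merge {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            // Canonical form: delegate to Ord, never duplicate the logic.
            Some(self.cmp(other))
        }
    }

    impl Ord for Merge {
        fn cmp(&self, other: &Self) -> Ordering {
            if self.count != other.count {
                self.count.cmp(&other.count)
            } else {
                // Swapped operands: ties on count break in ascending pair
                // order, so a max-heap pops the smallest pair first.
                other.pair.cmp(&self.pair)
            }
        }
    }

    fn main() {
        let a = Merge { pair: (1, 2), count: 10 };
        let b = Merge { pair: (0, 9), count: 10 };
        // Equal counts, so the lexicographically smaller pair compares greater.
        assert!(a < b);
    }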
@@ -533,15 +533,16 @@ impl BpeTrainer {
             let changes = top
                 .pos
                 .maybe_par_iter()
-                .flat_map(|i| {
-                    let w = &words[*i] as *const _ as *mut _;
+                .flat_map(|&i| {
+                    let word = &words[i] as *const _ as *mut Word;
                     // We can merge each of these words in parallel here because each position
                     // can be there only once (HashSet). So this is safe.
                     unsafe {
-                        let word: &mut Word = &mut (*w);
-                        word.merge(top.pair.0, top.pair.1, new_token_id, max_token_length)
+                        // let word: &mut Word = &mut (*word);
+                        (*word)
+                            .merge(top.pair.0, top.pair.1, new_token_id, max_token_length)
                             .into_iter()
-                            .map(|c| (c, *i))
+                            .map(|c| (c, i))
                             .collect::<Vec<_>>()
                     }
                 })
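
The second hunk in this file is a smaller tidy-up: `top.pos` is a `HashSet` of word positions (that is what the safety comment relies on), so iterating it yields references, and destructuring the closure parameter as `|&i|` copies each index out once instead of spelling `*i` at every use; the pointer cast also now names its target type (`*mut Word`) instead of leaving it to inference. A safe toy sketch of the parameter-destructuring part (names here are illustrative, not from the crate):

    use std::collections::HashSet;

    fn main() {
        // Stand-in for the trainer's `top.pos`: a set of word indices.
        let pos: HashSet<usize> = [0, 2].into_iter().collect();
        let words = ["ab", "cd", "ef"];

        // `|&i|` binds the dereferenced index, so the body uses plain `i`,
        // mirroring the `.map(|c| (c, i))` in the hunk above.
        let picked: Vec<(char, usize)> = pos
            .iter()
            .flat_map(|&i| words[i].chars().map(move |c| (c, i)))
            .collect();

        assert_eq!(picked.len(), 4); // "ab" and "ef" contribute two chars each
    }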

View File

@@ -20,17 +20,17 @@ impl PartialOrd for Merge {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         // By manually implementing this, we make the containing BinaryHeap a
         // min-heap ordered first on the rank, and the pos otherwise
-        if self.rank != other.rank {
-            Some(other.rank.cmp(&self.rank))
-        } else {
-            Some(other.pos.cmp(&self.pos))
-        }
+        Some(self.cmp(other))
     }
 }
 impl Ord for Merge {
     fn cmp(&self, other: &Self) -> Ordering {
-        self.partial_cmp(other).unwrap()
+        if self.rank != other.rank {
+            other.rank.cmp(&self.rank)
+        } else {
+            other.pos.cmp(&self.pos)
+        }
     }
 }
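
Same canonical-`Ord` rewrite as in the trainer, but this `Merge` inverts every comparison on purpose: `BinaryHeap` is a max-heap, so comparing `other` against `self` turns it into a min-heap on `rank`, with `pos` as the tie-breaker, exactly as the retained comment explains. A small sketch of the effect (struct again trimmed to the compared fields):

    use std::cmp::Ordering;
    use std::collections::BinaryHeap;

    #[derive(PartialEq, Eq)]
    struct Merge {
        rank: u32,
        pos: usize,
    }

    impl PartialOrd for Merge {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    impl Ord for Merge {
        fn cmp(&self, other: &Self) -> Ordering {
            // Reversed operands make the max-heap behave as a min-heap.
            if self.rank != other.rank {
                other.rank.cmp(&self.rank)
            } else {
                other.pos.cmp(&self.pos)
            }
        }
    }

    fn main() {
        let mut heap = BinaryHeap::new();
        heap.push(Merge { rank: 5, pos: 0 });
        heap.push(Merge { rank: 1, pos: 3 });
        heap.push(Merge { rank: 1, pos: 1 });

        // Lowest rank pops first; ties resolve to the smaller pos.
        let first = heap.pop().unwrap();
        assert_eq!((first.rank, first.pos), (1, 1));
    }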

View File

@@ -25,7 +25,7 @@ impl<Label: Eq + Hash + Copy> Trie<Label> {
     pub fn push(&mut self, element: &[Label]) {
         let mut node = &mut self.root;
         for label in element.iter() {
-            node = node.children.entry(*label).or_insert_with(Node::default);
+            node = node.children.entry(*label).or_default();
         }
         node.is_leaf = true;
     }
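
`Entry::or_default()` is shorthand for `or_insert_with` with the type's `Default` constructor, and clippy suggests it whenever the closure passed in is just `Type::default`. A minimal sketch with a plain `HashMap` standing in for the trie node's `children` map:

    use std::collections::HashMap;

    fn main() {
        let mut children: HashMap<char, Vec<usize>> = HashMap::new();

        // These two lines are equivalent; the second is the idiomatic spelling.
        children.entry('a').or_insert_with(Vec::default).push(1);
        children.entry('a').or_default().push(2);

        assert_eq!(children[&'a'], vec![1, 2]);
    }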