diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index ca6eb48d..24b6e796 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -456,14 +456,14 @@ dependencies = [ "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.44 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.11 (git+https://github.com/n1t0/unicode-normalization)", + "unicode-normalization-alignments 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", "unicode_categories 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "unicode-normalization" -version = "0.1.11" -source = "git+https://github.com/n1t0/unicode-normalization#894053d92493c55c89fe9b188c0fb2babaa9a84c" +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -570,7 +570,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)" = "dff0acdb207ae2fe6d5976617f887eb1e35a2ba52c13c7234c790960cdad9238" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum unicode-normalization 0.1.11 (git+https://github.com/n1t0/unicode-normalization)" = "" +"checksum unicode-normalization-alignments 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" "checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" "checksum unicode_categories 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" diff --git a/tokenizers/Cargo.toml b/tokenizers/Cargo.toml index bbf57233..27bd5323 100644 --- a/tokenizers/Cargo.toml +++ b/tokenizers/Cargo.toml @@ -19,7 +19,7 @@ regex-syntax = "0.6.12" rayon = "1.2.0" serde_json = "1.0" clap = "2.33.0" -unicode-normalization = { git = "https://github.com/n1t0/unicode-normalization" } +unicode-normalization-alignments = "0.1.12" unicode_categories = "0.1.1" [dev-dependencies] diff --git a/tokenizers/src/tokenizer/normalizer.rs b/tokenizers/src/tokenizer/normalizer.rs index e67dadbf..1adea003 100644 --- a/tokenizers/src/tokenizer/normalizer.rs +++ b/tokenizers/src/tokenizer/normalizer.rs @@ -1,6 +1,6 @@ use super::Result; use std::cmp::Ordering; -use unicode_normalization::UnicodeNormalization; +use unicode_normalization_alignments::UnicodeNormalization; /// A Normalizer takes care of pre-processing strings pub trait Normalizer {