mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-18 06:19:14 +00:00
* New PR to fix #270 (not #157). Drastically reduce the number of required compilation flags. I think it's good enough to merge right now. We disable progress altogether when the `progressbar` flag is disabled, which is perfectly fine compared to not being able to build. Future PRs could include: - Better encapsulation of `progress` in training call sites (fewer direct calls to `indicatif`, and common code for `setup_progress`, `finalize` and so on). - We could have a raw `print` progress bar when the compilation flag is disabled? - Having better control of progress bars in bindings would require us to change a bunch of code, which might be overkill in the short term. Either we start by defining a trait for our ProgressBar, and the bindings can implement the trait with custom `tqdm` and `cli-progress` (it's not even 100% sure it's doable). - The easiest way would be to enable some sort of iterator in Rust so that progress bar calls can happen in client code, which would be the most lenient for all platforms. The hard part is that leveraging parallelism in that setting would probably be hard. * Remove external visibility of progressbar. * Remove dead import.
This commit is contained in:
4
bindings/node/native/Cargo.lock
generated
4
bindings/node/native/Cargo.lock
generated
@@ -305,9 +305,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "indicatif"
|
||||
version = "0.14.0"
|
||||
version = "0.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49a68371cf417889c9d7f98235b7102ea7c54fc59bcbd22f3dea785be9d27e40"
|
||||
checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4"
|
||||
dependencies = [
|
||||
"console",
|
||||
"lazy_static",
|
||||
|
||||
720
bindings/python/Cargo.lock
generated
720
bindings/python/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -47,13 +47,17 @@ clap = "2.33"
|
||||
unicode-normalization-alignments = "0.1"
|
||||
unicode_categories = "0.1"
|
||||
unicode-segmentation = "1.6"
|
||||
indicatif = "0.14"
|
||||
indicatif = {version = "0.15", optional = true}
|
||||
itertools = "0.9"
|
||||
log = "0.4"
|
||||
esaxx-rs = "0.1"
|
||||
derive_builder = "0.9"
|
||||
spm_precompiled = "0.1"
|
||||
|
||||
[features]
|
||||
default = ["progressbar"]
|
||||
progressbar = ["indicatif"]
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
tempfile = "3.1"
|
||||
|
||||
@@ -113,3 +113,8 @@ fn main() -> Result<()> {
|
||||
by the total number of core/threads your CPU provides but this can be tuned by setting the `RAYON_RS_NUM_CPUS`
|
||||
environment variable. As an example setting `RAYON_RS_NUM_CPUS=4` will allocate a maximum of 4 threads.
|
||||
**_Please note this behavior may evolve in the future_**
|
||||
|
||||
## Features
|
||||
**progressbar**: The progress bar visualization is enabled by default. It can be disabled if
|
||||
compilation for certain targets is not supported by the [termios](https://crates.io/crates/termios)
|
||||
dependency of the [indicatif](https://crates.io/crates/indicatif) progress bar.
|
||||
|
||||
@@ -100,6 +100,11 @@
|
||||
//! by the total number of core/threads your CPU provides but this can be tuned by setting the `RAYON_RS_NUM_CPUS`
|
||||
//! environment variable. As an example setting `RAYON_RS_NUM_CPUS=4` will allocate a maximum of 4 threads.
|
||||
//! **_Please note this behavior may evolve in the future_**
|
||||
//!
|
||||
//! # Features
|
||||
//! **progressbar**: The progress bar visualization is enabled by default. It can be disabled if
|
||||
//! compilation for certain targets is not supported by the [termios](https://crates.io/crates/termios)
|
||||
//! dependency of the [indicatif](https://crates.io/crates/indicatif) progress bar.
|
||||
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use super::{Pair, WithFirstLastIterator, Word, BPE};
|
||||
use crate::parallelism::*;
|
||||
use crate::tokenizer::{AddedToken, Result, Trainer};
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use crate::utils::progress::{ProgressBar, ProgressStyle};
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{BinaryHeap, HashMap, HashSet};
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::models::unigram::{lattice::Lattice, model::Unigram};
|
||||
use crate::tokenizer::{AddedToken, Result, Trainer};
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use crate::utils::progress::{ProgressBar, ProgressStyle};
|
||||
use log::debug;
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
@@ -19,12 +19,12 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::export::Formatter;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::utils::parallelism::*;
|
||||
use crate::utils::progress::{ProgressBar, ProgressStyle};
|
||||
|
||||
mod added_vocabulary;
|
||||
mod encoding;
|
||||
|
||||
@@ -2,6 +2,7 @@ pub mod cache;
|
||||
pub mod iter;
|
||||
pub mod padding;
|
||||
pub mod parallelism;
|
||||
pub mod progress;
|
||||
pub mod truncation;
|
||||
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
32
tokenizers/src/utils/progress.rs
Normal file
32
tokenizers/src/utils/progress.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
#[cfg(feature = "progressbar")]
|
||||
pub(crate) use indicatif::{ProgressBar, ProgressStyle};
|
||||
|
||||
#[cfg(not(feature = "progressbar"))]
mod progressbar {
    //! No-op stand-ins used when the `progressbar` feature is disabled.
    //!
    //! The types here carry the same names as the `indicatif` types that are
    //! re-exported when the feature is enabled, so call sites can import
    //! `ProgressBar` / `ProgressStyle` from this module's parent without any
    //! `cfg` of their own. Every method is intentionally empty: with the
    //! feature off, progress reporting is disabled altogether.

    /// Progress bar replacement that holds no state and never draws anything.
    pub struct ProgressBar;

    impl ProgressBar {
        /// Creates a stub progress bar; `_length` is accepted and ignored.
        pub fn new(_length: u64) -> Self {
            Self {}
        }

        /// Ignores the requested total length.
        pub fn set_length(&self, _length: u64) {}

        /// Ignores the requested draw delta.
        pub fn set_draw_delta(&self, _draw_delta: u64) {}

        /// Ignores the message.
        pub fn set_message(&self, _message: &str) {}

        /// Does nothing; there is no bar to finish.
        pub fn finish(&self) {}

        /// Does nothing; there is no state to reset.
        pub fn reset(&self) {}

        /// Ignores the increment.
        pub fn inc(&self, _inc: u64) {}

        /// Consumes and discards the style.
        pub fn set_style(&self, _style: ProgressStyle) {}
    }

    /// Style replacement that records nothing.
    pub struct ProgressStyle {}

    impl ProgressStyle {
        /// Creates a stub style.
        pub fn default_bar() -> Self {
            Self {}
        }

        /// Ignores `_template` and returns the style unchanged, so builder-style
        /// chaining keeps compiling.
        pub fn template(self, _template: &str) -> Self {
            self
        }
    }
}
|
||||
#[cfg(not(feature = "progressbar"))]
|
||||
pub(crate) use progressbar::{ProgressBar, ProgressStyle};
|
||||
Reference in New Issue
Block a user