mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-05 20:28:22 +00:00
Improve parallelism tracking and warning
This commit is contained in:
@@ -20,15 +20,19 @@ use pyo3::wrap_pymodule;
|
||||
// we register a callback to be called in the event of a fork so that we can warn the user.
|
||||
// NOTE(review): presumably records whether the fork callback has already been
// registered; the code that reads/writes this flag is outside this view — confirm.
static mut REGISTERED_FORK_CALLBACK: bool = false;
|
||||
extern "C" fn child_after_fork() {
|
||||
if !tk::parallelism::is_parallelism_configured() {
|
||||
use tk::parallelism::*;
|
||||
if has_parallelism_been_used() && !is_parallelism_configured() {
|
||||
println!(
|
||||
"The current process just got forked. Disabling parallelism to avoid deadlocks..."
|
||||
"huggingface/tokenizers: The current process just got forked, after parallelism has \
|
||||
already been used. Disabling parallelism to avoid deadlocks..."
|
||||
);
|
||||
println!("To disable this warning, you can either:");
|
||||
println!(
|
||||
"To disable this warning, please explicitly set {}=(true | false)",
|
||||
tk::parallelism::ENV_VARIABLE
|
||||
"\t- Avoid using `tokenizers` before the fork if possible\n\
|
||||
\t- Explicitly set the environment variable {}=(true | false)",
|
||||
ENV_VARIABLE
|
||||
);
|
||||
tk::parallelism::set_parallelism(false);
|
||||
set_parallelism(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,11 +8,19 @@ use rayon_cond::CondIterator;
|
||||
|
||||
/// Name of the environment variable looked up via `std::env::var` by the
/// parallelism helpers in this module.
pub const ENV_VARIABLE: &str = "TOKENIZERS_PARALLELISM";
|
||||
|
||||
// Reading/Writing this variable should always happen on the main thread
|
||||
// Starts out `false`; the visible code only ever assigns `true` to it (when a
// parallel iterator is requested) and reads it back in `has_parallelism_been_used`.
// NOTE(review): as a `static mut` it has no synchronization of its own.
static mut USED_PARALLELISM: bool = false;
|
||||
|
||||
/// Check if the TOKENIZERS_PARALLELISM env variable has been explicitly set
|
||||
pub fn is_parallelism_configured() -> bool {
|
||||
std::env::var(ENV_VARIABLE).is_ok()
|
||||
}
|
||||
|
||||
/// Check if at some point we used a parallel iterator
|
||||
pub fn has_parallelism_been_used() -> bool {
|
||||
unsafe { USED_PARALLELISM }
|
||||
}
|
||||
|
||||
/// Get the currently set value for `TOKENIZERS_PARALLELISM` env variable
|
||||
pub fn get_parallelism() -> bool {
|
||||
match std::env::var(ENV_VARIABLE) {
|
||||
@@ -60,7 +68,11 @@ where
|
||||
S: Iterator<Item = P::Item>,
|
||||
{
|
||||
fn into_maybe_par_iter(self) -> CondIterator<P, S> {
|
||||
CondIterator::new(self, get_parallelism())
|
||||
let parallelism = get_parallelism();
|
||||
if parallelism {
|
||||
unsafe { USED_PARALLELISM = true };
|
||||
}
|
||||
CondIterator::new(self, parallelism)
|
||||
}
|
||||
|
||||
fn into_maybe_par_iter_cond(self, cond: bool) -> CondIterator<P, S> {
|
||||
@@ -147,6 +159,7 @@ where
|
||||
let iter = CondIterator::from_serial(self);
|
||||
|
||||
if get_parallelism() {
|
||||
unsafe { USED_PARALLELISM = true };
|
||||
CondIterator::from_parallel(iter.into_parallel().right().unwrap())
|
||||
} else {
|
||||
iter
|
||||
|
||||
Reference in New Issue
Block a user