mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-03 11:18:29 +00:00
Use train_from_files in benchmarks
This commit is contained in:
@@ -70,7 +70,7 @@ pub fn bench_bert(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
fn bench_train(c: &mut Criterion) {
|
||||
let trainer = WordPieceTrainerBuilder::default()
|
||||
let mut trainer = WordPieceTrainerBuilder::default()
|
||||
.show_progress(false)
|
||||
.build();
|
||||
type Tok = TokenizerImpl<
|
||||
@@ -87,7 +87,7 @@ fn bench_train(c: &mut Criterion) {
|
||||
iter_bench_train(
|
||||
iters,
|
||||
&mut tokenizer,
|
||||
&trainer,
|
||||
&mut trainer,
|
||||
vec!["data/small.txt".to_string()],
|
||||
)
|
||||
})
|
||||
@@ -100,7 +100,7 @@ fn bench_train(c: &mut Criterion) {
|
||||
iter_bench_train(
|
||||
iters,
|
||||
&mut tokenizer,
|
||||
&trainer,
|
||||
&mut trainer,
|
||||
vec!["data/big.txt".to_string()],
|
||||
)
|
||||
})
|
||||
|
||||
@@ -69,7 +69,7 @@ fn bench_gpt2(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
fn bench_train(c: &mut Criterion) {
|
||||
let trainer: TrainerWrapper = BpeTrainerBuilder::default()
|
||||
let mut trainer: TrainerWrapper = BpeTrainerBuilder::default()
|
||||
.show_progress(false)
|
||||
.build()
|
||||
.into();
|
||||
@@ -80,7 +80,7 @@ fn bench_train(c: &mut Criterion) {
|
||||
iter_bench_train(
|
||||
iters,
|
||||
&mut tokenizer,
|
||||
&trainer,
|
||||
&mut trainer,
|
||||
vec!["data/small.txt".to_string()],
|
||||
)
|
||||
})
|
||||
@@ -93,7 +93,7 @@ fn bench_train(c: &mut Criterion) {
|
||||
iter_bench_train(
|
||||
iters,
|
||||
&mut tokenizer,
|
||||
&trainer,
|
||||
&mut trainer,
|
||||
vec!["data/big.txt".to_string()],
|
||||
)
|
||||
})
|
||||
|
||||
@@ -61,7 +61,7 @@ where
|
||||
pub fn iter_bench_train<T, M, N, PT, PP, D>(
|
||||
iters: u64,
|
||||
tokenizer: &mut TokenizerImpl<M, N, PT, PP, D>,
|
||||
trainer: &T,
|
||||
trainer: &mut T,
|
||||
files: Vec<String>,
|
||||
) -> Duration
|
||||
where
|
||||
@@ -75,7 +75,7 @@ where
|
||||
let mut duration = Duration::new(0, 0);
|
||||
for _i in 0..iters {
|
||||
let start = Instant::now();
|
||||
tokenizer.train(trainer, files.clone()).unwrap();
|
||||
tokenizer.train_from_files(trainer, files.clone()).unwrap();
|
||||
duration = duration.checked_add(start.elapsed()).unwrap();
|
||||
}
|
||||
duration
|
||||
|
||||
Reference in New Issue
Block a user