Use train_from_files in benchmarks

This commit is contained in:
Anthony MOI
2020-11-25 17:25:49 -05:00
committed by Anthony MOI
parent 999067454d
commit 06f6ba3fce
3 changed files with 8 additions and 8 deletions

View File

@@ -70,7 +70,7 @@ pub fn bench_bert(c: &mut Criterion) {
} }
fn bench_train(c: &mut Criterion) { fn bench_train(c: &mut Criterion) {
let trainer = WordPieceTrainerBuilder::default() let mut trainer = WordPieceTrainerBuilder::default()
.show_progress(false) .show_progress(false)
.build(); .build();
type Tok = TokenizerImpl< type Tok = TokenizerImpl<
@@ -87,7 +87,7 @@ fn bench_train(c: &mut Criterion) {
iter_bench_train( iter_bench_train(
iters, iters,
&mut tokenizer, &mut tokenizer,
&trainer, &mut trainer,
vec!["data/small.txt".to_string()], vec!["data/small.txt".to_string()],
) )
}) })
@@ -100,7 +100,7 @@ fn bench_train(c: &mut Criterion) {
iter_bench_train( iter_bench_train(
iters, iters,
&mut tokenizer, &mut tokenizer,
&trainer, &mut trainer,
vec!["data/big.txt".to_string()], vec!["data/big.txt".to_string()],
) )
}) })

View File

@@ -69,7 +69,7 @@ fn bench_gpt2(c: &mut Criterion) {
} }
fn bench_train(c: &mut Criterion) { fn bench_train(c: &mut Criterion) {
let trainer: TrainerWrapper = BpeTrainerBuilder::default() let mut trainer: TrainerWrapper = BpeTrainerBuilder::default()
.show_progress(false) .show_progress(false)
.build() .build()
.into(); .into();
@@ -80,7 +80,7 @@ fn bench_train(c: &mut Criterion) {
iter_bench_train( iter_bench_train(
iters, iters,
&mut tokenizer, &mut tokenizer,
&trainer, &mut trainer,
vec!["data/small.txt".to_string()], vec!["data/small.txt".to_string()],
) )
}) })
@@ -93,7 +93,7 @@ fn bench_train(c: &mut Criterion) {
iter_bench_train( iter_bench_train(
iters, iters,
&mut tokenizer, &mut tokenizer,
&trainer, &mut trainer,
vec!["data/big.txt".to_string()], vec!["data/big.txt".to_string()],
) )
}) })

View File

@@ -61,7 +61,7 @@ where
pub fn iter_bench_train<T, M, N, PT, PP, D>( pub fn iter_bench_train<T, M, N, PT, PP, D>(
iters: u64, iters: u64,
tokenizer: &mut TokenizerImpl<M, N, PT, PP, D>, tokenizer: &mut TokenizerImpl<M, N, PT, PP, D>,
trainer: &T, trainer: &mut T,
files: Vec<String>, files: Vec<String>,
) -> Duration ) -> Duration
where where
@@ -75,7 +75,7 @@ where
let mut duration = Duration::new(0, 0); let mut duration = Duration::new(0, 0);
for _i in 0..iters { for _i in 0..iters {
let start = Instant::now(); let start = Instant::now();
tokenizer.train(trainer, files.clone()).unwrap(); tokenizer.train_from_files(trainer, files.clone()).unwrap();
duration = duration.checked_add(start.elapsed()).unwrap(); duration = duration.checked_add(start.elapsed()).unwrap();
} }
duration duration