formatting

This commit is contained in:
epwalsh
2019-12-19 15:07:27 -08:00
parent a16daa78f1
commit 69212e17e9
3 changed files with 12 additions and 4 deletions

View File

@ -202,6 +202,7 @@ mod tests {
use tempfile::NamedTempFile;
#[test]
// Ensure `BPE::from_files` works as expected.
fn test_bpe_from_files() {
// Set up vocab file.
let mut vocab_file = NamedTempFile::new().unwrap();
@ -224,6 +225,7 @@ mod tests {
}
#[test]
// Ensure `MergeTokenOutOfVocabulary` error is returned when it should be.
fn test_bpe_from_files_merge_token_oov() {
// Set up vocab file.
let mut vocab_file = NamedTempFile::new().unwrap();
@ -253,6 +255,8 @@ mod tests {
}
#[test]
// Ensure `BadMerges` error is returned when there is an invalid line in the
// merges.txt file.
fn test_bpe_from_files_bad_merges() {
// Set up vocab file.
let mut vocab_file = NamedTempFile::new().unwrap();

View File

@ -13,14 +13,15 @@ use std::{
};
/// Configuration values for the BPE trainer: a minimum frequency and a vocabulary size.
// NOTE(review): `vocab_size` is listed twice below. This looks like the removed and the
// added line of a field reorder shown without +/- markers — confirm against the actual
// file, since a struct may declare each field only once.
pub struct BpeTrainerConfig {
vocab_size: usize,
min_frequency: u32,
vocab_size: usize,
}
impl BpeTrainerConfig {
/// Builds a `BpeTrainerConfig` from the given `min_frequency` and `vocab_size`.
// NOTE(review): `vocab_size` is initialized twice in the literal below. This looks like
// the removed and the added line of a field reorder shown without +/- markers — confirm
// against the actual file, since a struct literal may set each field only once.
pub fn new(min_frequency: u32, vocab_size: usize) -> Self {
BpeTrainerConfig {
vocab_size,
min_frequency,
vocab_size,
}
}
@ -32,12 +33,14 @@ impl BpeTrainerConfig {
self.min_frequency = value;
}
}
impl Default for BpeTrainerConfig {
fn default() -> Self {
BpeTrainerConfig::new(0, 30000)
}
}
#[derive(Default)]
pub struct BpeTrainer {
// Training parameters
config: BpeTrainerConfig,

View File

@ -5,6 +5,7 @@ pub struct Word {
chars: Vec<u32>,
sizes: Vec<usize>,
}
impl Word {
pub fn new() -> Self {
Word {
@ -109,8 +110,8 @@ mod tests {
// training. This merge affects the counts for the pairs
// ('e', 'l') ~= (1, 2),
// ('e', 'll') ~= (1, 4),
// ('ll', 'o') ~= (4, 3), and
// ('l', 'o') ~= (2, 3).
// ('l', 'o') ~= (2, 3), and
// ('ll', 'o') ~= (4, 3).
// So the changes should reflect that:
assert_eq!(
changes,