formatting

This commit is contained in:
epwalsh
2019-12-19 15:07:27 -08:00
parent a16daa78f1
commit 69212e17e9
3 changed files with 12 additions and 4 deletions

View File

@@ -202,6 +202,7 @@ mod tests {
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
#[test] #[test]
// Ensure `BPE::from_files` works as expected.
fn test_bpe_from_files() { fn test_bpe_from_files() {
// Set up vocab file. // Set up vocab file.
let mut vocab_file = NamedTempFile::new().unwrap(); let mut vocab_file = NamedTempFile::new().unwrap();
@@ -224,6 +225,7 @@ mod tests {
} }
#[test] #[test]
// Ensure `MergeTokenOutOfVocabulary` error is returned when it should be.
fn test_bpe_from_files_merge_token_oov() { fn test_bpe_from_files_merge_token_oov() {
// Set up vocab file. // Set up vocab file.
let mut vocab_file = NamedTempFile::new().unwrap(); let mut vocab_file = NamedTempFile::new().unwrap();
@@ -253,6 +255,8 @@ mod tests {
} }
#[test] #[test]
// Ensure `BadMerges` error is returned when there is an invalid line in the
// merges.txt file.
fn test_bpe_from_files_bad_merges() { fn test_bpe_from_files_bad_merges() {
// Set up vocab file. // Set up vocab file.
let mut vocab_file = NamedTempFile::new().unwrap(); let mut vocab_file = NamedTempFile::new().unwrap();

View File

@@ -13,14 +13,15 @@ use std::{
}; };
pub struct BpeTrainerConfig { pub struct BpeTrainerConfig {
vocab_size: usize,
min_frequency: u32, min_frequency: u32,
vocab_size: usize,
} }
impl BpeTrainerConfig { impl BpeTrainerConfig {
pub fn new(min_frequency: u32, vocab_size: usize) -> Self { pub fn new(min_frequency: u32, vocab_size: usize) -> Self {
BpeTrainerConfig { BpeTrainerConfig {
vocab_size,
min_frequency, min_frequency,
vocab_size,
} }
} }
@@ -32,12 +33,14 @@ impl BpeTrainerConfig {
self.min_frequency = value; self.min_frequency = value;
} }
} }
impl Default for BpeTrainerConfig { impl Default for BpeTrainerConfig {
fn default() -> Self { fn default() -> Self {
BpeTrainerConfig::new(0, 30000) BpeTrainerConfig::new(0, 30000)
} }
} }
#[derive(Default)]
pub struct BpeTrainer { pub struct BpeTrainer {
// Training parameters // Training parameters
config: BpeTrainerConfig, config: BpeTrainerConfig,

View File

@@ -5,6 +5,7 @@ pub struct Word {
chars: Vec<u32>, chars: Vec<u32>,
sizes: Vec<usize>, sizes: Vec<usize>,
} }
impl Word { impl Word {
pub fn new() -> Self { pub fn new() -> Self {
Word { Word {
@@ -109,8 +110,8 @@ mod tests {
// training. This merge affects the counts for the pairs // training. This merge affects the counts for the pairs
// ('e', 'l') ~= (1, 2), // ('e', 'l') ~= (1, 2),
// ('e', 'll') ~= (1, 4), // ('e', 'll') ~= (1, 4),
// ('ll', 'o') ~= (4, 3), and // ('l', 'o') ~= (2, 3), and
// ('l', 'o') ~= (2, 3). // ('ll', 'o') ~= (4, 3).
// So the changes should reflect that: // So the changes should reflect that:
assert_eq!( assert_eq!(
changes, changes,