From fc35e82e992ff5082bcddac6d70848260c32ca23 Mon Sep 17 00:00:00 2001 From: mii Date: Sun, 23 Apr 2023 18:07:27 +0900 Subject: [PATCH] roma --- Cargo.lock | 41 +++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + examples/main.rs | 2 +- src/pronunciation.rs | 21 +++++++++++++++------ 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 298fe97..3c07263 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,47 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "pronunciation" version = "0.1.0" +dependencies = [ + "wana_kana", +] + +[[package]] +name = "wana_kana" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "477976a5c56fb7b014795df5a2ce08d2de8bcd4d5980844c5bd3978a7fd1c30b" +dependencies = [ + "fnv", + "itertools", + "lazy_static", +] diff --git a/Cargo.toml b/Cargo.toml index 080e09a..95dc88f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +wana_kana = "3.0.0" \ No newline at end of file diff --git a/examples/main.rs b/examples/main.rs index 30f62df..253b8ac 100644 --- a/examples/main.rs +++ b/examples/main.rs @@ -4,7 +4,7 @@ use pronunciation::pronunciation::Pronunciation; fn main() { let pronunciation = Pronunciation::new("cmudict-0.7b_baseform"); - let word = "pronunciation"; + let word = "valo"; println!("{}", pronunciation.get_kana(word.to_string())); } diff --git a/src/pronunciation.rs b/src/pronunciation.rs index 1cb480f..0374c4c 100644 --- a/src/pronunciation.rs +++ b/src/pronunciation.rs @@ -1,5 +1,7 @@ use std::{collections::HashMap, fs::File, io::{BufReader, BufRead}}; +use wana_kana::ConvertJapanese; + macro_rules! cm { ($($k:expr => $v:expr),* $(,)?) => {{ core::convert::From::from([$((String::from($k), $v),)*]) @@ -25,14 +27,13 @@ pub struct Pronunciation { } impl Pronunciation { - pub fn get_kana(&self, word: String) -> String { - let pronunciation = self.pronunciation_map.get(&word.to_uppercase()).unwrap(); + pub fn phoneme_to_kana(&self, phonemes: &Vec) -> String { let mut kana = String::default(); let mut bef: Option = None; - for (i, phoneme) in pronunciation.iter().enumerate() { + for (i, phoneme) in phonemes.iter().enumerate() { let m = bef.clone().unwrap_or(String::default()); - if m == String::from("") && self.vowels.contains(pronunciation.get(i + 1).unwrap_or(&String::default())) { + if m == String::from("") && self.vowels.contains(phonemes.get(i + 1).unwrap_or(&String::default())) { bef = Some(phoneme.clone()); continue; } @@ -45,12 +46,20 @@ impl Pronunciation { bef = None; kana += &kanas; - println!("{}:{}:{}", m, phoneme, kanas); + //println!("{}:{}:{}", m, phoneme, kanas); } - println!("{}: {}", word, kana); + //println!("{}: {}", word, kana); kana } + pub fn get_kana(&self, word: String) -> String { + if let Some(phonemes) = self.pronunciation_map.get(&word.to_uppercase()) { + self.phoneme_to_kana(phonemes) + } else { + word.to_kana().to_katakana() + } + } + pub fn new(dict_file: &str) -> Self { let file = File::open(dict_file).unwrap(); let reader = BufReader::new(file);