generate kana

This commit is contained in:
mii
2023-04-23 14:39:22 +09:00
commit f1cc9e9dbb
6 changed files with 679 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
cmudict-0.7b_baseform

7
Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "pronunciation"
version = "0.1.0"

12
Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
[package]
name = "pronunciation"
version = "0.1.0"
edition = "2021"
[lib]
name = "pronunciation"
path = "src/lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

10
examples/main.rs Normal file
View File

@ -0,0 +1,10 @@
use pronunciation::pronunciation::Pronunciation;
/// Example entry point: loads the dictionary file `cmudict-0.7b_baseform`
/// from the current directory and prints the kana produced for one sample word.
fn main() {
    let sample = "pronunciation";
    let converter = Pronunciation::new("cmudict-0.7b_baseform");
    let kana = converter.get_kana(String::from(sample));
    println!("{}", kana);
}

1
src/lib.rs Normal file
View File

@ -0,0 +1 @@
/// Word-to-katakana conversion driven by a pronunciation dictionary file
/// and a built-in phoneme-to-kana table.
pub mod pronunciation;

647
src/pronunciation.rs Normal file
View File

@ -0,0 +1,647 @@
use std::{collections::HashMap, fs::File, io::{BufReader, BufRead}};
/// Builds a map-like collection from `key => value` pairs.
///
/// Every key is converted with `String::from`; values are used as written.
/// The resulting collection type is chosen by inference at the call site
/// through `From<[(String, V); N]>` (this file uses it for a `HashMap`).
/// A trailing comma after the last pair is accepted.
macro_rules! cm {
    ($($key:expr => $value:expr),* $(,)?) => {{
        let entries = [$((String::from($key), $value)),*];
        core::convert::From::from(entries)
    }};
}
/// Builds a map-like collection of `String` keys and `String` values from
/// `key => value` pairs.
///
/// Both sides of every pair are converted with `String::from`. The resulting
/// collection type is chosen by inference at the call site through
/// `From<[(String, String); N]>` (this file uses it for a `HashMap`).
/// A trailing comma after the last pair is accepted.
macro_rules! c {
    ($($key:expr => $value:expr),* $(,)?) => {{
        let entries = [$((String::from($key), String::from($value))),*];
        core::convert::From::from(entries)
    }};
}
/// Builds a `Vec<String>` from a comma-separated list of expressions,
/// converting each one with `String::from`.
///
/// A trailing comma after the last element is accepted.
macro_rules! svec {
    ($($item:expr),* $(,)?) => {{
        Vec::from([$(String::from($item)),*])
    }};
}
/// Converts words to katakana by looking up their phoneme sequence and
/// mapping the phonemes through a kana table.
#[derive(Debug, Clone)]
pub struct Pronunciation {
    /// Headword -> phoneme sequence, as read from the dictionary file.
    /// Lookups uppercase the queried word before consulting this map.
    pub pronunciation_map: HashMap<String, Vec<String>>,
    /// Phoneme symbols treated as vowels when deciding whether the current
    /// phoneme should be combined with the one that follows it.
    vowels: Vec<String>,
    /// Kana table: the outer key is the leading phoneme, the inner key is the
    /// vowel that follows it (or `""` for the phoneme on its own), and the
    /// value is the kana to emit.
    dictionary: HashMap<String, HashMap<String, String>>,
}
impl Pronunciation {
    /// Converts `word` to kana.
    ///
    /// The word is uppercased before it is looked up in `pronunciation_map`.
    /// Returns an empty string when the word is not in the dictionary; call
    /// [`Pronunciation::try_get_kana`] to tell "unknown word" apart from
    /// "known word that produced no kana".
    ///
    /// This function never panics on unknown words or on phoneme combinations
    /// that are missing from the kana table, and it does not write to stdout.
    pub fn get_kana(&self, word: String) -> String {
        self.try_get_kana(&word).unwrap_or_default()
    }

    /// Converts `word` to kana, returning `None` when the (uppercased) word is
    /// not present in `pronunciation_map`.
    ///
    /// The phoneme sequence is walked left to right. When no phoneme is being
    /// held and the next phoneme is a vowel, the current phoneme is held back
    /// and emitted together with that vowel as one table lookup
    /// (`dictionary[held][vowel]`). Otherwise the phoneme is emitted on its
    /// own (`dictionary[phoneme][""]`).
    ///
    /// If a (held, vowel) pair has no table entry, the two phonemes are
    /// emitted separately using their stand-alone kana; a phoneme that is not
    /// in the table at all contributes nothing.
    pub fn try_get_kana(&self, word: &str) -> Option<String> {
        let phonemes = self.pronunciation_map.get(&word.to_uppercase())?;
        let mut kana = String::new();
        // Phoneme held back so it can be combined with the vowel after it.
        let mut pending: Option<&str> = None;
        for (i, phoneme) in phonemes.iter().enumerate() {
            match pending.take() {
                None => {
                    let next_is_vowel = phonemes
                        .get(i + 1)
                        .map_or(false, |next| self.vowels.contains(next));
                    if next_is_vowel {
                        pending = Some(phoneme.as_str());
                    } else {
                        kana.push_str(self.standalone_kana(phoneme));
                    }
                }
                Some(head) => {
                    let combined = self
                        .dictionary
                        .get(head)
                        .and_then(|row| row.get(phoneme.as_str()));
                    match combined {
                        Some(pair) => kana.push_str(pair),
                        None => {
                            // No entry for this pair: degrade to the two
                            // stand-alone readings instead of panicking.
                            kana.push_str(self.standalone_kana(head));
                            kana.push_str(self.standalone_kana(phoneme));
                        }
                    }
                }
            }
        }
        Some(kana)
    }

    /// Returns the kana for `phoneme` on its own (the `""` column of its table
    /// row), or `""` when the phoneme has no row or no stand-alone entry.
    fn standalone_kana(&self, phoneme: &str) -> &str {
        self.dictionary
            .get(phoneme)
            .and_then(|row| row.get(""))
            .map_or("", String::as_str)
    }

    /// Loads the pronunciation dictionary at `dict_file` and builds the
    /// built-in phoneme-to-kana table.
    ///
    /// Each line of the file is split on whitespace: the first token becomes
    /// the key in `pronunciation_map` and the remaining tokens become its
    /// phoneme list. Blank lines are ignored, and lines that cannot be decoded
    /// as UTF-8 are skipped. Keys are stored exactly as written; since lookups
    /// uppercase the query, headwords are presumably expected to be uppercase
    /// in the file.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened, or if reading it fails with an
    /// I/O error other than invalid (non-UTF-8) data.
    pub fn new(dict_file: &str) -> Self {
        let file = File::open(dict_file).unwrap_or_else(|err| {
            panic!(
                "failed to open pronunciation dictionary `{}`: {}",
                dict_file, err
            )
        });
        let reader = BufReader::new(file);
        let mut pronunciation_map: HashMap<String, Vec<String>> = HashMap::new();
        for line in reader.lines() {
            let line = match line {
                Ok(line) => line,
                // Best effort: a line that is not valid UTF-8 is dropped and
                // loading continues with the next line.
                Err(ref err) if err.kind() == std::io::ErrorKind::InvalidData => continue,
                // Any other read error would otherwise be swallowed (and could
                // repeat forever), so fail loudly like the open above.
                Err(err) => panic!(
                    "failed to read pronunciation dictionary `{}`: {}",
                    dict_file, err
                ),
            };
            let mut tokens = line.split_whitespace();
            // A blank line has no headword; skip it instead of indexing into
            // an empty token list.
            if let Some(headword) = tokens.next() {
                pronunciation_map.insert(
                    headword.to_string(),
                    tokens.map(str::to_string).collect(),
                );
            }
        }
        let vowels = svec!["AA","AH","AE","AW","AY","ER","IY","IH","UH","UW","EH","EY","AO","OW","OY"];
        // Outer key: leading phoneme. Inner key: the vowel that follows it, or
        // "" for the phoneme on its own. Empty values are entries that have
        // not been filled in yet.
        let dictionary: HashMap<String, HashMap<String, String>> = cm! {
            "ZH" => c! {
                "AA" => "ジャ",
                "AH" => "ジョ",
                "AE" => "ジャ",
                "AW" => "ジャ",
                "AY" => "ジャイ",
                "ER" => "ジェ",
                "IY" => "",
                "IH" => "",
                "UH" => "ジュ",
                "UW" => "ジュ",
                "EH" => "ジェ",
                "EY" => "ジェ",
                "AO" => "ジョ",
                "OW" => "ジョ",
                "OY" => "ジョ",
                "" => "ジュ"
            },
            "DH" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ザイ",
                "ER" => "ザー",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "W" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ワイ",
                "ER" => "ウィ",
                "IY" => "ウィ",
                "IH" => "ウィ",
                "UH" => "",
                "UW" => "",
                "EH" => "ウェ",
                "EY" => "ウェ",
                "AO" => "ウォ",
                "OW" => "ウォ",
                "OY" => "ウォ",
                "" => ""
            },
            "NG" => c! {
                "AA" => "ンガ",
                "AH" => "ンガ",
                "AE" => "ンガ",
                "AW" => "ンガ",
                "AY" => "ンガイ",
                "ER" => "ンギ",
                "IY" => "ンギ",
                "IH" => "ンギ",
                "UH" => "ング",
                "UW" => "ング",
                "EH" => "ンゲ",
                "EY" => "ンゲ",
                "AO" => "ンゴ",
                "OW" => "ンゴ",
                "OY" => "ンゴ",
                "" => "ング"
            },
            "Y" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "アイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "TH" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "サイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "G" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ガイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "CH" => c! {
                "AA" => "チャ",
                "AH" => "チャ",
                "AE" => "チャ",
                "AW" => "チャ",
                "AY" => "チャイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "チュ",
                "UW" => "チュ",
                "EH" => "チェ",
                "EY" => "チェ",
                "AO" => "チョ",
                "OW" => "チョ",
                "OY" => "チョ",
                "" => ""
            },
            "D" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ダイ",
                "ER" => "ダー",
                "IY" => "ディ",
                "IH" => "ディ",
                "UH" => "ドゥ",
                "UW" => "ドゥ",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "B" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "バウ",
                "AY" => "バイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "SH" => c! {
                "AA" => "シャ",
                "AH" => "ショ",
                "AE" => "シャ",
                "AW" => "シャ",
                "AY" => "シャイ",
                "ER" => "",
                "IY" => "シー",
                "IH" => "",
                "UH" => "シュ",
                "UW" => "シュ",
                "EH" => "シェ",
                "EY" => "シェ",
                "AO" => "ショ",
                "OW" => "ショ",
                "OY" => "ショ",
                "" => ""
            },
            "F" => c! {
                "AA" => "ファ",
                "AH" => "ファ",
                "AE" => "ファ",
                "AW" => "ファ",
                "AY" => "ファイ",
                "ER" => "フィ",
                "IY" => "フィ",
                "IH" => "フィ",
                "UH" => "",
                "UW" => "",
                "EH" => "フェ",
                "EY" => "フェ",
                "AO" => "フォ",
                "OW" => "フォ",
                "OY" => "フォ",
                "" => ""
            },
            "K" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "カイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "M" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "マウ",
                "AY" => "マイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "R" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ライ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "V" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "ヴァ",
                "AW" => "",
                "AY" => "バイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "Z" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ザイ",
                "ER" => "ザー",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "N" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ナイ",
                "ER" => "",
                "IY" => "ニー",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "P" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "パイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "JH" => c! {
                "AA" => "ジャ",
                "AH" => "ジャ",
                "AE" => "ジャ",
                "AW" => "ジャ",
                "AY" => "ジャイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "ジュ",
                "UW" => "ジュ",
                "EH" => "ジェ",
                "EY" => "ジェ",
                "AO" => "ジョ",
                "OW" => "ジョ",
                "OY" => "ジョ",
                "" => ""
            },
            "L" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "ライ",
                "ER" => "ラー",
                "IY" => "リー",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "ロー",
                "OY" => "",
                "" => ""
            },
            "HH" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "ハウ",
                "AY" => "ハイ",
                "ER" => "ハリ",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "S" => c! {
                "AA" => "",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "サイ",
                "ER" => "",
                "IY" => "",
                "IH" => "",
                "UH" => "",
                "UW" => "",
                "EH" => "",
                "EY" => "セイ",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            "T" => c! {
                "AA" => "トッ",
                "AH" => "",
                "AE" => "",
                "AW" => "",
                "AY" => "タイ",
                "ER" => "",
                "IY" => "ティ",
                "IH" => "ティ",
                "UH" => "チュ",
                "UW" => "チュ",
                "EH" => "",
                "EY" => "",
                "AO" => "",
                "OW" => "",
                "OY" => "",
                "" => ""
            },
            // NOTE(review): "EY" is listed again further down. A map built
            // from an array keeps only one value per key (the later one, as
            // far as the std implementation goes), so this row appears dead.
            "EY" => c! {
                "" => "エイ"
            },
            // NOTE(review): "AW" is listed again further down; this earlier
            // row appears to be overwritten by the later one.
            "AW" => c! {
                "" => "オウ"
            },
            // NOTE(review): "AA" is listed again further down; this earlier
            // row appears to be overwritten by the later one.
            "AA" => c! {
                "" => ""
            },
            "AH" => c! {
                "ER" => "アエル",
                "OW" => "アッアウ",
                "" => ""
            },
            "IH" => c! {
                "" => ""
            },
            "EH" => c! {
                "OW" => "エオ",
                "" => ""
            },
            "AE" => c! {
                "" => ""
            },
            // NOTE(review): "OW" is listed again at the end of the table; this
            // earlier row (including its "ER" entry, which the later row
            // lacks) appears to be overwritten by the later one.
            "OW" => c! {
                "AH" => "オア",
                "IH" => "オウィ",
                "ER" => "オウェ",
                "EH" => "オエ",
                "" => ""
            },
            "IY" => c! {
                "AA" => "イア",
                "IY" => "イイ",
                "EY" => "イー",
                "AH" => "イア",
                "AE" => "イア",
                "AO" => "イオ",
                "ER" => "アイヤー",
                "EH" => "イエ",
                "IH" => "",
                "UW" => "イウ",
                "OW" => "イオ",
                "" => "イー"
            },
            "AY" => c! {
                "AH" => "アイア",
                "AA" => "アイ",
                "AW" => "アイオウ",
                "AE" => "アイェ",
                "ER" => "アイア",
                "IH" => "アイイ",
                "EH" => "アイ",
                "IY" => "ウイェ",
                "UW" => "アユ",
                "OW" => "アイオ",
                "EY" => "ウイェ",
                "" => "アイ"
            },
            "ER" => c! {
                "AA" => "",
                "AY" => "アライ",
                "AH" => "",
                "AE" => "アラ",
                "AW" => "アラウ",
                "AO" => "アロ",
                "EY" => "アレイ",
                "ER" => "アー",
                "EH" => "オレ",
                "UH" => "オロウ",
                "OW" => "アロ",
                "OY" => "アロイ",
                "UW" => "ウル",
                "IH" => "エリ",
                "IY" => "エリ",
                "" => "アー"
            },
            "AO" => c! {
                "EH" => "アオエ",
                "" => ""
            },
            "EY" => c! {
                "AA" => "エイアー",
                "AH" => "エイ",
                "ER" => "エアー",
                "EY" => "アー",
                "IY" => "エイ",
                "EH" => "エイ",
                "AO" => "エイオ",
                "OW" => "アオ",
                "AW" => "アヨウ",
                "" => "エイ"
            },
            "OY" => c! {
                "ER" => "オイヤー",
                "OW" => "オヨ",
                "IH" => "オイエ",
                "" => "オイ"
            },
            "UW" => c! {
                "AA" => "ウア",
                "AH" => "ウー",
                "ER" => "ウアー",
                "EY" => "ウエ",
                "IY" => "ウイ",
                "IH" => "ウエ",
                "" => ""
            },
            "AA" => c! {
                "UW" => "オウ",
                "IY" => "アイ",
                "" => "アー"
            },
            "AW" => c! {
                "AH" => "アウア",
                "IY" => "アオイ",
                "UW" => "アオウ",
                "ER" => "アワー",
                "IH" => "アウィ",
                "" => "オウ"
            },
            "UH" => c! {
                "AH" => "ウー",
                "" => ""
            },
            "OW" => c! {
                "AA" => "オア",
                "AO" => "オウォ",
                "AH" => "オア",
                "AE" => "オエ",
                "IY" => "オイ",
                "IH" => "オーウィ",
                "UH" => "オウ",
                "EY" => "オウエイ",
                "EH" => "オフエ",
                "" => "オー"
            },
        };
        Self {
            pronunciation_map,
            vowels,
            dictionary
        }
    }
}