Files
akaza/comb.py
Tokuhiro Matsuno e66521d7eb logging
2020-09-03 07:52:31 +09:00

51 lines
1.4 KiB
Python

import romkan
import re
import sys
def parse_skkdict(path, encoding='euc-jp'):
    """Parse an SKK dictionary file into a reading -> candidates mapping.

    Each entry line has the form ``yomi /cand1;annotation/cand2/``. The text
    after the first ``;`` of a candidate (its annotation) is dropped.

    Args:
        path: Path of the dictionary file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A dict mapping each reading (yomi) to a set of candidate strings.
        Candidates of a reading that occurs on several lines are merged.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    result = {}
    with open(path, 'r', encoding=encoding) as fp:
        for line in fp:
            # Lines starting with ';;' are dictionary comments/headers.
            if line.startswith(';;'):
                continue
            yomi, sep, body = line.strip().partition(' ')
            # Blank or malformed lines have no "yomi candidates" separator;
            # skip them instead of failing the whole load on unpacking.
            if not sep:
                continue
            # Strip the annotation from every candidate and ignore empty
            # slots such as the ones produced by '//' or a bare '/'.
            candidates = {
                cand.split(';', 1)[0] for cand in body.strip('/').split('/')
            }
            candidates.discard('')
            if not candidates:
                continue
            # Merge rather than overwrite when a reading appears repeatedly.
            result.setdefault(yomi, set()).update(candidates)
    return result
class Comb:
    """Build conversion candidates for romaji input.

    Candidates are the kana renderings of the input plus any entries found
    for its hiragana reading in the SKK dictionary loaded at construction.
    """

    def __init__(self, logger):
        """Store the logger and load the system SKK dictionary, best-effort.

        Args:
            logger: Object providing ``info`` and ``debug`` logging methods.
        """
        self.logger = logger
        # Bind the attribute before attempting the load so that a failed
        # load leaves an empty dictionary instead of a missing attribute.
        self.l_jisyo = {}
        try:
            self.l_jisyo = parse_skkdict('/usr/share/skk/SKK-JISYO.L')
        except Exception:
            # Loading is optional: record the failure type and carry on
            # with the empty dictionary.
            self.logger.debug("cannot LOAD JISYO %s", sys.exc_info()[0])
        else:
            self.logger.info("LOADed JISYO")

    def convert(self, src):
        """Return the list of conversion candidates for ``src``.

        Args:
            src: Input string handed to ``romkan`` for kana conversion.

        Returns:
            A list of two-element pairs, each holding the same string twice:
            the hiragana form, the katakana form, every dictionary entry
            stored under the hiragana form (in the set's iteration order),
            and finally ``src`` itself.
        """
        # '.' and ',' are removed from both kana renderings.
        hiragana = romkan.to_hiragana(src).replace('.', '').replace(',', '')
        katakana = romkan.to_kana(src).replace('.', '').replace(',', '')
        retval = [
            # KANA / KANJI KOUHO
            (hiragana, hiragana),
            (katakana, katakana),
        ]
        got = self.l_jisyo.get(hiragana)
        if got is not None:
            # Lazy logging arguments avoid formatting when debug is off.
            self.logger.debug("GOT: %s", got)
            retval.extend([e, e] for e in got)
        retval.append([src, src])
        return retval