This commit is contained in:
Tokuhiro Matsuno
2020-09-08 10:15:03 +09:00
parent c564b83153
commit d538396f18
16 changed files with 85 additions and 43 deletions

2
.gitignore vendored
View File

@@ -1,4 +1,4 @@
/config.py
/comb/config.py
__pycache__
/comb.xml
/hello.*

View File

@@ -8,45 +8,48 @@ DESTDIR ?=
PYTHON ?= /usr/bin/python3
all: comb.xml config.py
all: comb.xml comb
check:
python -m py_compile ibus.py
python -m py_compile combromkan.py
python -m py_compile comb.py
python -m py_compile skkdict.py
python -m py_compile comb/combromkan.py
python -m py_compile comb/comb.py
python -m py_compile comb/skkdict.py
pytest
comb.xml: comb.xml.in
sed -e "s:@PYTHON@:$(PYTHON):g;" \
-e "s:@DATADIR@:$(DATADIR):g" $< > $@
config.py: config.py.in
comb/config.py: comb/config.py.in
sed -e "s:@SYSCONFDIR@:$(SYSCONFDIR):g" $< > $@
install: all check
install -m 0755 -d $(DESTDIR)$(DATADIR)/ibus-comb $(DESTDIR)$(SYSCONFDIR)/xdg/comb $(DESTDIR)$(DATADIR)/ibus/component
install: all check comb/config.py
install -m 0755 -d $(DESTDIR)$(DATADIR)/ibus-comb/comb $(DESTDIR)$(SYSCONFDIR)/xdg/comb $(DESTDIR)$(DATADIR)/ibus/component
install -m 0644 comb.svg $(DESTDIR)$(DATADIR)/ibus-comb
install -m 0644 comb/__init__.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
install -m 0644 comb/graph.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
install -m 0644 comb/skkdict.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
install -m 0644 comb/combromkan.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
install -m 0644 ibus.py $(DESTDIR)$(DATADIR)/ibus-comb
install -m 0644 combromkan.py $(DESTDIR)$(DATADIR)/ibus-comb
install -m 0644 skkdict.py $(DESTDIR)$(DATADIR)/ibus-comb
install -m 0644 comb.py $(DESTDIR)$(DATADIR)/ibus-comb
install -m 0644 comb/comb.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
install -m 0644 comb.xml $(DESTDIR)$(DATADIR)/ibus/component
uninstall:
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb.svg
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/config.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/config.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/comb.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/skkdict.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/combromkan.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/graph.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/ibus.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/skkdict.py
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/combromkan.py
rmdir $(DESTDIR)$(DATADIR)/ibus-comb
rmdir $(DESTDIR)$(SYSCONFDIR)/xdg/comb
rm -f $(DESTDIR)$(DATADIR)/ibus/component/comb.xml
clean:
rm -f comb.xml
rm -f config.py
rm -f comb/config.py
.PHONY: all check install uninstall

1
comb/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/config.py

0
comb/__init__.py Normal file
View File

View File

@@ -1,17 +1,18 @@
from typing import List, Any
import jaconv
import comb
import os
import re
import time
import logging
from gi.repository import GLib
import pathlib
import marisa_trie
import jaconv
import combromkan
from skkdict import parse_skkdict, write_skkdict, merge_skkdict
from gi.repository import GLib
from comb import combromkan
from comb.skkdict import parse_skkdict, write_skkdict, merge_skkdict
import pathlib
BOIN = set(['a', 'i', 'u', 'e', 'o'])

View File

@@ -2,10 +2,10 @@ import sys
from typing import Dict, List
import marisa_trie
import math
import jaconv
import logging
import jaconv
from comb import SystemDict
from comb.comb import SystemDict
DEFAULT_SCORE = [(math.log10(0.00000000001),)]
@@ -113,11 +113,8 @@ def lookup(s, system_dict: SystemDict):
if len(words) > 0:
# print(f"YOMI:::: {yomi} {words}")
for word in words:
yield word, (
system_dict.trie[word][0].decode('utf-8').split('/') + [
word,
jaconv.hira2kata(word)]
)
kanjis = system_dict.trie[word][0].decode('utf-8').split('/')
yield word, (kanjis + [word, jaconv.hira2kata(word)])
else:
# print(f"YOMI~~~~:::: {yomi}")
yield yomi[0], [yomi[0], jaconv.hira2kata(yomi[0])]
@@ -175,9 +172,6 @@ def viterbi(graph: Graph, onegram_trie):
node.prev = shortest_prev
node.cost = cost
# print(graph)
graph.dump('hello.dot')
print("Viterbi phase 2")
node = graph[len(graph) - 1][0]
# print(node)
@@ -261,5 +255,5 @@ def main():
# for ww in ["きょう/橋\tは/は", "きょう/今日\tは/は", "きょう/頃\tは/は", "は/は\tきょう/今日", "は/は\tきょう/頃"]:
# print(f"WWWWW {ww} {bigram_score.get(ww, DEFAULT_SCORE)}")
if __name__ == '__main__':
main()

15
ibus.py
View File

@@ -32,15 +32,21 @@ import sys
import getopt
import locale
import re
from comb import Comb, UserDict, SystemDict
import logging
logging.basicConfig(level=logging.DEBUG, filename='/tmp/ibus-comb.log', filemode='w')
logging.info("Loading ibus-comb")
libpath = os.path.join(os.path.dirname(__file__), "comb")
logging.info(f"library path: {libpath}")
sys.path.append(libpath)
from comb.comb import Comb, UserDict, SystemDict
import pathlib
__base_dir__ = os.path.dirname(__file__)
logging.basicConfig(level=logging.DEBUG, filename='/tmp/ibus-comb.log', filemode='w')
# gee thank you IBus :-)
num_keys = []
@@ -58,6 +64,7 @@ del n
configdir = os.path.join(GLib.get_user_config_dir(), 'ibus-comb')
pathlib.Path(configdir).mkdir(parents=True, exist_ok=True)
user_dict = UserDict(os.path.join(configdir, 'user-dict.txt'), logging.getLogger('UserDict'))
logging.info("Loaded user dictionary")
system_dict = SystemDict()

35
model/bin/naive.py Normal file
View File

@@ -0,0 +1,35 @@
import sys
import re
import marisa_trie
# Quick-and-dirty script that dumps the 1-gram section of a language-model
# file into a marisa-trie record file.
# NOTE(review): the input is presumably an ARPA-format file (the section
# header matched below is "\1-grams:") -- confirm against the caller.

arpafname = sys.argv[1]

SPACES = re.compile(r'\s+')

# Collect the unigram entries as (word, (score,)) records.
retval = []
# Decode explicitly as UTF-8 instead of relying on the locale's default
# encoding, so the result does not depend on the environment the script is
# run in.
# NOTE(review): assumes the input is UTF-8 encoded text -- confirm.
with open(arpafname, 'r', encoding='utf-8') as fp:
    # Skip everything up to and including the "\1-grams:" section header.
    for line in fp:
        if line == "\\1-grams:\n":
            break

    # Keep consuming the same file iterator, so this loop starts on the line
    # right after the header.
    for line in fp:
        line = line.lstrip()
        m = SPACES.split(line)
        if len(m) >= 2:
            # First field is the score, second field is the word.
            score = m[0]
            word = m[1]
            retval.append((word, (float(score),),))
        else:
            # A line with fewer than two fields (e.g. an empty line) ends
            # the unigram section.
            break

# Each record stores a single little-endian float ('<f'): the score.
trie = marisa_trie.RecordTrie('<f', retval)
fname = 'jawiki.1gram'
print(f"writing {fname}. size={len(retval)}")
trie.save(fname)

View File

@@ -2,7 +2,7 @@ import os
import pathlib
import re
import sys
import jaconv
import comb
import MeCab
from janome.tokenizer import Tokenizer
@@ -22,7 +22,7 @@ def get_token(node):
m = node.feature.split(',')
if len(m) >= 8:
yomi = node.feature.split(',')[7]
return jaconv.kata2hira(yomi) + "/" + node.surface
return comb.kata2hira(yomi) + "/" + node.surface
else:
return node.surface

View File

@@ -1 +1,2 @@
marisa-trie==0.7.5
jaconv==0.2.4

View File

@@ -1,4 +1,4 @@
from comb import parse_skkdict
from comb.comb import parse_skkdict
import marisa_trie
dictionary = parse_skkdict('/usr/share/skk/SKK-JISYO.L', encoding='euc-jp')

View File

@@ -1,4 +1,4 @@
from comb import parse_skkdict
from comb.comb import parse_skkdict
import pygtrie
dictionary = parse_skkdict('/usr/share/skk/SKK-JISYO.L', encoding='euc-jp')

View File

@@ -1,4 +1,4 @@
from combromkan import to_hiragana
from comb.combromkan import to_hiragana
def test_foo():