mirror of
https://github.com/mii443/akaza.git
synced 2025-12-03 11:08:29 +00:00
import
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,4 +1,4 @@
|
||||
/config.py
|
||||
/comb/config.py
|
||||
__pycache__
|
||||
/comb.xml
|
||||
/hello.*
|
||||
33
Makefile
33
Makefile
@@ -8,45 +8,48 @@ DESTDIR ?=
|
||||
|
||||
PYTHON ?= /usr/bin/python3
|
||||
|
||||
all: comb.xml config.py
|
||||
all: comb.xml comb
|
||||
|
||||
check:
|
||||
python -m py_compile ibus.py
|
||||
python -m py_compile combromkan.py
|
||||
python -m py_compile comb.py
|
||||
python -m py_compile skkdict.py
|
||||
python -m py_compile comb/combromkan.py
|
||||
python -m py_compile comb/comb.py
|
||||
python -m py_compile comb/skkdict.py
|
||||
pytest
|
||||
|
||||
comb.xml: comb.xml.in
|
||||
sed -e "s:@PYTHON@:$(PYTHON):g;" \
|
||||
-e "s:@DATADIR@:$(DATADIR):g" $< > $@
|
||||
|
||||
config.py: config.py.in
|
||||
comb/config.py: comb/config.py.in
|
||||
sed -e "s:@SYSCONFDIR@:$(SYSCONFDIR):g" $< > $@
|
||||
|
||||
install: all check
|
||||
install -m 0755 -d $(DESTDIR)$(DATADIR)/ibus-comb $(DESTDIR)$(SYSCONFDIR)/xdg/comb $(DESTDIR)$(DATADIR)/ibus/component
|
||||
install: all check comb/config.py
|
||||
install -m 0755 -d $(DESTDIR)$(DATADIR)/ibus-comb/comb $(DESTDIR)$(SYSCONFDIR)/xdg/comb $(DESTDIR)$(DATADIR)/ibus/component
|
||||
install -m 0644 comb.svg $(DESTDIR)$(DATADIR)/ibus-comb
|
||||
install -m 0644 comb/__init__.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
|
||||
install -m 0644 comb/graph.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
|
||||
install -m 0644 comb/skkdict.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
|
||||
install -m 0644 comb/combromkan.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
|
||||
install -m 0644 ibus.py $(DESTDIR)$(DATADIR)/ibus-comb
|
||||
install -m 0644 combromkan.py $(DESTDIR)$(DATADIR)/ibus-comb
|
||||
install -m 0644 skkdict.py $(DESTDIR)$(DATADIR)/ibus-comb
|
||||
install -m 0644 comb.py $(DESTDIR)$(DATADIR)/ibus-comb
|
||||
install -m 0644 comb/comb.py $(DESTDIR)$(DATADIR)/ibus-comb/comb/
|
||||
install -m 0644 comb.xml $(DESTDIR)$(DATADIR)/ibus/component
|
||||
|
||||
uninstall:
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb.svg
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/config.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/config.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/comb.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/skkdict.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/combromkan.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb/graph.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/ibus.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/comb.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/skkdict.py
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus-comb/combromkan.py
|
||||
rmdir $(DESTDIR)$(DATADIR)/ibus-comb
|
||||
rmdir $(DESTDIR)$(SYSCONFDIR)/xdg/comb
|
||||
rm -f $(DESTDIR)$(DATADIR)/ibus/component/comb.xml
|
||||
|
||||
clean:
|
||||
rm -f comb.xml
|
||||
rm -f config.py
|
||||
rm -f comb/config.py
|
||||
|
||||
.PHONY: all check install uninstall
|
||||
|
||||
|
||||
1
comb/.gitignore
vendored
Normal file
1
comb/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/config.py
|
||||
0
comb/__init__.py
Normal file
0
comb/__init__.py
Normal file
@@ -1,17 +1,18 @@
|
||||
from typing import List, Any
|
||||
|
||||
import jaconv
|
||||
import comb
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
from gi.repository import GLib
|
||||
import pathlib
|
||||
|
||||
import marisa_trie
|
||||
import jaconv
|
||||
|
||||
import combromkan
|
||||
from skkdict import parse_skkdict, write_skkdict, merge_skkdict
|
||||
from gi.repository import GLib
|
||||
|
||||
from comb import combromkan
|
||||
from comb.skkdict import parse_skkdict, write_skkdict, merge_skkdict
|
||||
import pathlib
|
||||
|
||||
BOIN = set(['a', 'i', 'u', 'e', 'o'])
|
||||
|
||||
@@ -2,10 +2,10 @@ import sys
|
||||
from typing import Dict, List
|
||||
import marisa_trie
|
||||
import math
|
||||
import jaconv
|
||||
import logging
|
||||
import jaconv
|
||||
|
||||
from comb import SystemDict
|
||||
from comb.comb import SystemDict
|
||||
|
||||
DEFAULT_SCORE = [(math.log10(0.00000000001),)]
|
||||
|
||||
@@ -113,11 +113,8 @@ def lookup(s, system_dict: SystemDict):
|
||||
if len(words) > 0:
|
||||
# print(f"YOMI:::: {yomi} {words}")
|
||||
for word in words:
|
||||
yield word, (
|
||||
system_dict.trie[word][0].decode('utf-8').split('/') + [
|
||||
word,
|
||||
jaconv.hira2kata(word)]
|
||||
)
|
||||
kanjis = system_dict.trie[word][0].decode('utf-8').split('/')
|
||||
yield word, (kanjis + [word, jaconv.hira2kata(word)])
|
||||
else:
|
||||
# print(f"YOMI~~~~:::: {yomi}")
|
||||
yield yomi[0], [yomi[0], jaconv.hira2kata(yomi[0])]
|
||||
@@ -175,9 +172,6 @@ def viterbi(graph: Graph, onegram_trie):
|
||||
node.prev = shortest_prev
|
||||
node.cost = cost
|
||||
|
||||
# print(graph)
|
||||
graph.dump('hello.dot')
|
||||
|
||||
print("Viterbi phase 2")
|
||||
node = graph[len(graph) - 1][0]
|
||||
# print(node)
|
||||
@@ -261,5 +255,5 @@ def main():
|
||||
# for ww in ["きょう/橋\tは/は", "きょう/今日\tは/は", "きょう/頃\tは/は", "は/は\tきょう/今日", "は/は\tきょう/頃"]:
|
||||
# print(f"WWWWW {ww} {bigram_score.get(ww, DEFAULT_SCORE)}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
15
ibus.py
15
ibus.py
@@ -32,15 +32,21 @@ import sys
|
||||
import getopt
|
||||
import locale
|
||||
import re
|
||||
|
||||
from comb import Comb, UserDict, SystemDict
|
||||
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG, filename='/tmp/ibus-comb.log', filemode='w')
|
||||
logging.info("Loading ibus-comb")
|
||||
|
||||
libpath = os.path.join(os.path.dirname(__file__), "comb")
|
||||
logging.info(f"library path: {libpath}")
|
||||
sys.path.append(libpath)
|
||||
|
||||
from comb.comb import Comb, UserDict, SystemDict
|
||||
|
||||
import pathlib
|
||||
|
||||
__base_dir__ = os.path.dirname(__file__)
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG, filename='/tmp/ibus-comb.log', filemode='w')
|
||||
|
||||
# gee thank you IBus :-)
|
||||
num_keys = []
|
||||
@@ -58,6 +64,7 @@ del n
|
||||
configdir = os.path.join(GLib.get_user_config_dir(), 'ibus-comb')
|
||||
pathlib.Path(configdir).mkdir(parents=True, exist_ok=True)
|
||||
user_dict = UserDict(os.path.join(configdir, 'user-dict.txt'), logging.getLogger('UserDict'))
|
||||
logging.info("Loaded user dictionary")
|
||||
|
||||
system_dict = SystemDict()
|
||||
|
||||
|
||||
35
model/bin/naive.py
Normal file
35
model/bin/naive.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""Quick-and-dirty extractor for the 1-gram section of an ARPA language model.

Usage: naive.py ARPA_FILE

Reads the ``\\1-grams:`` section of ARPA_FILE, collects ``(word, (score,))``
records, and saves them as a marisa-trie ``RecordTrie`` named ``jawiki.1gram``
in the current directory.
"""
import sys
import re

import marisa_trie

if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} ARPA_FILE")

arpafname = sys.argv[1]

SPACES = re.compile(r'\s+')

# Collect the unigram entries.
retval = []
# The model holds Japanese text; decode explicitly as UTF-8 (as the rest of
# the project does) instead of relying on the locale's default encoding.
with open(arpafname, 'r', encoding='utf-8') as fp:
    # Skip everything up to and including the "\1-grams:" section header.
    # rstrip() keeps the match independent of the line-ending convention.
    for line in fp:
        if line.rstrip() == "\\1-grams:":
            break

    # Continue on the same file iterator: each entry line starts with the
    # score followed by the word, separated by whitespace.
    for line in fp:
        fields = SPACES.split(line.lstrip())
        if len(fields) < 2:
            # A line without at least two fields (e.g. the blank line that
            # ends the section) stops the scan.
            break
        score = fields[0]
        word = fields[1]
        retval.append((word, (float(score),),))

# '<f' stores each score as one little-endian 32-bit float.
trie = marisa_trie.RecordTrie('<f', retval)
fname = 'jawiki.1gram'
print(f"writing {fname}. size={len(retval)}")
trie.save(fname)
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
import jaconv
|
||||
import comb
|
||||
|
||||
import MeCab
|
||||
from janome.tokenizer import Tokenizer
|
||||
@@ -22,7 +22,7 @@ def get_token(node):
|
||||
m = node.feature.split(',')
|
||||
if len(m) >= 8:
|
||||
yomi = node.feature.split(',')[7]
|
||||
return jaconv.kata2hira(yomi) + "/" + node.surface
|
||||
return comb.kata2hira(yomi) + "/" + node.surface
|
||||
else:
|
||||
return node.surface
|
||||
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
marisa-trie==0.7.5
|
||||
jaconv==0.2.4
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from comb import parse_skkdict
|
||||
from comb.comb import parse_skkdict
|
||||
import marisa_trie
|
||||
|
||||
dictionary = parse_skkdict('/usr/share/skk/SKK-JISYO.L', encoding='euc-jp')
|
||||
@@ -1,4 +1,4 @@
|
||||
from comb import parse_skkdict
|
||||
from comb.comb import parse_skkdict
|
||||
import pygtrie
|
||||
|
||||
dictionary = parse_skkdict('/usr/share/skk/SKK-JISYO.L', encoding='euc-jp')
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from combromkan import to_hiragana
|
||||
from comb.combromkan import to_hiragana
|
||||
|
||||
|
||||
def test_foo():
|
||||
|
||||
Reference in New Issue
Block a user