# Mirror of https://github.com/mii443/akaza.git
# Synced 2025-08-22 14:55:31 +00:00
# 64 lines, 2.2 KiB, Python
import logging
|
|
from tempfile import TemporaryDirectory
|
|
import os
|
|
import pytest
|
|
|
|
from akaza.graph import lookup, graph_construct, viterbi
|
|
from akaza.language_model import LanguageModel
|
|
from akaza.system_dict import SystemDict
|
|
from akaza.system_language_model import SystemLanguageModel
|
|
from akaza.user_language_model import UserLanguageModel
|
|
|
|
# Shared objects used by every test in this module. They are created once, at
# import time. Both trie paths are relative, so they are resolved against the
# current working directory of the test run.
system_language_model = SystemLanguageModel.create(
    '../akaza-data/system_language_model.trie'
)

# The user language model receives the path of a fresh temporary directory.
# `tmpdir` stays bound at module level so the TemporaryDirectory object (and
# with it the directory) is not finalized while the tests are running.
tmpdir = TemporaryDirectory()
user_language_model = UserLanguageModel(tmpdir.name)

# Combined model handed to `viterbi` in the tests.
language_model = LanguageModel(
    system_language_model,
    user_language_model=user_language_model,
)

system_dict = SystemDict('../akaza-data/system_dict.trie')

# Emit DEBUG-level log output while the tests run.
logging.basicConfig(level=logging.DEBUG)
|
|
|
|
|
|
@pytest.mark.parametrize('src, expected', [
    # A phrase well known from Wnn.
    ('わたしのなまえはなかのです', '私の名前は中野です'),
    # Katakana words must be handled correctly.
    ('わーど', 'ワード'),
    ('にほん', '日本'),
    ('ややこしい', 'ややこしい'),
    ('むずかしくない', '難しくない'),
    ('きぞん', '既存'),
    ('のぞましい', '望ましい'),
    ('こういう', 'こういう'),
    ('はやくち', '早口'),
    # ('どっぐふーでぃんぐしづらい', 'ドッグフーディング仕辛い'),
    ('しょうがっこう', '小学校'),
    ('げすとだけ', 'ゲストだけ'),
    ('ぜんぶでてるやつ', '全部でてる奴'),
    ('えらべる', '選べる'),
    ('そうみたいですね', 'そうみたいですね'),
    # ('きめつのやいば', '鬼滅の刃'),
    # ('れいわ', '令和'),
])
def test_wnn(src, expected):
    """Convert `src` through lookup -> graph_construct -> viterbi.

    The `word` of the first element of every clause returned by `viterbi`
    is concatenated, and the resulting string must equal `expected`.
    """
    candidates = dict(lookup(src, system_dict, user_language_model, user_dict=None))
    lattice = graph_construct(src, candidates)

    best_clauses = viterbi(lattice, language_model)
    converted = ''.join(clause[0].word for clause in best_clauses)

    assert converted == expected
|
|
|
|
|
|
def test_graph_extend():
    """Build a graph with forced clause boundaries and check `graph.d`.

    With the clauses fixed to (0, 2) and (2, 3), the value 1 must not be
    contained in the `d` attribute of the constructed graph.
    """
    src = 'はなか'
    candidates = dict(lookup(src, system_dict, user_language_model, user_dict=None))

    # Force the clause at (0, 2); the rest of the input is the clause (2, 3).
    forced_clauses = [slice(0, 2), slice(2, 3)]
    lattice = graph_construct(src, candidates, forced_clauses)

    assert 1 not in lattice.d
|