mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
[Experimental] InputGraphの構造を変更し、LookupGraphによる辞書引きを実装 (#46)
* implement byfix lookup * find failing test * implement backward search * add more test cases * add new file * split files * Improve InputGraph Architecture * implement dictionary search
This commit is contained in:
@ -0,0 +1,67 @@
|
||||
//
|
||||
// CorrectPrefixTree.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2024/02/23.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
@testable import KanaKanjiConverterModule
|
||||
import XCTest
|
||||
|
||||
/// Prefix tree (trie) used for typo correction.
///
/// A path of characters from a root node spells an input sequence that may be a typo;
/// the node at the end of that path stores the replacement strings offered for it.
enum CorrectPrefixTree {
    /// A single node of the correction trie.
    final class Node {
        /// Child nodes keyed by the next input character.
        var children: [Character: Node]
        /// Replacement candidates for the sequence ending at this node (empty when there are none).
        var value: [String]

        /// Creates a node with the given children and replacement candidates.
        init(_ children: [Character: Node] = [:], value: [String] = []) {
            self.children = children
            self.value = value
        }

        /// Creates a childless node that holds `value`.
        static func terminal(_ value: [String]) -> Node {
            Node(value: value)
        }

        /// Returns the child reached through `key`, or `nil` when no such edge exists.
        func find(key: Character) -> Node? {
            children[key]
        }

        /// Stores `value` at the node reached by following `route` from this node.
        ///
        /// Missing intermediate nodes are created on the way. An empty `route`
        /// overwrites this node's own `value`.
        func insert(route: some Collection<Character>, value: consuming [String]) {
            var node = self
            for character in route {
                if let child = node.children[character] {
                    node = child
                } else {
                    let child = Node()
                    node.children[character] = child
                    node = child
                }
            }
            node.value = consume value
        }
    }

    /// Corrections for roman-to-kana input, keyed by the typed roman characters.
    static let roman2kana: Node = {
        Node([
            "t": Node([
                "s": .terminal(["ta"]),
                "z": .terminal(["ta"]),
                "q": .terminal(["ta"]),
                "p": .terminal(["to"]),
            ]),
            "g": Node([
                "s": .terminal(["ga"]),
                "z": .terminal(["ga"]),
                "q": .terminal(["ga"]),
                "p": .terminal(["go"]),
            ])
        ])
    }()

    /// Corrections for direct kana input, keyed by the typed kana.
    static let direct: Node = {
        Node([
            "か": .terminal(["が"]),
            "は": .terminal(["ば", "ぱ"])
        ])
    }()
}
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// InputGraphTests.swift
|
||||
// InputGraph.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2024/02/21.
|
||||
@ -10,112 +10,119 @@ import Foundation
|
||||
@testable import KanaKanjiConverterModule
|
||||
import XCTest
|
||||
|
||||
// 置換のためのprefix tree
|
||||
enum ReplacePrefixTree {
|
||||
static var characterNodes: [InputGraph.InputStyle.ID: [Character: [Node]]] = [:]
|
||||
struct InputGraphStructure {
|
||||
enum Range: Equatable, Sendable {
|
||||
case unknown
|
||||
case startIndex(Int)
|
||||
case endIndex(Int)
|
||||
case range(Int, Int)
|
||||
|
||||
final class Node {
|
||||
init(_ children: [Character: Node] = [:], character: Character = "\0", value: String? = nil, parent: Node? = nil) {
|
||||
self.children = children
|
||||
self.value = value
|
||||
self.character = character
|
||||
self.parent = parent
|
||||
var startIndex: Int? {
|
||||
switch self {
|
||||
case .unknown, .endIndex: nil
|
||||
case .startIndex(let index), .range(let index, _): index
|
||||
}
|
||||
}
|
||||
var parent: Node?
|
||||
var children: [Character: Node] = [:]
|
||||
var character: Character
|
||||
var value: String?
|
||||
func find(key: Character) -> Node? {
|
||||
return children[key]
|
||||
}
|
||||
func insert(route: some Collection<Character>, value: consuming String, inputStyle: InputGraph.InputStyle.ID) {
|
||||
if let first = route.first {
|
||||
if let tree = self.children[first] {
|
||||
tree.insert(route: route.dropFirst(), value: consume value, inputStyle: inputStyle)
|
||||
} else {
|
||||
let tree = Node(character: first, parent: self)
|
||||
tree.insert(route: route.dropFirst(), value: consume value, inputStyle: inputStyle)
|
||||
self.children[first] = tree
|
||||
ReplacePrefixTree.characterNodes[inputStyle, default: [:]][first, default: []].append(tree)
|
||||
}
|
||||
} else {
|
||||
self.value = consume value
|
||||
|
||||
var endIndex: Int? {
|
||||
switch self {
|
||||
case .unknown, .startIndex: nil
|
||||
case .endIndex(let index), .range(_, let index): index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static let roman2kana: Node = {
|
||||
var tree = Node()
|
||||
for item in KanaKanjiConverterModule.Roman2Kana.hiraganaChanges {
|
||||
tree.insert(route: item.key, value: String(item.value), inputStyle: .systemRomanKana)
|
||||
}
|
||||
// additionals
|
||||
for item in ["bb", "cc", "dd", "ff", "gg", "hh", "jj", "kk", "ll", "mm", "pp", "qq", "rr", "ss", "tt", "vv", "ww", "xx", "yy", "zz"] {
|
||||
tree.insert(route: Array(item), value: "っ" + String(item.last!), inputStyle: .systemRomanKana)
|
||||
}
|
||||
// additionals
|
||||
for item in ["nb", "nc", "nd", "nf", "ng", "nh", "nj", "nk", "nl", "nm", "np", "nq", "nr", "ns", "nt", "nv", "nw", "nx", "nz"] {
|
||||
tree.insert(route: Array(item), value: "ん" + String(item.last!), inputStyle: .systemRomanKana)
|
||||
}
|
||||
return tree
|
||||
}()
|
||||
static let direct: Node = Node()
|
||||
}
|
||||
/// `displayedTextStartIndexToNodeIndices[0]`は`displayedTextRange==.startIndex(0)`または`displayedTextRange==.range(0, k)`であるようなノードのindexのセットを返す
|
||||
var displayedTextStartIndexToNodeIndices: [IndexSet] = []
|
||||
var inputElementsStartIndexToNodeIndices: [IndexSet] = []
|
||||
var displayedTextEndIndexToNodeIndices: [IndexSet] = [IndexSet(integer: 0)] // rootノードのindexで初期化
|
||||
var inputElementsEndIndexToNodeIndices: [IndexSet] = [IndexSet(integer: 0)] // rootノードのindexで初期化
|
||||
// 使用されなくなったインデックスの集合
|
||||
var deadNodeIndices: [Int] = []
|
||||
|
||||
// 誤字訂正のためのprefix tree
|
||||
enum CorrectPrefixTree {
|
||||
final class Node {
|
||||
init(_ children: [Character: Node] = [:], value: [String] = []) {
|
||||
self.children = children
|
||||
self.value = value
|
||||
}
|
||||
|
||||
static func terminal(_ value: [String]) -> Node {
|
||||
Node(value: value)
|
||||
}
|
||||
|
||||
var children: [Character: Node] = [:]
|
||||
var value: [String]
|
||||
func find(key: Character) -> Node? {
|
||||
return children[key]
|
||||
}
|
||||
func insert(route: some Collection<Character>, value: consuming [String]) {
|
||||
if let first = route.first {
|
||||
if let tree = self.children[first] {
|
||||
tree.insert(route: route.dropFirst(), value: consume value)
|
||||
} else {
|
||||
let tree = Node()
|
||||
tree.insert(route: route.dropFirst(), value: consume value)
|
||||
self.children[first] = tree
|
||||
}
|
||||
} else {
|
||||
self.value = consume value
|
||||
/// Returns the indices of the nodes registered as starting at the given end positions.
///
/// The result is the union of the nodes whose displayed-text range starts at
/// `displayedTextEndIndex` and the nodes whose input-elements range starts at
/// `inputElementsEndIndex`. A `nil` position, or one at or beyond the end of its
/// lookup table, contributes nothing.
func nextIndices(displayedTextEndIndex: Int?, inputElementsEndIndex: Int?) -> IndexSet {
    var indexSet = IndexSet()
    if let displayedTextEndIndex,
       displayedTextEndIndex < self.displayedTextStartIndexToNodeIndices.endIndex {
        indexSet.formUnion(self.displayedTextStartIndexToNodeIndices[displayedTextEndIndex])
    }
    if let inputElementsEndIndex,
       inputElementsEndIndex < self.inputElementsStartIndexToNodeIndices.endIndex {
        indexSet.formUnion(self.inputElementsStartIndexToNodeIndices[inputElementsEndIndex])
    }
    return indexSet
}
|
||||
|
||||
/// Returns the indices of the nodes registered as ending at the given start positions.
///
/// The result is the union of the nodes whose displayed-text range ends at
/// `displayedTextStartIndex` and the nodes whose input-elements range ends at
/// `inputElementsStartIndex`. A `nil` position, or one at or beyond the end of its
/// lookup table, contributes nothing.
func prevIndices(displayedTextStartIndex: Int?, inputElementsStartIndex: Int?) -> IndexSet {
    var indexSet = IndexSet()
    if let displayedTextStartIndex,
       displayedTextStartIndex < self.displayedTextEndIndexToNodeIndices.endIndex {
        indexSet.formUnion(self.displayedTextEndIndexToNodeIndices[displayedTextStartIndex])
    }
    if let inputElementsStartIndex,
       inputElementsStartIndex < self.inputElementsEndIndexToNodeIndices.endIndex {
        indexSet.formUnion(self.inputElementsEndIndexToNodeIndices[inputElementsStartIndex])
    }
    return indexSet
}
|
||||
|
||||
/// Stores `node` in `nodes` and registers its index under the start/end positions of both ranges.
///
/// A slot taken from `deadNodeIndices` is reused when one is available; otherwise
/// `node` is appended to `nodes`. Positions that a range does not define
/// (its `startIndex` / `endIndex` is `nil`) are not registered.
///
/// - Parameters:
///   - node: The value to store.
///   - nodes: The node storage owned by the caller; mutated in place.
///   - displayedTextRange: The node's range in the displayed text.
///   - inputElementsRange: The node's range in the input elements.
mutating func insert<T>(_ node: T, nodes: inout [T], displayedTextRange: Range, inputElementsRange: Range) {
    // Reuse a dead slot when possible.
    let index: Int
    if let deadIndex = self.deadNodeIndices.popLast() {
        nodes[deadIndex] = node
        index = deadIndex
    } else {
        nodes.append(node)
        index = nodes.count - 1
    }

    // Registers `index` at `position` in `table`, first padding the table with
    // empty sets so that `position` is a valid subscript.
    func register(at position: Int?, in table: inout [IndexSet]) {
        guard let position else { return }
        if table.endIndex <= position {
            table.append(contentsOf: repeatElement(IndexSet(), count: position - table.endIndex + 1))
        }
        table[position].insert(index)
    }

    register(at: displayedTextRange.startIndex, in: &self.displayedTextStartIndexToNodeIndices)
    register(at: displayedTextRange.endIndex, in: &self.displayedTextEndIndexToNodeIndices)
    register(at: inputElementsRange.startIndex, in: &self.inputElementsStartIndexToNodeIndices)
    register(at: inputElementsRange.endIndex, in: &self.inputElementsEndIndexToNodeIndices)
}
|
||||
|
||||
static let roman2kana: Node = {
|
||||
Node([
|
||||
"t": Node([
|
||||
"s": .terminal(["ta"]),
|
||||
"z": .terminal(["ta"]),
|
||||
"q": .terminal(["ta"]),
|
||||
"p": .terminal(["to"]),
|
||||
]),
|
||||
"g": Node([
|
||||
"s": .terminal(["ga"]),
|
||||
"z": .terminal(["ga"]),
|
||||
"q": .terminal(["ga"]),
|
||||
"p": .terminal(["go"]),
|
||||
])
|
||||
])
|
||||
}()
|
||||
static let direct: Node = {
|
||||
Node([
|
||||
"か": .terminal(["が"]),
|
||||
"は": .terminal(["ば", "ぱ"])
|
||||
])
|
||||
}()
|
||||
/// Marks the slot at `index` as dead and removes it from every lookup table.
///
/// The index is pushed onto `deadNodeIndices`, from which `insert` pops slots for reuse.
/// Index 0 is the root and must not be removed (checked with `assert`).
mutating func remove(at index: Int) {
    assert(index != 0, "Node at index 0 is root and must not be removed.")
    self.deadNodeIndices.append(index)
    // FIXME: this could probably be done more efficiently by using the node's own range information
    self.displayedTextStartIndexToNodeIndices.mutatingForeach { $0.remove(index) }
    self.displayedTextEndIndexToNodeIndices.mutatingForeach { $0.remove(index) }
    self.inputElementsStartIndexToNodeIndices.mutatingForeach { $0.remove(index) }
    self.inputElementsEndIndexToNodeIndices.mutatingForeach { $0.remove(index) }
}
|
||||
}
|
||||
|
||||
struct InputGraph {
|
||||
@ -134,7 +141,7 @@ struct InputGraph {
|
||||
self.replacePrefixTree = replacePrefixTree
|
||||
self.correctPrefixTree = correctPrefixTree
|
||||
}
|
||||
|
||||
|
||||
struct ID: Equatable, Hashable, Sendable, CustomStringConvertible {
|
||||
init(id: UInt8) {
|
||||
self.id = id
|
||||
@ -186,27 +193,6 @@ struct InputGraph {
|
||||
var correctPrefixTree: CorrectPrefixTree.Node
|
||||
}
|
||||
|
||||
enum Range: Equatable, Sendable {
|
||||
case unknown
|
||||
case startIndex(Int)
|
||||
case endIndex(Int)
|
||||
case range(Int, Int)
|
||||
|
||||
var startIndex: Int? {
|
||||
switch self {
|
||||
case .unknown, .endIndex: nil
|
||||
case .startIndex(let index), .range(let index, _): index
|
||||
}
|
||||
}
|
||||
|
||||
var endIndex: Int? {
|
||||
switch self {
|
||||
case .unknown, .startIndex: nil
|
||||
case .endIndex(let index), .range(_, let index): index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum Correction: CustomStringConvertible {
|
||||
/// 訂正ではない
|
||||
case none
|
||||
@ -227,8 +213,8 @@ struct InputGraph {
|
||||
|
||||
struct Node: Equatable, CustomStringConvertible {
|
||||
var character: Character
|
||||
var displayedTextRange: Range
|
||||
var inputElementsRange: Range
|
||||
var displayedTextRange: InputGraphStructure.Range
|
||||
var inputElementsRange: InputGraphStructure.Range
|
||||
var correction: Correction = .none
|
||||
|
||||
var description: String {
|
||||
@ -244,112 +230,44 @@ struct InputGraph {
|
||||
// root node
|
||||
Node(character: "\0", displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
|
||||
]
|
||||
/// `displayedTextStartIndexToNodeIndices[0]`は`displayedTextRange==.startIndex(0)`または`displayedTextRange==.range(0, k)`であるようなノードのindexのセットを返す
|
||||
var displayedTextStartIndexToNodeIndices: [IndexSet] = []
|
||||
var inputElementsStartIndexToNodeIndices: [IndexSet] = []
|
||||
var displayedTextEndIndexToNodeIndices: [IndexSet] = [IndexSet(integer: 0)] // rootノードのindexで初期化
|
||||
var inputElementsEndIndexToNodeIndices: [IndexSet] = [IndexSet(integer: 0)] // rootノードのindexで初期化
|
||||
// 使用されなくなったインデックスの集合
|
||||
var deadNodeIndices: [Int] = []
|
||||
|
||||
var structure: InputGraphStructure = InputGraphStructure()
|
||||
|
||||
/// The root node, i.e. the first element of `nodes`.
var root: Node {
    nodes[0]
}
|
||||
|
||||
/// Returns the indices of the nodes that start where `node` ends,
/// looked up through `structure` using the end index of each of `node`'s ranges.
func nextIndices(for node: Node) -> IndexSet {
    self.structure.nextIndices(
        displayedTextEndIndex: node.displayedTextRange.endIndex,
        inputElementsEndIndex: node.inputElementsRange.endIndex
    )
}
|
||||
|
||||
/// Returns the nodes that start where `node` ends.
///
/// Resolves the indices from `nextIndices(for:)` against `nodes`.
func next(for node: Node) -> [Node] {
    nextIndices(for: node).map { self.nodes[$0] }
}
|
||||
|
||||
/// Returns the indices of the nodes that end where `node` starts,
/// looked up through `structure` using the start index of each of `node`'s ranges.
func prevIndices(for node: Node) -> IndexSet {
    self.structure.prevIndices(
        displayedTextStartIndex: node.displayedTextRange.startIndex,
        inputElementsStartIndex: node.inputElementsRange.startIndex
    )
}
|
||||
|
||||
/// Returns the nodes that end where `node` starts.
///
/// Resolves the indices from `prevIndices(for:)` against `nodes`.
func prev(for node: Node) -> [Node] {
    prevIndices(for: node).map { self.nodes[$0] }
}
|
||||
|
||||
private mutating func _insert(_ node: Node) -> Int {
|
||||
// 可能ならdeadNodeIndicesを再利用する
|
||||
if let deadIndex = self.deadNodeIndices.popLast() {
|
||||
self.nodes[deadIndex] = node
|
||||
return deadIndex
|
||||
} else {
|
||||
self.nodes.append(node)
|
||||
return self.nodes.count - 1
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes the node at `index` from the graph's lookup structure.
///
/// All bookkeeping is delegated to `structure`; the entry in `nodes` is left in place here.
/// Index 0 is the root and must not be removed (checked with `assert`).
mutating func remove(at index: Int) {
    assert(index != 0, "Node at index 0 is root and must not be removed.")
    self.structure.remove(at: index)
}
|
||||
|
||||
/// Adds `node` to the graph.
///
/// Storage in `nodes` and registration under the node's displayed-text and
/// input-elements ranges are both performed by `structure`.
mutating func insert(_ node: Node) {
    self.structure.insert(
        node,
        nodes: &self.nodes,
        displayedTextRange: node.displayedTextRange,
        inputElementsRange: node.inputElementsRange
    )
}
|
||||
|
||||
/// Intended to add the EOS node. The body is currently empty, so calling this has no effect.
mutating func finalize() {}
|
||||
|
||||
static func build(input: [ComposingText.InputElement]) -> Self {
|
||||
var inputGraph = Self()
|
||||
// アルゴリズム
|
||||
@ -435,7 +353,7 @@ struct InputGraph {
|
||||
let indices = if let first = cRoute.first {
|
||||
inputGraph.prevIndices(for: inputGraph.nodes[first])
|
||||
} else {
|
||||
index < inputGraph.inputElementsEndIndexToNodeIndices.endIndex ? inputGraph.inputElementsEndIndexToNodeIndices[index] : .init()
|
||||
index < inputGraph.structure.inputElementsEndIndexToNodeIndices.endIndex ? inputGraph.structure.prevIndices(displayedTextStartIndex: nil, inputElementsStartIndex: index) : .init()
|
||||
}
|
||||
for prevGraphNodeIndex in indices {
|
||||
guard inputGraph.nodes[prevGraphNodeIndex].character == pNode.character else {
|
||||
@ -514,75 +432,75 @@ struct InputGraph {
|
||||
}
|
||||
// 誤字訂正を追加する
|
||||
guard continuous else { continue }
|
||||
perItem: for item in altItems[cIndex, default: []] {
|
||||
// itemの対応するinputCountが1でない場合、少しややこしい
|
||||
// altItemはひとまずreplace全体で一塊と考える
|
||||
// 例えばab→an、sn→anなる二つのルールがあるときにabsnと打った場合、anan(あなn)が原理的には作られる
|
||||
// しかし、一般のケースではreplaceで挿入や削除が起こる(例:amn→an)
|
||||
// そこで、一旦はab→anのとき、[an]を一塊で扱う。つまり、現在ノードからa, nと辿った場合に候補が見つかる場合にのみ、stackに追加する
|
||||
// この制限は将来的に取り除ける
|
||||
var node: ReplacePrefixTree.Node? = cNode
|
||||
if item.inputCount != 1 {
|
||||
var chars = Array(item.replace) // FIXME: 本当はQueueにしたい
|
||||
while !chars.isEmpty {
|
||||
if let nNode = node?.find(key: chars.removeFirst()) {
|
||||
node = nNode
|
||||
} else {
|
||||
continue perItem
|
||||
}
|
||||
perItem: for item in altItems[cIndex, default: []] {
|
||||
// itemの対応するinputCountが1でない場合、少しややこしい
|
||||
// altItemはひとまずreplace全体で一塊と考える
|
||||
// 例えばab→an、sn→anなる二つのルールがあるときにabsnと打った場合、anan(あなn)が原理的には作られる
|
||||
// しかし、一般のケースではreplaceで挿入や削除が起こる(例:amn→an)
|
||||
// そこで、一旦はab→anのとき、[an]を一塊で扱う。つまり、現在ノードからa, nと辿った場合に候補が見つかる場合にのみ、stackに追加する
|
||||
// この制限は将来的に取り除ける
|
||||
var node: ReplacePrefixTree.Node? = cNode
|
||||
if item.inputCount != 1 {
|
||||
var chars = Array(item.replace) // FIXME: 本当はQueueにしたい
|
||||
while !chars.isEmpty {
|
||||
if let nNode = node?.find(key: chars.removeFirst()) {
|
||||
node = nNode
|
||||
} else {
|
||||
continue perItem
|
||||
}
|
||||
} else {
|
||||
}
|
||||
} else {
|
||||
stack.append(
|
||||
(
|
||||
.init(),
|
||||
cIndex + item.inputCount,
|
||||
cRoute + Array(item.replace),
|
||||
.init(from: input[cIndex].inputStyle),
|
||||
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, item.replace, .typo)
|
||||
)
|
||||
)
|
||||
}
|
||||
if let node {
|
||||
// valueがあるかないかで分岐
|
||||
if let value = node.value {
|
||||
stack.append(
|
||||
(
|
||||
.init(),
|
||||
node,
|
||||
cIndex + item.inputCount,
|
||||
cRoute + Array(item.replace),
|
||||
.init(from: input[cIndex].inputStyle),
|
||||
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, item.replace, .typo)
|
||||
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, value, .typo)
|
||||
)
|
||||
)
|
||||
} else {
|
||||
stack.append(
|
||||
(
|
||||
node,
|
||||
cIndex + item.inputCount,
|
||||
cRoute + Array(item.replace),
|
||||
.init(from: input[cIndex].inputStyle),
|
||||
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, cLongestMatch.value, .typo)
|
||||
)
|
||||
)
|
||||
}
|
||||
if let node {
|
||||
// valueがあるかないかで分岐
|
||||
if let value = node.value {
|
||||
stack.append(
|
||||
(
|
||||
node,
|
||||
cIndex + item.inputCount,
|
||||
cRoute + Array(item.replace),
|
||||
.init(from: input[cIndex].inputStyle),
|
||||
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, value, .typo)
|
||||
)
|
||||
)
|
||||
} else {
|
||||
stack.append(
|
||||
(
|
||||
node,
|
||||
cIndex + item.inputCount,
|
||||
cRoute + Array(item.replace),
|
||||
.init(from: input[cIndex].inputStyle),
|
||||
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, cLongestMatch.value, .typo)
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// matchをinsertする
|
||||
for match in matches {
|
||||
let displayedTextStartIndex = if let d = match.displayedTextStartIndex {
|
||||
d
|
||||
} else if let beforeNodeIndex = inputGraph.inputElementsEndIndexToNodeIndices[index].first,
|
||||
} else if let beforeNodeIndex = inputGraph.structure.inputElementsEndIndexToNodeIndices[index].first,
|
||||
let d = inputGraph.nodes[beforeNodeIndex].displayedTextRange.endIndex {
|
||||
d
|
||||
} else {
|
||||
Int?.none
|
||||
}
|
||||
} else {
|
||||
Int?.none
|
||||
}
|
||||
guard let displayedTextStartIndex else { continue }
|
||||
|
||||
let characters = Array(match.value)
|
||||
for (i, c) in zip(characters.indices, characters) {
|
||||
let inputElementRange: InputGraph.Range = if i == characters.startIndex && i+1 == characters.endIndex {
|
||||
let inputElementRange: InputGraphStructure.Range = if i == characters.startIndex && i+1 == characters.endIndex {
|
||||
if let startIndex = match.inputElementsStartIndex {
|
||||
.range(startIndex, match.inputElementsEndIndex)
|
||||
} else {
|
||||
@ -613,165 +531,3 @@ struct InputGraph {
|
||||
return consume inputGraph
|
||||
}
|
||||
}
|
||||
|
||||
final class InputGraphTests: XCTestCase {
|
||||
func testInsert() throws {
|
||||
var graph = InputGraph()
|
||||
let node1 = InputGraph.Node(character: "a", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1))
|
||||
let node2 = InputGraph.Node(character: "b", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 2))
|
||||
graph.insert(node1)
|
||||
graph.insert(node2)
|
||||
XCTAssertEqual(graph.next(for: node1), [node2])
|
||||
XCTAssertEqual(graph.prev(for: node2), [node1])
|
||||
}
|
||||
|
||||
func testBuild() throws {
|
||||
do {
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "あ", inputStyle: .direct),
|
||||
.init(character: "い", inputStyle: .direct),
|
||||
.init(character: "う", inputStyle: .direct)
|
||||
])
|
||||
XCTAssertEqual(graph.nodes.count, 4) // Root nodes
|
||||
}
|
||||
do {
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "あ", inputStyle: .direct),
|
||||
.init(character: "か", inputStyle: .direct),
|
||||
.init(character: "う", inputStyle: .direct)
|
||||
])
|
||||
XCTAssertEqual(graph.nodes.count, 5) // Root nodes
|
||||
}
|
||||
do {
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "i", inputStyle: .roman2kana),
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "a", inputStyle: .roman2kana),
|
||||
])
|
||||
XCTAssertEqual(graph.nodes.count, 3) // Root nodes
|
||||
}
|
||||
do {
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "i", inputStyle: .roman2kana),
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "s", inputStyle: .roman2kana),
|
||||
])
|
||||
XCTAssertEqual(graph.nodes.count, 5) // Root nodes
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "い"}),
|
||||
.init(character: "い", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "t"}),
|
||||
.init(character: "t", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 2), correction: .none)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "s"}),
|
||||
.init(character: "s", displayedTextRange: .range(2, 3), inputElementsRange: .range(2, 3), correction: .none)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "た"}),
|
||||
.init(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 3), correction: .typo)
|
||||
)
|
||||
}
|
||||
do {
|
||||
// ts->taの誤字訂正が存在
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "i", inputStyle: .roman2kana),
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "s", inputStyle: .roman2kana),
|
||||
.init(character: "a", inputStyle: .roman2kana),
|
||||
])
|
||||
XCTAssertEqual(graph.nodes.count, 6) // Root nodes
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "い"}),
|
||||
.init(character: "い", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "た"}),
|
||||
.init(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 3), correction: .typo)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "ぁ"}),
|
||||
.init(character: "ぁ", displayedTextRange: .range(2, 3), inputElementsRange: .endIndex(4), correction: .none)
|
||||
)
|
||||
}
|
||||
do {
|
||||
// ts->taの誤字訂正は入力方式を跨いだ場合は発火しない
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "s", inputStyle: .direct),
|
||||
])
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "t"}),
|
||||
.init(character: "t", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
|
||||
)
|
||||
XCTAssertFalse(graph.nodes.contains(.init(character: "た", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 2), correction: .typo)))
|
||||
}
|
||||
do {
|
||||
// tt→っt
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
])
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "っ"}),
|
||||
.init(character: "っ", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "t"}),
|
||||
.init(character: "t", displayedTextRange: .range(1, 2), inputElementsRange: .endIndex(2), correction: .none)
|
||||
)
|
||||
}
|
||||
do {
|
||||
// tt→っt
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "a", inputStyle: .roman2kana),
|
||||
])
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "っ"}),
|
||||
.init(character: "っ", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "た"}),
|
||||
.init(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .endIndex(3), correction: .none)
|
||||
)
|
||||
}
|
||||
do {
|
||||
// nt→んt
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "n", inputStyle: .roman2kana),
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "a", inputStyle: .roman2kana),
|
||||
])
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "ん"}),
|
||||
.init(character: "ん", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)
|
||||
)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "た"}),
|
||||
.init(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .endIndex(3), correction: .none)
|
||||
)
|
||||
}
|
||||
do {
|
||||
// t
|
||||
// tt→っt
|
||||
// っts→った (
|
||||
// FIXME: 興味深いテストケースだが実装が重いので保留
|
||||
/*
|
||||
let graph = InputGraph.build(input: [
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "t", inputStyle: .roman2kana),
|
||||
.init(character: "s", inputStyle: .roman2kana),
|
||||
])
|
||||
print(graph)
|
||||
XCTAssertEqual(
|
||||
graph.nodes.first(where: {$0.character == "た"}),
|
||||
.init(character: "た", displayedTextRange: .range(2, 3), inputElementsRange: .endIndex(4), correction: .none)
|
||||
)
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,174 @@
|
||||
//
|
||||
// InputGraphTests.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2024/02/21.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
@testable import KanaKanjiConverterModule
|
||||
import XCTest
|
||||
|
||||
|
||||
/// Tests for `InputGraph`: manual node insertion and graph construction via `InputGraph.build(input:)`.
final class InputGraphTests: XCTestCase {
    /// Returns the first node of `graph` whose character equals `character`, or `nil` if there is none.
    private func firstNode(_ character: Character, in graph: InputGraph) -> InputGraph.Node? {
        graph.nodes.first { $0.character == character }
    }

    /// Two inserted nodes with contiguous ranges must be reported as each other's next/prev node.
    func testInsert() throws {
        let head = InputGraph.Node(character: "a", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1))
        let tail = InputGraph.Node(character: "b", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 2))
        var graph = InputGraph()
        graph.insert(head)
        graph.insert(tail)
        XCTAssertEqual(graph.next(for: head), [tail])
        XCTAssertEqual(graph.prev(for: tail), [head])
    }

    /// Checks the nodes produced by `InputGraph.build(input:)` for direct and roman2kana input.
    func testBuild() throws {
        do {
            let graph = InputGraph.build(input: [
                .init(character: "あ", inputStyle: .direct),
                .init(character: "い", inputStyle: .direct),
                .init(character: "う", inputStyle: .direct)
            ])
            // NOTE(review): presumably the 3 characters plus the root node — confirm.
            XCTAssertEqual(graph.nodes.count, 4)
        }
        do {
            let graph = InputGraph.build(input: [
                .init(character: "あ", inputStyle: .direct),
                .init(character: "か", inputStyle: .direct),
                .init(character: "う", inputStyle: .direct)
            ])
            // NOTE(review): one node more than the previous case; presumably an alternative for か — confirm.
            XCTAssertEqual(graph.nodes.count, 5)
        }
        do {
            let graph = InputGraph.build(input: [
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "a", inputStyle: .roman2kana)
            ])
            // NOTE(review): presumably root, い and た — confirm.
            XCTAssertEqual(graph.nodes.count, 3)
        }
        do {
            let graph = InputGraph.build(input: [
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "s", inputStyle: .roman2kana)
            ])
            XCTAssertEqual(graph.nodes.count, 5)
            XCTAssertEqual(
                firstNode("い", in: graph),
                InputGraph.Node(character: "い", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
            )
            XCTAssertEqual(
                firstNode("t", in: graph),
                InputGraph.Node(character: "t", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 2), correction: .none)
            )
            XCTAssertEqual(
                firstNode("s", in: graph),
                InputGraph.Node(character: "s", displayedTextRange: .range(2, 3), inputElementsRange: .range(2, 3), correction: .none)
            )
            XCTAssertEqual(
                firstNode("た", in: graph),
                InputGraph.Node(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 3), correction: .typo)
            )
        }
        do {
            // A ts->ta typo correction exists.
            let graph = InputGraph.build(input: [
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "a", inputStyle: .roman2kana)
            ])
            XCTAssertEqual(graph.nodes.count, 6)
            XCTAssertEqual(
                firstNode("い", in: graph),
                InputGraph.Node(character: "い", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
            )
            XCTAssertEqual(
                firstNode("た", in: graph),
                InputGraph.Node(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 3), correction: .typo)
            )
            XCTAssertEqual(
                firstNode("ぁ", in: graph),
                InputGraph.Node(character: "ぁ", displayedTextRange: .range(2, 3), inputElementsRange: .endIndex(4), correction: .none)
            )
        }
        do {
            // The ts->ta typo correction must not fire across different input styles.
            let graph = InputGraph.build(input: [
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "s", inputStyle: .direct)
            ])
            XCTAssertEqual(
                firstNode("t", in: graph),
                InputGraph.Node(character: "t", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
            )
            let crossStyleCorrection = InputGraph.Node(character: "た", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 2), correction: .typo)
            XCTAssertFalse(graph.nodes.contains(crossStyleCorrection))
        }
        do {
            // tt→っt
            let graph = InputGraph.build(input: [
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana)
            ])
            XCTAssertEqual(
                firstNode("っ", in: graph),
                InputGraph.Node(character: "っ", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)
            )
            XCTAssertEqual(
                firstNode("t", in: graph),
                InputGraph.Node(character: "t", displayedTextRange: .range(1, 2), inputElementsRange: .endIndex(2), correction: .none)
            )
        }
        do {
            // tta→った
            let graph = InputGraph.build(input: [
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "a", inputStyle: .roman2kana)
            ])
            XCTAssertEqual(
                firstNode("っ", in: graph),
                InputGraph.Node(character: "っ", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)
            )
            XCTAssertEqual(
                firstNode("た", in: graph),
                InputGraph.Node(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .endIndex(3), correction: .none)
            )
        }
        do {
            // nta→んた
            let graph = InputGraph.build(input: [
                .init(character: "n", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "a", inputStyle: .roman2kana)
            ])
            XCTAssertEqual(
                firstNode("ん", in: graph),
                InputGraph.Node(character: "ん", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)
            )
            XCTAssertEqual(
                firstNode("た", in: graph),
                InputGraph.Node(character: "た", displayedTextRange: .range(1, 2), inputElementsRange: .endIndex(3), correction: .none)
            )
        }
        do {
            // t
            // tt→っt
            // っts→った (
            // FIXME: an interesting test case, but implementing it is costly, so it is on hold.
            /*
            let graph = InputGraph.build(input: [
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "s", inputStyle: .roman2kana),
            ])
            print(graph)
            XCTAssertEqual(
                graph.nodes.first(where: {$0.character == "た"}),
                .init(character: "た", displayedTextRange: .range(2, 3), inputElementsRange: .endIndex(4), correction: .none)
            )
            */
        }
    }
}
|
@ -0,0 +1,213 @@
|
||||
//
|
||||
// LookupGraphTests.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2024/02/23.
|
||||
//
|
||||
|
||||
import XCTest
|
||||
import Foundation
|
||||
@testable import KanaKanjiConverterModule
|
||||
|
||||
|
||||
/// A graph used for dictionary lookup. It mirrors an `InputGraph`, with every node's character
/// replaced by a `UInt8` character ID.
struct LookupGraph {
    /// One vertex of the lookup graph.
    struct Node: Equatable {
        /// Character ID of this node (`0x00` for the root).
        var charId: UInt8
        /// LOUDS node indices attached to this node; empty by default.
        var loudsNodeIndices: Set<Int> = []
        /// Range this node covers in the displayed text.
        var displayedTextRange: InputGraphStructure.Range
        /// Range this node covers in the input elements.
        var inputElementsRange: InputGraphStructure.Range
        /// Correction kind copied from the source `InputGraph` node.
        var correction: InputGraph.Correction = .none
    }

    /// All nodes of the graph. Index 0 always holds the root node.
    var nodes: [Node] = [
        // root node
        Node(charId: 0x00, displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
    ]

    /// Connectivity information shared with `InputGraph`.
    var structure: InputGraphStructure = InputGraphStructure()

    /// The root node (`nodes[0]`).
    var root: Node {
        nodes[0]
    }

    /// Indices of the nodes that start where `node` ends.
    func nextIndices(for node: Node) -> IndexSet {
        let displayedEnd = node.displayedTextRange.endIndex
        let inputEnd = node.inputElementsRange.endIndex
        return structure.nextIndices(displayedTextEndIndex: displayedEnd, inputElementsEndIndex: inputEnd)
    }

    /// The nodes that start where `node` ends.
    func next(for node: Node) -> [Node] {
        nextIndices(for: node).map { index in nodes[index] }
    }

    /// Indices of the nodes that end where `node` starts.
    func prevIndices(for node: Node) -> IndexSet {
        let displayedStart = node.displayedTextRange.startIndex
        let inputStart = node.inputElementsRange.startIndex
        return structure.prevIndices(displayedTextStartIndex: displayedStart, inputElementsStartIndex: inputStart)
    }

    /// The nodes that end where `node` starts.
    func prev(for node: Node) -> [Node] {
        prevIndices(for: node).map { index in nodes[index] }
    }

    /// Removes the node at `index` from `structure`. The `nodes` array itself is left untouched.
    /// - Parameter index: Index of the node to remove; must not be 0 (the root).
    mutating func remove(at index: Int) {
        assert(index != 0, "Node at index 0 is root and must not be removed.")
        structure.remove(at: index)
    }

    /// Inserts `node` through `structure`, which receives `nodes` as an in-out argument.
    mutating func insert(_ node: Node) {
        structure.insert(
            node,
            nodes: &nodes,
            displayedTextRange: node.displayedTextRange,
            inputElementsRange: node.inputElementsRange
        )
    }

    /// Builds a lookup graph from `input`, mapping every node's character through `character2CharId`.
    /// - Parameters:
    ///   - input: The source graph; its `structure` is reused as-is.
    ///   - character2CharId: Maps a node's character to its character ID.
    /// - Returns: A graph whose nodes are in the same order as `input.nodes`.
    static func build(input: InputGraph, character2CharId: (Character) -> UInt8) -> Self {
        var converted: [Node] = []
        converted.reserveCapacity(input.nodes.count)
        for source in input.nodes {
            converted.append(
                Node(
                    charId: character2CharId(source.character),
                    displayedTextRange: source.displayedTextRange,
                    inputElementsRange: source.inputElementsRange,
                    correction: source.correction
                )
            )
        }
        return Self(nodes: converted, structure: input.structure)
    }
}
|
||||
|
||||
|
||||
extension LOUDS {
    /// Walks every path of `inputGraph` from its root and collects the LOUDS nodes reached along the way.
    ///
    /// - Parameter inputGraph: The lookup graph whose nodes carry the character IDs to match.
    /// - Returns: A pair of
    ///   - the set of matched LOUDS node indices (index 1, where the search starts, is always included), and
    ///   - a dictionary from each matched LOUDS node index to the indices of the graph nodes that reached it.
    func byfixNodeIndices(_ inputGraph: LookupGraph) -> (IndexSet, [Int: Set<Int>]) {
        typealias Pending = (
            graphNode: LookupGraph.Node,
            graphNodeIndex: Int,
            parentLoudsIndex: Int
        )
        var matchedLoudsIndices = IndexSet(integer: 1)
        // Correspondence between LOUDS nodes and LookupGraph nodes.
        var graphIndicesByLoudsIndex: [Int: Set<Int>] = [:]
        var pending: [Pending] = inputGraph.nextIndices(for: inputGraph.root).map { (inputGraph.nodes[$0], $0, 1) }

        while let item = pending.popLast() {
            // A graph node whose character has no matching LOUDS child ends this path.
            guard let loudsIndex = searchCharNodeIndex(from: item.parentLoudsIndex, char: item.graphNode.charId) else {
                continue
            }
            graphIndicesByLoudsIndex[loudsIndex, default: []].insert(item.graphNodeIndex)
            matchedLoudsIndices.insert(loudsIndex)
            // Continue the search with the successors of the matched graph node.
            let successors: [Pending] = inputGraph.nextIndices(for: item.graphNode).map { (inputGraph.nodes[$0], $0, loudsIndex) }
            pending.append(contentsOf: successors)
        }
        return (matchedLoudsIndices, graphIndicesByLoudsIndex)
    }
}
|
||||
|
||||
extension DicdataStore {
    /// Loads dictionary entries for the given indices from the sharded `loudstxt3` data.
    ///
    /// Each index is routed to shard `index >> 11` (2048 indices per shard) and looked up there
    /// by its low 11 bits (`index & 2047`).
    /// - Parameters:
    ///   - identifier: Base identifier; the shard number is appended to it.
    ///   - indices: Indices to look up.
    ///   - option: Request options forwarded to `LOUDS.getDataForLoudstxt3`.
    /// - Returns: The per-shard results merged into one dictionary; values stored under the same key are concatenated.
    func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [Int: [DicdataElement]] {
        // split = 2048
        let indicesByShard = Dictionary(grouping: indices) { $0 >> 11 }
        var merged: [Int: [DicdataElement]] = [:]
        for (shard, members) in indicesByShard {
            // FIXME: use local value
            // NOTE(review): if the returned dictionary is keyed by the shard-local index, entries from
            // different shards can end up concatenated under the same key — confirm against
            // `LOUDS.getDataForLoudstxt3`.
            merged.merge(
                LOUDS.getDataForLoudstxt3("\(identifier)\(shard)", indices: members.map { $0 & 2047 }, option: option)
            ) { existing, incoming in
                existing + incoming
            }
        }
        return merged
    }
}
|
||||
|
||||
/// Tests dictionary lookup driven by a `LookupGraph`, using the `DictionaryMock` resources of the test bundle.
final class LookupGraphTests: XCTestCase {
    /// Location of the `DictionaryMock` resource directory.
    static var resourceURL = Bundle.module.resourceURL!.standardizedFileURL.appendingPathComponent("DictionaryMock", isDirectory: true)

    /// Default request options pointed at the mock dictionary.
    func requestOptions() -> ConvertRequestOptions {
        var options: ConvertRequestOptions = .default
        options.dictionaryResourceURL = Self.resourceURL
        return options
    }

    /// Reads `louds/charID.chid` and maps each character to its offset in the file.
    ///
    /// A load failure is recorded as a test failure (instead of only being printed) and an empty
    /// table is returned, so the cause is visible rather than surfacing as unrelated assertion failures.
    func loadCharIDs() -> [Character: UInt8] {
        let url = Self.resourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
        do {
            let string = try String(contentsOf: url, encoding: String.Encoding.utf8)
            return [Character: UInt8](uniqueKeysWithValues: string.enumerated().map { ($0.element, UInt8($0.offset)) })
        } catch {
            XCTFail("Failed to load \(url.path): \(error)")
            return [:]
        }
    }

    /// Runs the lookup pipeline for `inputGraph` against the "シ" dictionary and returns every entry found.
    ///
    /// Characters are converted to katakana before the ID lookup; characters without an ID map to `0x00`.
    private func lookUpDicdata(
        for inputGraph: InputGraph,
        in louds: LOUDS,
        charIDs: [Character: UInt8],
        dicdataStore: DicdataStore
    ) -> [DicdataElement] {
        let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: { charIDs[$0.toKatakana()] ?? 0x00 })
        // Only the matched LOUDS indices are needed here; the LOUDS-to-graph mapping is ignored.
        let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph)
        let dicdataWithIndex: [Int: [DicdataElement]] = dicdataStore.getDicdataFromLoudstxt3(identifier: "シ", indices: loudsNodeIndices, option: requestOptions())
        return dicdataWithIndex.values.flatMap { $0 }
    }

    /// Checks that prefix lookup finds the expected words for direct input, typo-corrected input and `っ` input.
    func testByfixNodeIndices() throws {
        let dicdataStore = DicdataStore(requestOptions: requestOptions())
        let charIDs = loadCharIDs()
        let louds = try XCTUnwrap(LOUDS.load("シ", option: requestOptions()))
        do {
            let inputGraph = InputGraph.build(input: [
                .init(character: "し", inputStyle: .direct),
                .init(character: "か", inputStyle: .direct),
                .init(character: "い", inputStyle: .direct)
            ])
            let dicdata = lookUpDicdata(for: inputGraph, in: louds, charIDs: charIDs, dicdataStore: dicdataStore)
            // シ
            XCTAssertTrue(dicdata.contains { $0.word == "死" })
            // シカ
            XCTAssertTrue(dicdata.contains { $0.word == "鹿" })
            XCTAssertTrue(dicdata.contains { $0.word == "歯科" })
            // シガ
            XCTAssertTrue(dicdata.contains { $0.word == "滋賀" })
            // シカイ
            XCTAssertTrue(dicdata.contains { $0.word == "司会" })
            XCTAssertTrue(dicdata.contains { $0.word == "視界" })
            XCTAssertTrue(dicdata.contains { $0.word == "死界" })
            // シガイ
            XCTAssertTrue(dicdata.contains { $0.word == "市外" })
            XCTAssertTrue(dicdata.contains { $0.word == "市街" })
            XCTAssertTrue(dicdata.contains { $0.word == "死骸" })
        }
        do {
            // ts -> ta
            let inputGraph = InputGraph.build(input: [
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana)
            ])
            let dicdata = lookUpDicdata(for: inputGraph, in: louds, charIDs: charIDs, dicdataStore: dicdataStore)
            // シ
            XCTAssertTrue(dicdata.contains { $0.word == "死" })
            // [シツ]ィ
            XCTAssertTrue(dicdata.contains { $0.word == "質" })
            XCTAssertTrue(dicdata.contains { $0.word == "室" })
            // シタ
            XCTAssertTrue(dicdata.contains { $0.word == "下" })
            XCTAssertTrue(dicdata.contains { $0.word == "舌" })
            // シタイ
            XCTAssertTrue(dicdata.contains { $0.word == "死体" })
            XCTAssertTrue(dicdata.contains { $0.word == "肢体" })
        }
        do {
            // Check that candidates for 「しっ」 exist.
            let inputGraph = InputGraph.build(input: [
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "a", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana)
            ])
            let dicdata = lookUpDicdata(for: inputGraph, in: louds, charIDs: charIDs, dicdataStore: dicdataStore)
            // シ
            XCTAssertTrue(dicdata.contains { $0.word == "死" })
            // シッ
            XCTAssertTrue(dicdata.contains { $0.word == "知っ" })
            XCTAssertTrue(dicdata.contains { $0.word == "しっ" })
            // シッタ
            XCTAssertTrue(dicdata.contains { $0.word == "叱咤" })
            // シッタイ
            XCTAssertTrue(dicdata.contains { $0.word == "失態" })
        }
    }
}
|
@ -0,0 +1,63 @@
|
||||
//
|
||||
// ReplacePrefixTree.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2024/02/23.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
@testable import KanaKanjiConverterModule
|
||||
import XCTest
|
||||
|
||||
/// Prefix tree used for replacement.
enum ReplacePrefixTree {
    /// Every node created by `Node.insert`, grouped by input style and then by the node's character.
    static var characterNodes: [InputGraph.InputStyle.ID: [Character: [Node]]] = [:]

    /// One node of the prefix tree.
    final class Node {
        // NOTE(review): `parent` is a strong reference and `children` holds the child strongly, so a
        // parent and its child keep each other alive. The trees built in this file are static and
        // never released, but confirm before creating short-lived trees.
        var parent: Node?
        var children: [Character: Node] = [:]
        /// The character on the edge leading to this node ("\0" by default).
        var character: Character
        /// The replacement stored at this node, if a route ends here.
        var value: String?

        init(_ children: [Character: Node] = [:], character: Character = "\0", value: String? = nil, parent: Node? = nil) {
            self.parent = parent
            self.character = character
            self.value = value
            self.children = children
        }

        /// Returns the direct child reached by `key`, if any.
        func find(key: Character) -> Node? {
            children[key]
        }

        /// Stores `value` at the end of `route`, creating missing nodes along the way.
        ///
        /// Each newly created node is also appended to `ReplacePrefixTree.characterNodes` under `inputStyle`,
        /// after the rest of the route has been inserted below it.
        /// - Parameters:
        ///   - route: The remaining characters to descend through; an empty route stores `value` on this node.
        ///   - value: The replacement string.
        ///   - inputStyle: The input style the created nodes are registered under.
        func insert(route: some Collection<Character>, value: consuming String, inputStyle: InputGraph.InputStyle.ID) {
            guard let head = route.first else {
                self.value = consume value
                return
            }
            let rest = route.dropFirst()
            if let existing = children[head] {
                existing.insert(route: rest, value: consume value, inputStyle: inputStyle)
                return
            }
            let created = Node(character: head, parent: self)
            created.insert(route: rest, value: consume value, inputStyle: inputStyle)
            children[head] = created
            ReplacePrefixTree.characterNodes[inputStyle, default: [:]][head, default: []].append(created)
        }
    }

    /// Tree for roman-to-kana input: the module's `hiraganaChanges` table plus doubled-consonant
    /// (`っ`) and `n` + consonant (`ん`) rules.
    static let roman2kana: Node = {
        let root = Node()
        for (route, replacement) in KanaKanjiConverterModule.Roman2Kana.hiraganaChanges {
            root.insert(route: route, value: String(replacement), inputStyle: .systemRomanKana)
        }
        // Additional rules: a doubled consonant becomes "っ" followed by that consonant (e.g. tt → っt).
        for consonant in "bcdfghjklmpqrstvwxyz" {
            let route: [Character] = [consonant, consonant]
            root.insert(route: route, value: "っ" + String(consonant), inputStyle: .systemRomanKana)
        }
        // Additional rules: "n" before a consonant becomes "ん" followed by that consonant (e.g. nt → んt).
        for consonant in "bcdfghjklmpqrstvwxz" {
            let route: [Character] = ["n", consonant]
            root.insert(route: route, value: "ん" + String(consonant), inputStyle: .systemRomanKana)
        }
        return root
    }()

    /// Tree for direct input; it contains no replacement rules.
    static let direct: Node = Node()
}
|
@ -0,0 +1,20 @@
|
||||
//
|
||||
// extension Kana2Kanji+InputGraph.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2024/02/23.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
@testable import KanaKanjiConverterModule
|
||||
|
||||
|
||||
extension Kana2Kanji {
    /// Placeholder for an `InputGraph`-based conversion entry point. The body is currently empty;
    /// the comments mark the two planned steps.
    /// - Parameters:
    ///   - inputData: The input graph to convert (currently unused).
    ///   - N_best: Currently unused.
    func kana2lattice_all(_ inputData: InputGraph, N_best: Int) {
        // Dictionary lookup (not implemented yet)

        // Conversion (not implemented yet)
    }
}
|
||||
|
@ -4,125 +4,3 @@
|
||||
//
|
||||
// Created by miwa on 2024/02/22.
|
||||
//
|
||||
|
||||
import XCTest
|
||||
import Foundation
|
||||
@testable import KanaKanjiConverterModule
|
||||
|
||||
extension LOUDS {
    /// Walks every path of `inputGraph` from its root and collects the LOUDS nodes reached along the way.
    ///
    /// - Parameters:
    ///   - inputGraph: The graph whose node characters are matched.
    ///   - char2id: Maps a character to its character ID; a `nil` result ends the current path.
    /// - Returns: The set of matched LOUDS node indices (index 1, where the search starts, is always included).
    func byfixNodeIndices(_ inputGraph: InputGraph, char2id: (Character) -> UInt8?) -> IndexSet {
        typealias Pending = (
            graphNode: InputGraph.Node,
            parentLoudsIndex: Int
        )
        var matched = IndexSet(integer: 1)
        var pending: [Pending] = inputGraph.next(for: inputGraph.root).map { ($0, 1) }

        while let item = pending.popLast() {
            // Stop following this path when the character has no ID or no matching LOUDS child.
            guard let charId = char2id(item.graphNode.character),
                  let loudsIndex = searchCharNodeIndex(from: item.parentLoudsIndex, char: charId) else {
                continue
            }
            matched.insert(loudsIndex)
            // Continue the search with the successors of the matched graph node.
            let successors: [Pending] = inputGraph.next(for: item.graphNode).map { ($0, loudsIndex) }
            pending.append(contentsOf: successors)
        }
        return matched
    }
}
|
||||
|
||||
/// Tests LOUDS prefix lookup driven directly by an `InputGraph`, using the `DictionaryMock` resources of the test bundle.
final class InputGraphBasedLOUDSTests: XCTestCase {
    /// Location of the `DictionaryMock` resource directory.
    static var resourceURL = Bundle.module.resourceURL!.standardizedFileURL.appendingPathComponent("DictionaryMock", isDirectory: true)

    /// Default request options pointed at the mock dictionary.
    func requestOptions() -> ConvertRequestOptions {
        var options: ConvertRequestOptions = .default
        options.dictionaryResourceURL = Self.resourceURL
        return options
    }

    /// Reads `louds/charID.chid` and maps each character to its offset in the file.
    ///
    /// A load failure is recorded as a test failure (instead of only being printed) and an empty
    /// table is returned, so the cause is visible rather than surfacing as unrelated assertion failures.
    func loadCharIDs() -> [Character: UInt8] {
        let url = Self.resourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
        do {
            let string = try String(contentsOf: url, encoding: String.Encoding.utf8)
            return [Character: UInt8](uniqueKeysWithValues: string.enumerated().map { ($0.element, UInt8($0.offset)) })
        } catch {
            XCTFail("Failed to load \(url.path): \(error)")
            return [:]
        }
    }

    /// Runs the lookup pipeline for `inputGraph` against the "シ" dictionary and returns every entry found.
    ///
    /// Characters are converted to katakana before the ID lookup.
    private func lookUpDicdata(
        for inputGraph: InputGraph,
        in louds: LOUDS,
        charIDs: [Character: UInt8],
        dicdataStore: DicdataStore
    ) -> [DicdataElement] {
        let nodeIndices = louds.byfixNodeIndices(inputGraph, char2id: { charIDs[$0.toKatakana()] })
        return dicdataStore.getDicdataFromLoudstxt3(identifier: "シ", indices: nodeIndices)
    }

    /// Checks that prefix lookup finds the expected words for direct input, typo-corrected input and `っ` input.
    func testByfixNodeIndices() throws {
        let dicdataStore = DicdataStore(requestOptions: requestOptions())
        let charIDs = loadCharIDs()
        let louds = try XCTUnwrap(LOUDS.load("シ", option: requestOptions()))
        do {
            let inputGraph = InputGraph.build(input: [
                .init(character: "し", inputStyle: .direct),
                .init(character: "か", inputStyle: .direct),
                .init(character: "い", inputStyle: .direct)
            ])
            let dicdata = lookUpDicdata(for: inputGraph, in: louds, charIDs: charIDs, dicdataStore: dicdataStore)
            // シ
            XCTAssertTrue(dicdata.contains { $0.word == "死" })
            // シカ
            XCTAssertTrue(dicdata.contains { $0.word == "鹿" })
            XCTAssertTrue(dicdata.contains { $0.word == "歯科" })
            // シガ
            XCTAssertTrue(dicdata.contains { $0.word == "滋賀" })
            // シカイ
            XCTAssertTrue(dicdata.contains { $0.word == "司会" })
            XCTAssertTrue(dicdata.contains { $0.word == "視界" })
            XCTAssertTrue(dicdata.contains { $0.word == "死界" })
            // シガイ
            XCTAssertTrue(dicdata.contains { $0.word == "市外" })
            XCTAssertTrue(dicdata.contains { $0.word == "市街" })
            XCTAssertTrue(dicdata.contains { $0.word == "死骸" })
        }
        do {
            // ts -> ta
            let inputGraph = InputGraph.build(input: [
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana)
            ])
            let dicdata = lookUpDicdata(for: inputGraph, in: louds, charIDs: charIDs, dicdataStore: dicdataStore)
            // シ
            XCTAssertTrue(dicdata.contains { $0.word == "死" })
            // [シツ]ィ
            XCTAssertTrue(dicdata.contains { $0.word == "質" })
            XCTAssertTrue(dicdata.contains { $0.word == "室" })
            // シタ
            XCTAssertTrue(dicdata.contains { $0.word == "下" })
            XCTAssertTrue(dicdata.contains { $0.word == "舌" })
            // シタイ
            XCTAssertTrue(dicdata.contains { $0.word == "死体" })
            XCTAssertTrue(dicdata.contains { $0.word == "肢体" })
        }
        do {
            // Check that candidates for 「しっ」 exist.
            let inputGraph = InputGraph.build(input: [
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "a", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana)
            ])
            let dicdata = lookUpDicdata(for: inputGraph, in: louds, charIDs: charIDs, dicdataStore: dicdataStore)
            // シ
            XCTAssertTrue(dicdata.contains { $0.word == "死" })
            // シッ
            XCTAssertTrue(dicdata.contains { $0.word == "知っ" })
            XCTAssertTrue(dicdata.contains { $0.word == "しっ" })
            // シッタ
            XCTAssertTrue(dicdata.contains { $0.word == "叱咤" })
            // シッタイ
            XCTAssertTrue(dicdata.contains { $0.word == "失態" })
        }
    }
}
|
||||
|
Reference in New Issue
Block a user