From ebaa31721cc0777332ea37eeadca22e06546554e Mon Sep 17 00:00:00 2001 From: ensan-hcl Date: Sat, 8 Mar 2025 18:17:21 +0900 Subject: [PATCH] refactor: clear Graph-based input impls --- .../ExperimentalTests/InputGraph.md | 94 ----- .../InputGraph/ComposingTextV2.swift | 192 --------- .../InputGraph/ConvertGraph.swift | 383 ----------------- .../InputGraph/CorrectGraph.swift | 167 -------- .../InputGraph/CorrectGraphTests.swift | 106 ----- .../InputGraph/CorrectSuffixTree.swift | 102 ----- .../InputGraph/InputGraph.swift | 253 ----------- .../InputGraph/InputGraphComponents.swift | 125 ------ .../InputGraph/InputGraphTests.swift | 394 ------------------ .../InputGraph/LookupGraph.swift | 279 ------------- .../InputGraph/LookupGraphTests.swift | 307 -------------- .../InputGraph/ReplaceSuffixTree.swift | 73 ---- .../ExperimentalTests/README.md | 3 - .../extension Kana2Kanji+InputGraph.swift | 367 ---------------- 14 files changed, 2845 deletions(-) delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph.md delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ComposingTextV2.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ConvertGraph.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraph.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraphTests.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectSuffixTree.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraph.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphComponents.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphTests.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraph.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraphTests.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ReplaceSuffixTree.swift delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/README.md delete mode 100644 Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/extension Kana2Kanji+InputGraph.swift diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph.md b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph.md deleted file mode 100644 index 99d28f9..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph.md +++ /dev/null @@ -1,94 +0,0 @@ -# グラフによる変換 - -`InputGraph`では、ユーザ入力をグラフとして管理する方式による変換を実装している。 - -## 背景 - -従来、azooKeyではユーザ入力を`ComposingText`で管理し、内部的には`[InputElement]`という操作列でユーザ入力を表し、これを変換アルゴリズムでも利用していた。 - -### ローマ字入力の問題 - -ローマ字入力では「ka→か」のような置換が発生するため、ユーザ操作列を変換アルゴリズム側で利用することは本来的に不可能である。このため、従来実装では「ka→か」の置換を行った仮名文字列を生成後、その仮名文字列が`[InputElement]`のどのrangeに対応するかを記録していた。 - -様々なアドホックな制約を加えることで、この方法はおおよそうまく行くが「tt→っt」のようなケースは対応困難である。なぜなら「っt」という単位はその後さらに「ta→た」などの方法で再度置換されるため、対応rangeという考え方が噛み合わない。 - -そこで従来実装ではこのケースを諦め、`[InputElement]`の時点で「tt」を「っt」と置換し、操作列そのものを歪めることで多段階置換を防ぐことにした。これはおおよそ妥当な実装だが、これにより「attack」のようなローマ字列を入力した際の英単語変換が動作しなくなった。 - -また、ローマ字入力ロジックに強く結びついた実装であるため、カスタムローマ字かな変換のサポートなども困難になっていた。 - -### 誤字訂正の問題 - -また、azooKeyは誤字訂正のサポートを持っているが、これにも問題があった。前述の通り、従来実装では`[InputElement]`の範囲に対して仮名文字列を生成し、それを用いて変換する方式をとっている。 - -誤字訂正はこの仮名文字列生成時に同時に実施している。具体的には、例えば「か」という仮名が出てきたら「が」を追加する。これはグラフベースの手法によって逐次的に構成するが、最終的には訂正候補の文字列がいくつも列挙される。例えば「たいかくせい」と入力した場合、`た`, `か`, `く`, `せ`の4箇所で濁点がつきうるので、2⁴=16候補が仮名文字列として生成される。 - -組合せ爆発を防ぐため、実際には3箇所のみを置換しうると制約しているものの、「は→ば・ぱ」のように2つの候補が存在する候補もある。範囲は最大20文字であるため、$_{20}{\textrm C}_{3} \times 27 = 30780$通りの文字列が最悪ケースで生成される(本当か?)。3万通りの候補の列挙自体も去ることながら、これだけの数の文字列が生成されるとパフォーマンスへの悪影響が非常に大きい。 - -## 新実装 - -これら2つの問題を同時に解決する手法として、グラフベースの入力管理システムを構築する。 - -基本的なアイデアは、操作列に対する誤字訂正とローマ字かな変換をグラフ上で実施することである。新実装では次のような多段階のグラフ変換を通して変換を実施する。 - -```mermaid -graph LR - ComposingText --> CorrectGraph - CorrectGraph --> InputGraph - InputGraph --> LookupGraph - LookupGraph --> ConvertGraph - -``` - -`CorrectGraph`は誤字訂正を実施するグラフである。例えば、`ComposingText`が「たいかくせい」の場合、次のようになる。 - -```mermaid -graph LR - BOS --> た - BOS --> だ - た --> い1[い] - だ --> い1 - い1 --> か - い1 --> が - か --> く - が --> く - か --> ぐ - が --> ぐ - く --> せ - ぐ --> せ - く --> ぜ - ぐ --> ぜ - せ --> い2[い] - ぜ --> い2 -``` - -ローマ字入力で「itsi」の場合、次のようになる。ただし「ts→ta」の訂正ルールが存在するとする。 - -```mermaid -graph LR - BOS --> i1[i] - i1 --> t1[t] - t1 --> s - s --> i2[i] - i1 --> t2[t] - t2 --> a - a --> i2 -``` - -`InputGraph`は`CorrectGraph`に対して置換ルールを適用する。この置換ルールはローマ字かな変換ルールを一般化したものである。この一般化した置換ルールを扱うことによって、カスタム置換ルールへの道が開ける。 - -上記の「itsi」の例に対しては、次のようなグラフが構築される。フリック入力の「たいかくせい」の場合は置換ルールが存在しないため同型である。 - -```mermaid -graph LR - BOS --> い1[い] - い1 --> つ - つ --> ぃ - い1 --> た - た --> い -``` - -この時点で、ローマ字かな変換は実施しているが、誤字訂正後の文字列全体を生成することは行なっていない。従来実装では辞書引きのためにこれを文字列に戻す必要があったが、新実装ではこのまま`LookupGraph`に移る。 - -`LookupGraph`はこのグラフ構造を保ったまま辞書引きを実施する。AzooKeyKanaKanjiConverterが辞書データの構造に利用しているトライ(prefix tree)は接頭辞検索が定数時間で行える。そこで、それぞれのノードから次ノードに遷移しつつ辞書引きを進めることにより、グラフ構造から明示的に文字列を生成する必要がなくなる。さらに、「だいがぐ」のような接頭辞検索で一件もヒットしない文字列を生成した場合、その時点で辞書引きが停止するため、不要な検索を減らすことができる。 - -最後に、`ConvertGraph`を`LookupGraph`に対して構築し、この上でViterbiサーチを実行することで変換を進める。このアルゴリズムは通常のViterbiサーチに対して直感的な拡張として実装できる。 diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ComposingTextV2.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ComposingTextV2.swift deleted file mode 100644 index 610387b..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ComposingTextV2.swift +++ /dev/null @@ -1,192 +0,0 @@ -// -// ComposingTextV2.swift -// -// -// Created by miwa on 2024/04/08. -// - -import Foundation - -struct ComposingTextV2: Hashable, Sendable { - init() { - self.input = [] - self.convertTarget = "" - self.cursorPosition = 0 - } - - var input: [InputElement] - var convertTarget: String - var cursorPosition: Int - - struct InputElement: Equatable, Hashable { - var value: Character - var inputStyle: InputGraphInputStyle.ID - } - - mutating func append(_ element: InputElement) { - self.input.append(element) - self.convertTarget = Self.buildConvertTarget(input) - } - - mutating func append(_ string: String, inputStyle: InputGraphInputStyle.ID) { - self.input.append(contentsOf: string.map {.init(value: $0, inputStyle: inputStyle)}) - self.convertTarget = Self.buildConvertTarget(input) - } - - mutating func removeLast(_ k: Int = 1) { - let rest = self.convertTarget.dropLast(k) - typealias Item = (value: String, inputStyle: InputGraphInputStyle.ID) - var result: [Item] = [] - var maxSuccess = (index: -1, string: "") - for elementIndex in input.indices { - let element = input[elementIndex] - if let last = result.last { - if last.inputStyle.isCompatible(with: element.inputStyle) { - // 一旦inputStyleは継承することにする - result[result.endIndex - 1].value.append(element.value) - } else { - result.append((String(element.value), element.inputStyle)) - } - } else { - result.append((String(element.value), element.inputStyle)) - } - - // 置換適用 - var node = InputGraphInputStyle.init(from: element.inputStyle).replaceSuffixTree - let value = result[result.endIndex - 1].value - var maxMatch = (count: 0, replace: "") - var count = 0 - var stack = Array(value) - while let c = stack.popLast(), let nextNode = node.find(key: c) { - count += 1 - if let replace = nextNode.value { - maxMatch = (count, replace) - } - node = nextNode - } - if maxMatch.count > 0 { - result[result.endIndex - 1].value.removeLast(maxMatch.count) - result[result.endIndex - 1].value.append(contentsOf: maxMatch.replace) - } - let current = result.reduce(into: "") { $0.append(contentsOf: $1.value) } - if rest.hasPrefix(current) { - maxSuccess = (elementIndex, current) - } - } - self.input = Array(self.input.prefix(maxSuccess.index + 1)) - self.input.append(contentsOf: rest.dropFirst(maxSuccess.string.count).map { .init(value: $0, inputStyle: .none) }) - self.convertTarget = String(rest) - } - - static func buildConvertTarget(_ input: [InputElement]) -> String { - typealias Item = (value: String, inputStyle: InputGraphInputStyle.ID) - var result: [Item] = [] - for element in input { - if let last = result.last { - if last.inputStyle.isCompatible(with: element.inputStyle) { - // 一旦inputStyleは継承することにする - result[result.endIndex - 1].value.append(element.value) - } else { - result.append((String(element.value), element.inputStyle)) - } - } else { - result.append((String(element.value), element.inputStyle)) - } - - // 置換適用 - var node = InputGraphInputStyle.init(from: element.inputStyle).replaceSuffixTree - let value = result[result.endIndex - 1].value - var maxMatch = (count: 0, replace: "") - var count = 0 - var stack = Array(value) - while let c = stack.popLast(), let nextNode = node.find(key: c) { - count += 1 - if let replace = nextNode.value { - maxMatch = (count, replace) - } - node = nextNode - } - if maxMatch.count > 0 { - result[result.endIndex - 1].value.removeLast(maxMatch.count) - result[result.endIndex - 1].value.append(contentsOf: maxMatch.replace) - } - } - return result.reduce(into: "") { $0.append(contentsOf: $1.value) } - } -} - - -import XCTest - -class ComposingTextV2Test: XCTestCase { - func testAppend() throws { - var c = ComposingTextV2() - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - XCTAssertEqual(c.convertTarget, "あ") - c.append(.init(value: "t", inputStyle: .systemRomanKana)) - XCTAssertEqual(c.convertTarget, "あt") - c.append(.init(value: "t", inputStyle: .systemRomanKana)) - XCTAssertEqual(c.convertTarget, "あっt") - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - XCTAssertEqual(c.convertTarget, "あった") - } - func testDelete_ata() throws { - var c = ComposingTextV2() - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.append(.init(value: "t", inputStyle: .systemRomanKana)) - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.removeLast() - XCTAssertEqual(c.convertTarget, "あ") - } - func testDelete_asha() throws { - var c = ComposingTextV2() - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.append(.init(value: "s", inputStyle: .systemRomanKana)) - c.append(.init(value: "h", inputStyle: .systemRomanKana)) - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.removeLast() - XCTAssertEqual(c.convertTarget, "あし") - XCTAssertEqual(c.input.count, 2) - XCTAssertEqual(c.input[0], .init(value: "a", inputStyle: .systemRomanKana)) - XCTAssertEqual(c.input[1], .init(value: "し", inputStyle: .none)) - } - func testDelete_atta() throws { - var c = ComposingTextV2() - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.append(.init(value: "t", inputStyle: .systemRomanKana)) - c.append(.init(value: "t", inputStyle: .systemRomanKana)) - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.removeLast() - XCTAssertEqual(c.convertTarget, "あっ") - XCTAssertEqual(c.input.count, 2) - XCTAssertEqual(c.input[0], .init(value: "a", inputStyle: .systemRomanKana)) - XCTAssertEqual(c.input[1], .init(value: "っ", inputStyle: .none)) - } - func testDelete_aita() throws { - var c = ComposingTextV2() - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.append(.init(value: "i", inputStyle: .systemRomanKana)) - c.append(.init(value: "t", inputStyle: .systemRomanKana)) - c.append(.init(value: "a", inputStyle: .systemRomanKana)) - c.removeLast() - XCTAssertEqual(c.convertTarget, "あい") - XCTAssertEqual(c.input.count, 2) - XCTAssertEqual(c.input[0], .init(value: "a", inputStyle: .systemRomanKana)) - XCTAssertEqual(c.input[1], .init(value: "i", inputStyle: .systemRomanKana)) - } - func testBuildConvertTarget() throws { - XCTAssertEqual(ComposingTextV2.buildConvertTarget([.init(value: "a", inputStyle: .systemRomanKana)]), "あ") - XCTAssertEqual(ComposingTextV2.buildConvertTarget([.init(value: "t", inputStyle: .systemRomanKana)]), "t") - XCTAssertEqual( - ComposingTextV2.buildConvertTarget( - [ - .init(value: "a", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ] - ), - "あった" - ) - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ConvertGraph.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ConvertGraph.swift deleted file mode 100644 index 925f7a9..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ConvertGraph.swift +++ /dev/null @@ -1,383 +0,0 @@ -// -// ConvertGraph.swift -// -// -// Created by miwa on 2024/02/23. -// - -import XCTest -import Foundation -@testable import KanaKanjiConverterModule - -struct ConvertGraph { - struct Node { - var value: Character - var latticeNodes: [LatticeNode] - var inputElementsRange: InputGraphRange - var correction: CorrectGraph.Correction = .none - } - - var nodes: [Node] = [ - // root node - Node(value: "\0", latticeNodes: [], inputElementsRange: .endIndex(0)) - ] - - /// 許可されたNextIndex - var allowedNextIndex: [Int: IndexSet] = [:] - /// 許可されたprevIndex - var allowedPrevIndex: [Int: IndexSet] = [:] - - init(input: LookupGraph, nodeIndex2LatticeNode: [Int: [LatticeNode]]) { - let nodes = input.nodes.enumerated().map { (index, node) in - Node( - value: node.character, - latticeNodes: nodeIndex2LatticeNode[index, default: []], - inputElementsRange: node.inputElementsRange, - correction: node.correction - ) - } - self.nodes = nodes - self.allowedPrevIndex = input.allowedPrevIndex - self.allowedNextIndex = input.allowedNextIndex - } -} - -extension ConvertGraph { - /// ラティスのノード。これを用いて計算する。 - final class LatticeNode: CustomStringConvertible { - /// このノードが保持する辞書データ - public let data: DicdataElement - /// このDicdataElementを作った際、終端に対応したConvertGraphのノードのindex - var endNodeIndex: Int - /// このノードの前に来ているノード。`N_best`の分だけ保存する - var prevs: [RegisteredNode] = [] - /// `prevs`の各要素に対応するスコアのデータ - var values: [PValue] = [] - var inputElementsRange: InputGraphRange - - /// `EOS`に対応するノード。 - static var EOSNode: LatticeNode { - LatticeNode(data: DicdataElement.EOSData, endNodeIndex: 0, inputElementsRange: .unknown) - } - - init(data: DicdataElement, endNodeIndex: Int, inputElementsRange: InputGraphRange, prevs: [RegisteredNode] = []) { - self.data = data - self.values = [] - self.endNodeIndex = endNodeIndex - self.inputElementsRange = inputElementsRange - self.prevs = prevs - } - - /// `LatticeNode`の持っている情報を反映した`RegisteredNode`を作成する - /// `LatticeNode`は複数の過去のノードを持つことができるが、`RegisteredNode`は1つしか持たない。 - func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode { - // FIXME: 適当に実装した - RegisteredNode( - data: self.data, - registered: self.prevs[index], - totalValue: value, - inputElementsRange: self.inputElementsRange - ) - } - - var description: String { - "LatticeNode(data: \(data), ...)" - } - } - struct RegisteredNode: RegisteredNodeProtocol { - /// このノードが保持する辞書データ - let data: DicdataElement - /// 1つ前のノードのデータ - let prev: (any RegisteredNodeProtocol)? - /// 始点からこのノードまでのコスト - let totalValue: PValue - /// inputData.input内のrange - var inputElementsRange: InputGraphRange - - init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, inputElementsRange: InputGraphRange) { - self.data = data - self.prev = registered - self.totalValue = totalValue - self.inputElementsRange = inputElementsRange - } - - /// 始点ノードを生成する関数 - /// - Returns: 始点ノードのデータ - static func BOSNode() -> RegisteredNode { - RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, inputElementsRange: .endIndex(0)) - } - } - -} - -/// `struct`の`RegisteredNode`を再帰的に所持できるようにするため、Existential Typeで抽象化する。 -/// - Note: `indirect enum`との比較はまだやっていない。 -protocol RegisteredNodeProtocol { - var data: DicdataElement {get} - var prev: (any RegisteredNodeProtocol)? {get} - var totalValue: PValue {get} - var inputElementsRange: InputGraphRange {get} -} - -extension ConvertGraph { - func convertAll(option: borrowing ConvertRequestOptions, dicdataStore: DicdataStore) -> LatticeNode { - let result: LatticeNode = LatticeNode.EOSNode - result.inputElementsRange = .init(startIndex: self.nodes.compactMap {$0.inputElementsRange.endIndex}.max(), endIndex: nil) - var processStack = Array(self.nodes.enumerated().reversed()) - var processedIndices: IndexSet = [0] // root - var invalidIndices: IndexSet = [] - while let (i, graphNode) = processStack.popLast() { - // 処理済みなら無視する - guard !processedIndices.contains(i), !invalidIndices.contains(i) else { - continue - } - // 全てのprevNodeが処理済みか確かめる - let prevIndices = self.allowedPrevIndex[i, default: []] - guard !prevIndices.isEmpty else { - // 空の場合は無視して次へ - invalidIndices.insert(i) - continue - } - - var unprocessedPrevs: [(Int, Node)] = [] - for prevIndex in prevIndices { - if !processedIndices.contains(prevIndex) && !invalidIndices.contains(prevIndex) { - unprocessedPrevs.append((prevIndex, self.nodes[prevIndex])) - } - } - // 未処理のprevNodeがある場合、それらをstackの末尾に追加してもう一度やり直す - guard unprocessedPrevs.isEmpty else { - processStack.append((i, graphNode)) - processStack.append(contentsOf: unprocessedPrevs) - continue - } - print(i, graphNode.inputElementsRange) - processedIndices.insert(i) - // 処理を実施する - for node in graphNode.latticeNodes { - if node.prevs.isEmpty { - continue - } - if dicdataStore.shouldBeRemoved(data: node.data) { - continue - } - // 生起確率を取得する。 - let wValue: PValue = node.data.value() - if i == 0 { - // valuesを更新する - node.values = node.prevs.map {$0.totalValue + wValue + dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} - } else { - // valuesを更新する - node.values = node.prevs.map {$0.totalValue + wValue} - } - // 終端の場合は終了 - if self.allowedNextIndex[node.endNodeIndex, default: []].isEmpty || result.inputElementsRange.startIndex == node.inputElementsRange.endIndex { - for index in node.prevs.indices { - let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index]) - result.prevs.append(newnode) - } - } else { - for nextIndex in self.allowedNextIndex[node.endNodeIndex, default: []] { - // nodeの繋がる次にあり得る全てのnextnodeに対して - for nextnode in self.nodes[nextIndex].latticeNodes { - // この関数はこの時点で呼び出して、後のnode.registered.isEmptyで最終的に弾くのが良い。 - if dicdataStore.shouldBeRemoved(data: nextnode.data) { - continue - } - // クラスの連続確率を計算する。 - let ccValue: PValue = dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid) - // nodeの持っている全てのprevnodeに対して - for (index, value) in node.values.enumerated() { - let newValue: PValue = ccValue + value - // 追加すべきindexを取得する - let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1 - if lastindex == option.N_best { - continue - } - let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue) - // カウントがオーバーしている場合は除去する - if nextnode.prevs.count >= option.N_best { - nextnode.prevs.removeLast() - } - // removeしてからinsertした方が速い (insertはO(N)なので) - nextnode.prevs.insert(newnode, at: lastindex) - } - } - } - } - } - } - return result - } - - mutating func convertAllDifferential(cacheConvertGraph: ConvertGraph, option: borrowing ConvertRequestOptions, dicdataStore: DicdataStore, lookupGraphMatchInfo: [Int: Int]) -> LatticeNode { - print(lookupGraphMatchInfo) - // 最初にマッチチェックをする - typealias MatchSearchItem = ( - curNodeIndex: Int, - cacheNodeIndex: Int - ) - // BOSはマッチする - var stack: [MatchSearchItem] = [(0, 0)] - var curNodeToCacheNode: [Int: Int] = [:] - do { - var processedIndices: IndexSet = [] - - while let item = stack.popLast() { - if processedIndices.contains(item.curNodeIndex) { - continue - } - let prevIndices = self.allowedPrevIndex[item.curNodeIndex, default: []] - if prevIndices.allSatisfy(processedIndices.contains) { - if prevIndices.allSatisfy(curNodeToCacheNode.keys.contains) { - // マッチする - curNodeToCacheNode[item.curNodeIndex] = item.cacheNodeIndex - // 子ノードを足す - for nextNodeIndex in self.allowedNextIndex[item.curNodeIndex, default: []] { - let nextNode = self.nodes[nextNodeIndex] - if let cacheNodeIndex = cacheConvertGraph.allowedNextIndex[item.cacheNodeIndex, default: []].first(where: { - cacheConvertGraph.nodes[$0].value == nextNode.value - }) { - stack.append((nextNodeIndex, cacheNodeIndex)) - } - } - } else { - // マッチしない - // この場合、このノードのnextNodeはどの道マッチしないので、探索済みとしても問題ない - processedIndices.formUnion(self.allowedNextIndex[item.curNodeIndex, default: []]) - } - // 処理済み - processedIndices.insert(item.curNodeIndex) - } else { - // prevNodeの直前に移動する - let restIndices = prevIndices.subtracting(IndexSet(processedIndices)) - let firstIndex = stack.firstIndex(where: { restIndices.contains($0.curNodeIndex) }) ?? 0 - stack.insert(item, at: firstIndex) - } - } - } - let lookupGraphCacheNodeToCurNode = Dictionary(lookupGraphMatchInfo.map {(k, v) in (v, k)}, uniquingKeysWith: { (k1, _) in k1 }) - struct HashablePair: Hashable { - init(_ first: T1, _ second: T2) { - self.first = first - self.second = second - } - var first: T1 - var second: T2 - } - // 得たマッチ情報を使ってselfを部分的に構築する - print("curNodeToCacheNode", curNodeToCacheNode) - for (curNodeIndex, cacheNodeIndex) in curNodeToCacheNode { - self.nodes[curNodeIndex].latticeNodes.removeAll { - lookupGraphMatchInfo.keys.contains($0.endNodeIndex) - } - cacheConvertGraph.nodes[cacheNodeIndex].latticeNodes.forEach { - if let e = lookupGraphCacheNodeToCurNode[$0.endNodeIndex] { - $0.endNodeIndex = e - self.nodes[curNodeIndex].latticeNodes.append($0) - } - } - } - - // 構築していない部分を触る - let result: LatticeNode = LatticeNode.EOSNode - result.inputElementsRange = .init(startIndex: self.nodes.compactMap {$0.inputElementsRange.endIndex}.max(), endIndex: nil) - var processStack = Array(self.nodes.enumerated().reversed()) - var processedIndices: IndexSet = [0] // root - var invalidIndices: IndexSet = [] - while let (i, graphNode) = processStack.popLast() { - // 処理済みなら無視する - guard !processedIndices.contains(i), !invalidIndices.contains(i) else { - continue - } - // 全てのprevNodeが処理済みか確かめる - let prevIndices = self.allowedPrevIndex[i, default: []] - guard !prevIndices.isEmpty else { - // 空の場合は無視して次へ - invalidIndices.insert(i) - continue - } - - var unprocessedPrevs: Set = [] - for prevIndex in prevIndices { - if !processedIndices.contains(prevIndex) && !invalidIndices.contains(prevIndex) { - unprocessedPrevs.insert(prevIndex) - } - } - // 未処理のprevNodeがある場合、それらをstackの末尾に追加してもう一度やり直す - guard unprocessedPrevs.isEmpty else { - // prevNodeの直前に移動する - let firstIndex = processStack.firstIndex(where: { unprocessedPrevs.contains($0.offset) }) ?? 0 - processStack.insert((i, graphNode), at: firstIndex) - continue - } - print(i, graphNode.inputElementsRange) - processedIndices.insert(i) - let isMatchedGraphNode = curNodeToCacheNode.keys.contains(i) - // 処理を実施する - for node in graphNode.latticeNodes { - if node.prevs.isEmpty { - continue - } - if dicdataStore.shouldBeRemoved(data: node.data) { - continue - } - let isMatched = isMatchedGraphNode && lookupGraphMatchInfo.keys.contains(node.endNodeIndex) - if !isMatched { - // マッチしていない場合、チェックを走らせる - // 生起確率を取得する。 - let wValue: PValue = node.data.value() - node.values = if i == 0 { - // valuesを更新する - node.prevs.map {$0.totalValue + wValue + dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} - } else { - // valuesを更新する - node.prevs.map {$0.totalValue + wValue} - } - } - // 終端の場合は終了 - if self.allowedNextIndex[node.endNodeIndex, default: []].isEmpty || result.inputElementsRange.startIndex == node.inputElementsRange.endIndex { - for index in node.prevs.indices { - let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index]) - result.prevs.append(newnode) - } - } else { - for nextIndex in self.allowedNextIndex[node.endNodeIndex, default: []] { - // 次のノードがマッチしている場合、呼び出しの必要はない - let nextMatchable = curNodeToCacheNode.keys.contains(nextIndex) - // nodeの繋がる次にあり得る全てのnextnodeに対して - for nextnode in self.nodes[nextIndex].latticeNodes { - if nextMatchable && lookupGraphMatchInfo.keys.contains(nextnode.endNodeIndex) { - continue - } - // この関数はこの時点で呼び出して、後のnode.registered.isEmptyで最終的に弾くのが良い。 - if dicdataStore.shouldBeRemoved(data: nextnode.data) { - continue - } - // クラスの連続確率を計算する。 - let ccValue: PValue = dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid) - // nodeの持っている全てのprevnodeに対して - for (index, value) in node.values.enumerated() { - let newValue: PValue = ccValue + value - // 追加すべきindexを取得する - let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1 - if lastindex == option.N_best { - continue - } - let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue) - // カウントがオーバーしている場合は除去する - if nextnode.prevs.count >= option.N_best { - nextnode.prevs.removeLast() - } - // removeしてからinsertした方が速い (insertはO(N)なので) - nextnode.prevs.insert(newnode, at: lastindex) - } - } - } - } - } - } - return result - } - -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraph.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraph.swift deleted file mode 100644 index 1f6a28a..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraph.swift +++ /dev/null @@ -1,167 +0,0 @@ -// -// CorrectGraph.swift -// -// -// Created by miwa on 2024/02/25. -// - -import Foundation -import KanaKanjiConverterModule - -struct CorrectGraph { - enum Correction: CustomStringConvertible { - /// 訂正ではない - case none - /// 訂正である - case typo - - var isTypo: Bool { - self == .typo - } - - var description: String { - switch self { - case .none: "none" - case .typo: "typo" - } - } - } - - var nodes: [Node] = [ - // BOSノードは最初から追加 - .init(inputElementsRange: .endIndex(0), inputStyle: .all, correction: .none, value: "\0") - ] - /// 許可されたNextIndex - var allowedNextIndex: [Int: IndexSet] = [:] - /// 許可されたprevIndex - var allowedPrevIndex: [Int: IndexSet] = [:] - /// `ComposingText`の`inputs`に対して、それをendIndexとするノードインデックスの集合を返す - var inputIndexToEndNodeIndices: [Int: IndexSet] = [0: IndexSet(integer: 0)] - - struct Node: Equatable, Sendable { - var inputElementsRange: InputGraphRange - var inputStyle: InputGraphInputStyle.ID - var correction: CorrectGraph.Correction - var value: Character - } - - @discardableResult - mutating func insert(_ node: consuming Node, nextTo prevNodeIndexSet: IndexSet) -> Int { - let index = nodes.count - for prevNodeIndex in prevNodeIndexSet { - self.allowedNextIndex[prevNodeIndex, default: IndexSet()].insert(index) - } - self.allowedPrevIndex[index, default: IndexSet()].formUnion(prevNodeIndexSet) - self.nodes.append(consume node) - return index - } - - private mutating func insertConnectedTypoNodes(values: [Character], startIndex: Int, endIndex: Int, inputStyle: InputGraphInputStyle.ID, lastIndexSet: IndexSet) -> (lastIndex: Int, insertedIndexSet: IndexSet) { - guard !values.isEmpty else { - fatalError("values must not be empty") - } - var insertedIndexSet = IndexSet() - var lastIndexSet = lastIndexSet - for (i, c) in zip(values.indices, values) { - let inputElementRange: InputGraphRange = if i == values.startIndex && i+1 == values.endIndex { - .range(startIndex, endIndex) - } else if i == values.startIndex { - .init(startIndex: startIndex, endIndex: nil) - } else if i+1 == values.endIndex { - .init(startIndex: nil, endIndex: endIndex) - } else { - .unknown - } - let node = Node( - inputElementsRange: inputElementRange, - inputStyle: inputStyle, - correction: .typo, - value: c - ) - let nodeIndex = self.insert(node, nextTo: lastIndexSet) - lastIndexSet = IndexSet(integer: nodeIndex) - insertedIndexSet.insert(nodeIndex) - } - return (lastIndexSet.first!, insertedIndexSet) - } - - @discardableResult - mutating func update(with item: ComposingTextV2.InputElement, index: Int, input: [ComposingTextV2.InputElement]) -> IndexSet { - var insertedIndexSet = IndexSet() - // 訂正のない候補を追加 - do { - let nodeIndex = self.insert( - Node( - inputElementsRange: .range(index, index + 1), - inputStyle: input[index].inputStyle, - correction: .none, - value: item.value - ), - nextTo: self.inputIndexToEndNodeIndices[index, default: IndexSet()] - ) - self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex) - insertedIndexSet.insert(nodeIndex) - } - - // 訂正候補を追加 - let correctSuffixTree = InputGraphInputStyle(from: item.inputStyle).correctSuffixTree - typealias SearchItem = ( - node: CorrectSuffixTree.Node, - nextIndex: Int, - routeCount: Int, - inputStyleId: InputGraphInputStyle.ID - ) - var stack: [SearchItem] = [ - (correctSuffixTree, index, 1, .all) - ] - // backward search - while let (cNode, cIndex, cRouteCount, cInputStyleId) = stack.popLast() { - guard cIndex >= input.startIndex else { - continue - } - let inputStyleId = InputGraphInputStyle(from: input[cIndex].inputStyle).id - guard cInputStyleId.isCompatible(with: inputStyleId) else { - continue - } - if let nNode = cNode.find(key: input[cIndex].value) { - stack.append((nNode, cIndex - 1, cRouteCount + 1, inputStyleId)) - for value in nNode.value { - if value.isEmpty { - continue - } else if value.count > 1 { - let (nodeIndex, indexSet) = self.insertConnectedTypoNodes( - values: Array(value), - startIndex: index - cRouteCount + 1, - endIndex: index + 1, - inputStyle: inputStyleId, - lastIndexSet: self.inputIndexToEndNodeIndices[index - cRouteCount + 1, default: IndexSet()] - ) - self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex) - insertedIndexSet.formUnion(indexSet) - } else { - let nodeIndex = self.insert( - Node( - inputElementsRange: .range(index - cRouteCount + 1, index + 1), - inputStyle: inputStyleId, - correction: .typo, - value: value.first! - ), - nextTo: self.inputIndexToEndNodeIndices[index - cRouteCount + 1, default: IndexSet()] - ) - self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex) - insertedIndexSet.insert(nodeIndex) - } - } - } - } - return insertedIndexSet - } - - static func build(input: [ComposingTextV2.InputElement]) -> Self { - var correctGraph = Self() - for (index, item) in zip(input.indices, input) { - correctGraph.update(with: item, index: index, input: input) - } - return correctGraph - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraphTests.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraphTests.swift deleted file mode 100644 index c61e2d0..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectGraphTests.swift +++ /dev/null @@ -1,106 +0,0 @@ -// -// CorrectGraphTests.swift -// -// -// Created by miwa on 2024/02/21. -// - -import Foundation - -@testable import KanaKanjiConverterModule -import XCTest - -final class CorrectGraphTests: XCTestCase { - func testBuildSimpleDirectInput() throws { - let graph = CorrectGraph.build(input: [ - .init(value: "あ", inputStyle: .systemFlickDirect) - ]) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "あ"}), - .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .none, value: "あ") - ) - } - func testBuildSimpleDirectInputWithTypo() throws { - let graph = CorrectGraph.build(input: [ - .init(value: "か", inputStyle: .systemFlickDirect) - ]) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "か"}), - .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .none, value: "か") - ) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "が"}), - .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .typo, value: "が") - ) - } - func testBuildMultipleDirectInputWithTypo() throws { - let graph = CorrectGraph.build(input: [ - .init(value: "あ", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect), - .init(value: "う", inputStyle: .systemFlickDirect) - ]) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "か"}), - .init(inputElementsRange: .range(1, 2), inputStyle: .systemFlickDirect, correction: .none, value: "か") - ) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "が"}), - .init(inputElementsRange: .range(1, 2), inputStyle: .systemFlickDirect, correction: .typo, value: "が") - ) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "う"}), - .init(inputElementsRange: .range(2, 3), inputStyle: .systemFlickDirect, correction: .none, value: "う") - ) - if let index = graph.nodes.firstIndex(where: {$0.value == "う"}) { - XCTAssertEqual(graph.allowedPrevIndex[index, default: .init()].count, 2) - } else { - XCTAssertThrowsError("Should not be nil") - } - } - func testBuildSimpleRomanInput() throws { - let graph = CorrectGraph.build(input: [ - .init(value: "k", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ]) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "k"}), - .init(inputElementsRange: .range(0, 1), inputStyle: .systemRomanKana, correction: .none, value: "k") - ) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "a"}), - .init(inputElementsRange: .range(1, 2), inputStyle: .systemRomanKana, correction: .none, value: "a") - ) - } - func testBuildSimpleRomanInputWithTypo() throws { - let graph = CorrectGraph.build(input: [ - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana) - ]) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "t" && $0.inputElementsRange == .range(0, 1)}), - .init(inputElementsRange: .range(0, 1), inputStyle: .systemRomanKana, correction: .none, value: "t") - ) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "s"}), - .init(inputElementsRange: .range(1, 2), inputStyle: .systemRomanKana, correction: .none, value: "s") - ) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "t" && $0.inputElementsRange == .startIndex(0)}), - .init(inputElementsRange: .startIndex(0), inputStyle: .systemRomanKana, correction: .typo, value: "t") - ) - XCTAssertEqual( - graph.nodes.first(where: {$0.value == "a"}), - .init(inputElementsRange: .endIndex(2), inputStyle: .systemRomanKana, correction: .typo, value: "a") - ) - if let index = graph.nodes.firstIndex(where: {$0.value == "a"}) { - let indices = graph.allowedPrevIndex[index, default: .init()] - XCTAssertEqual(indices.count, 1) - XCTAssertEqual( - indices.first, - graph.nodes.firstIndex(where: {$0.value == "t" && $0.inputElementsRange == .startIndex(0)}) - ) - } else { - XCTAssertThrowsError("Should not be nil") - } - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectSuffixTree.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectSuffixTree.swift deleted file mode 100644 index 442810d..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/CorrectSuffixTree.swift +++ /dev/null @@ -1,102 +0,0 @@ -// -// CorrectSuffixTree.swift -// -// -// Created by miwa on 2024/02/23. -// - -import Foundation - -@testable import KanaKanjiConverterModule -import XCTest - -/// 誤字訂正のためのsuffix tree -enum CorrectSuffixTree { - final class Node { - init(_ children: [Character: Node] = [:], value: [String] = []) { - self.children = children - self.value = value - } - - static func terminal(_ value: [String]) -> Node { - Node(value: value) - } - - static func terminal(_ value: String) -> Node { - Node(value: [value]) - } - - var children: [Character: Node] = [:] - var value: [String] - func find(key: Character) -> Node? { - return children[key] - } - } - - static let roman2kana: Node = { - Node([ - "s": Node([ - "g": .terminal("ga"), - "m": .terminal("ma"), - "t": .terminal("ta"), - "y": .terminal("ya") - ]), - "q": Node([ - "g": .terminal("ga"), - "m": .terminal("ma"), - "t": .terminal("ta"), - "y": .terminal("ya") - ]), - "d": Node([ - "g": .terminal("ge"), - "m": .terminal("me"), - "t": .terminal("te"), - "y": .terminal("ya") - ]), - "r": Node([ - "g": .terminal("ge"), - "m": .terminal("me"), - "t": .terminal("te"), - "y": .terminal("ya") - ]), - "w": Node([ - "g": .terminal("ge"), - "m": .terminal("me"), - "t": .terminal("te"), - "y": .terminal("ya") - ]), - "k": Node([ - "g": .terminal("gi"), - "m": .terminal("mi"), - "t": .terminal("ti"), - "y": .terminal("ya") - ]), - "l": Node([ - "g": .terminal("go"), - "m": .terminal("mo"), - "t": .terminal("to"), - "y": .terminal("ya") - ]), - "p": Node([ - "g": .terminal("go"), - "m": .terminal("mo"), - "t": .terminal("to"), - "y": .terminal("ya") - ]), - "j": Node([ - "g": .terminal("gu"), - "m": .terminal("mu"), - "t": .terminal("tu"), - "y": .terminal("ya") - ]) - ]) - }() - - static let direct: Node = { - Node([ - "か": .terminal(["が"]), - "た": .terminal(["だ"]), - "は": .terminal(["ば", "ぱ"]) - ]) - }() -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraph.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraph.swift deleted file mode 100644 index 361c2c2..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraph.swift +++ /dev/null @@ -1,253 +0,0 @@ -// -// InputGraph.swift -// -// -// Created by miwa on 2024/02/21. -// - -import Foundation -import DequeModule - -@testable import KanaKanjiConverterModule -import XCTest - -struct InputGraph { - struct Node: Equatable, CustomStringConvertible { - var character: Character - var inputElementsRange: InputGraphRange - var correction: CorrectGraph.Correction = .none - - var description: String { - let `is` = inputElementsRange.startIndex?.description ?? "?" - let ie = inputElementsRange.endIndex?.description ?? "?" - return "Node(\"\(character)\", i(\(`is`)..<\(ie)), isTypo: \(correction.isTypo))" - } - } - - var nodes: [Node] = [ - // root node - Node(character: "\0", inputElementsRange: .endIndex(0), correction: .none) - ] - /// 許可されたNextIndex - var allowedNextIndex: [Int: IndexSet] = [:] - /// 許可されたprevIndex - var allowedPrevIndex: [Int: IndexSet] = [:] - /// correctGraphのノード情報 - var nextCorrectNodeIndices: [Int: IndexSet] = [:] - - mutating func update(_ correctGraph: CorrectGraph, nodeIndex: Int) { - let cgNode = correctGraph.nodes[nodeIndex] - // アルゴリズム - // 1. nodeIndexをnextCorrectNodeIndicesに持っているノードを列挙する - // 2. それぞれのノードにcgNodes[nodeIndex]を追加し、末尾置換が可能であれば実施する - // 3. 可能でない場合、そのまま追加する - // まず、cgNodeをinsertする - let prevNodeIndices: [Int] = self.nextCorrectNodeIndices.lazy.filter { - $0.value.contains(nodeIndex) - }.map { - $0.key - } - let newIndex = self.nodes.endIndex - self.nodes.append(Node(character: cgNode.value, inputElementsRange: cgNode.inputElementsRange, correction: cgNode.correction)) - // 構造の情報を更新 - self.allowedPrevIndex[newIndex] = IndexSet(prevNodeIndices) - for prevNodeIndex in prevNodeIndices { - self.allowedNextIndex[prevNodeIndex, default: IndexSet()].insert(newIndex) - } - // correct graphにおけるnext nodeの情報 - self.nextCorrectNodeIndices[newIndex] = correctGraph.allowedNextIndex[nodeIndex] - - // 次に置換を動かす - let startNode = InputGraphInputStyle.init(from: cgNode.inputStyle).replaceSuffixTree - // nodesをそれぞれ遡っていく必要がある - typealias SearchItem = ( - suffixTreeNode: ReplaceSuffixTree.Node, - // 辿ってきたインデックス - route: [Int], - // 発見された置換 - foundValue: Replacement?, - correction: CorrectGraph.Correction - ) - typealias Match = ( - // 置換 - replacement: Replacement, - // 置換を含むroute - route: [Int] - ) - struct Replacement: Hashable { - var route: [Int] - var value: String - } - var backSearchMatch: [Match] = [] - var stack: [SearchItem] = [(startNode, [newIndex], foundValue: nil, correction: cgNode.correction)] - while let (cSuffixTreeNode, cRoute, cFoundValue, cCorrection) = stack.popLast() { - // must not be empty - let cNodeIndex = cRoute[0] - if let bNode = cSuffixTreeNode.find(key: self.nodes[cNodeIndex].character) { - for prevGraphNodeIndex in self.allowedPrevIndex[cNodeIndex, default: IndexSet()] { - // TODO: InputGraph.NodeにもInputStyle.IDを持たせてここで比較する - stack.append( - ( - bNode, - // FIXME: 配列を生成し直しており、よくない - [prevGraphNodeIndex] + cRoute, - // bNodeがvalueを持っていればそれで置き換え、持っていなければ現在のものを用いる - foundValue: bNode.value.map {Replacement(route: cRoute, value: $0)} ?? cFoundValue, - cCorrection.isTypo ? .typo : self.nodes[prevGraphNodeIndex].correction - ) - ) - } - } else { - // bNodeが見つからない場合、発見された置換をbackSearcMatchに追加する - if let cFoundValue { - backSearchMatch.append((cFoundValue, cRoute)) - } - } - - } - - // backSearchMatchを統合する - let replacementToTarget = Dictionary(grouping: backSearchMatch, by: \.replacement) - for (replacement, matches) in replacementToTarget { - // MARK: replaceを実行する - // 1. valueをnodeとして追加する - // 2. routeに含まれるnodeをinvalidateする - - // MARK: 新規ノードを追加 - let startIndex = self.nodes[replacement.route[0]].inputElementsRange.startIndex - let endIndex = self.nodes[replacement.route[replacement.route.endIndex - 1]].inputElementsRange.endIndex - - let characters = Array(replacement.value) - let correction: CorrectGraph.Correction = if replacement.route.allSatisfy({!self.nodes[$0].correction.isTypo}) { - .none - } else { - .typo - } - let newNodes = characters.indices.map { index in - let range: InputGraphRange = if index == characters.startIndex && index == characters.endIndex - 1 { - .init(startIndex: startIndex, endIndex: endIndex) - } else if index == characters.startIndex { - .init(startIndex: startIndex, endIndex: nil) - } else if index == characters.endIndex - 1 { - .init(startIndex: nil, endIndex: endIndex) - } else { - .unknown - } - return Node(character: characters[index], inputElementsRange: range, correction: correction) - } - let firstIndex = self.nodes.endIndex - let lastIndex = self.nodes.endIndex + newNodes.count - 1 - self.nodes.append(contentsOf: newNodes) - // MARK: next/prevを調整 - // firstIndexの処理: 直前ノードとのつながりをコピーする - // routeからreplaceされる部分を落とし、置換の直前のindexを得る - let prevIndices = matches.compactMap { match in - assert(match.route.hasSuffix(replacement.route)) - return match.route.dropLast(replacement.route.count).last - } - self.allowedPrevIndex[firstIndex] = IndexSet(prevIndices) - for i in prevIndices { - // firstIndexを追加してreplacementの最初を削除する - self.allowedNextIndex[i, default: IndexSet()].insert(firstIndex) - } - // 中央部の処理 - for i in firstIndex ..< lastIndex { - self.allowedNextIndex[i, default: IndexSet()].insert(i + 1) - self.allowedPrevIndex[i + 1, default: IndexSet()].insert(i) - } - // lastIndexの処理: correctGraphの情報を修正する - self.nextCorrectNodeIndices[lastIndex] = correctGraph.allowedNextIndex[nodeIndex] - } - // 上のforループを出てからこの処理を実行する - for replacement in replacementToTarget.keys { - // 置換済みのノードに後ろ向きに迷い込むことを防ぐ - self.nextCorrectNodeIndices[replacement.route.last!] = IndexSet() - self.allowedPrevIndex[replacement.route.last!] = IndexSet() - } - } - - consuming func clean() -> Self { - var newGraph = Self(nodes: []) - var indices: [(nodeIndex: Int, fromIndex: Int?)] = [(0, nil)] - var processedNodeIndices: [Int: Int] = [:] - while let (nodeIndex, fromIndex) = indices.popLast() { - let newIndex = if let newIndex = processedNodeIndices[nodeIndex] { - newIndex - } else { - { - let newIndex = newGraph.nodes.endIndex - newGraph.nodes.append(self.nodes[nodeIndex]) - newGraph.nextCorrectNodeIndices[newIndex] = self.nextCorrectNodeIndices[nodeIndex] - return newIndex - }() - } - if let fromIndex { - newGraph.allowedNextIndex[fromIndex, default: IndexSet()].insert(newIndex) - newGraph.allowedPrevIndex[newIndex, default: IndexSet()].insert(fromIndex) - } - for nextNodeIndex in self.allowedNextIndex[nodeIndex, default: IndexSet()] { - indices.append((nextNodeIndex, newIndex)) - } - processedNodeIndices[nodeIndex] = newIndex - } - return newGraph - } - - /// インクリメンタルな構築のための関数。 - /// - warning: 実装上の問題から使っていない - mutating func _applyAdditionalCorrectGraph(_ newCorrectGraph: CorrectGraph, addedNodeIndices: IndexSet) { - // ノードが末尾に追加されたケースでInputGraphを更新する - // ex. 「t」に対して「s」が追加された場合、correctGraphでは「t」「a」も追加されている - var processedIndices = IndexSet() - var nodeIndices = Array(addedNodeIndices.reversed()) - while let nodeIndex = nodeIndices.popLast() { - if processedIndices.contains(nodeIndex) { - continue - } - // addedNodeIndicesの中で未処理のものがprevに入っているケース - let prevIndices = newCorrectGraph.allowedPrevIndex[nodeIndex, default: IndexSet()].intersection(addedNodeIndices) - // 差がある場合 - let diff = prevIndices.subtracting(processedIndices) - guard diff.isEmpty else { - nodeIndices.append(nodeIndex) - nodeIndices.append(contentsOf: diff) - continue - } - processedIndices.insert(nodeIndex) - // root以外 - assert(nodeIndex != 0) - self.update(newCorrectGraph, nodeIndex: nodeIndex) - nodeIndices.append(contentsOf: newCorrectGraph.allowedNextIndex[nodeIndex, default: IndexSet()]) - } - } - - static func build(input: CorrectGraph) -> Self { - var inputGraph = Self() - // 必ず、ノードより前のすべてのノードが処理済みであることを保証しながら、updateを実行する - var nodeIndices = Array([0]) - var processedIndices = IndexSet() - while let nodeIndex = nodeIndices.popLast() { - if processedIndices.contains(nodeIndex) { - continue - } - let prevIndices = input.allowedPrevIndex[nodeIndex, default: IndexSet()] - // 差がある場合 - let diff = prevIndices.subtracting(processedIndices) - guard diff.isEmpty else { - nodeIndices.append(nodeIndex) - nodeIndices.append(contentsOf: diff) - continue - } - processedIndices.insert(nodeIndex) - // root以外 - if nodeIndex != 0 { - inputGraph.update(input, nodeIndex: nodeIndex) - } else { - // nextCorrectNodeIndicesを更新しておく - inputGraph.nextCorrectNodeIndices[0] = input.allowedNextIndex[0] - } - nodeIndices.append(contentsOf: input.allowedNextIndex[nodeIndex, default: IndexSet()]) - } - return inputGraph - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphComponents.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphComponents.swift deleted file mode 100644 index 10a7a5d..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphComponents.swift +++ /dev/null @@ -1,125 +0,0 @@ -// -// InputGraph.swift -// -// -// Created by miwa on 2024/02/21. -// - -import Foundation -import DequeModule - -@testable import KanaKanjiConverterModule -import XCTest - -enum InputGraphRange: Equatable, Sendable { - case unknown - case startIndex(Int) - case endIndex(Int) - case range(Int, Int) - - init(startIndex: Int?, endIndex: Int?) { - self = switch (startIndex, endIndex) { - case let (s?, e?): .range(s, e) - case (let s?, nil): .startIndex(s) - case (nil, let e?): .endIndex(e) - case (nil, nil): .unknown - } - } - - var startIndex: Int? { - switch self { - case .unknown, .endIndex: nil - case .startIndex(let index), .range(let index, _): index - } - } - - var endIndex: Int? { - switch self { - case .unknown, .startIndex: nil - case .endIndex(let index), .range(_, let index): index - } - } -} - -struct InputGraphInputStyle: Identifiable { - init(from deprecatedInputStyle: KanaKanjiConverterModule.InputStyle) { - switch deprecatedInputStyle { - case .direct: - self = .systemFlickDirect - case .roman2kana: - self = .systemRomanKana - } - } - - init(from id: InputGraphInputStyle.ID) { - self = switch id { - case .all: .all - case .systemFlickDirect: .systemFlickDirect - case .systemRomanKana: .systemRomanKana - case .none: .none - default: fatalError("Unimplemented") - } - } - - private init(id: InputGraphInputStyle.ID, replaceSuffixTree: ReplaceSuffixTree.Node, correctSuffixTree: CorrectSuffixTree.Node) { - self.id = id - self.replaceSuffixTree = replaceSuffixTree - self.correctSuffixTree = correctSuffixTree - } - - struct ID: Equatable, Hashable, Sendable, CustomStringConvertible { - init(id: UInt8) { - self.id = id - } - init(from deprecatedInputStyle: KanaKanjiConverterModule.InputStyle) { - switch deprecatedInputStyle { - case .direct: - self = .systemFlickDirect - case .roman2kana: - self = .systemRomanKana - } - } - static let none = Self(id: 0x00) - static let all = Self(id: 0xFF) - static let systemFlickDirect = Self(id: 0x01) - static let systemRomanKana = Self(id: 0x02) - var id: UInt8 - - func isCompatible(with id: ID) -> Bool { - if self == .all { - true - } else { - self == id - } - } - var description: String { - "ID(\(id))" - } - } - static let none: Self = Self( - id: .none, - replaceSuffixTree: ReplaceSuffixTree.Node(), - correctSuffixTree: CorrectSuffixTree.Node() - ) - static let all: Self = Self( - id: .all, - replaceSuffixTree: ReplaceSuffixTree.Node(), - correctSuffixTree: CorrectSuffixTree.Node() - ) - static let systemFlickDirect: Self = Self( - id: .systemFlickDirect, - replaceSuffixTree: ReplaceSuffixTree.direct, - correctSuffixTree: CorrectSuffixTree.direct - ) - static let systemRomanKana: Self = Self( - id: .systemRomanKana, - replaceSuffixTree: ReplaceSuffixTree.roman2kana, - correctSuffixTree: CorrectSuffixTree.roman2kana - ) - - /// `id` for the input style. - /// - warning: value `0x00-0x7F` is reserved for system space. - var id: ID - var replaceSuffixTree: ReplaceSuffixTree.Node - var correctSuffixTree: CorrectSuffixTree.Node -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphTests.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphTests.swift deleted file mode 100644 index 81dd579..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/InputGraphTests.swift +++ /dev/null @@ -1,394 +0,0 @@ -// -// InputGraphTests.swift -// -// -// Created by miwa on 2024/02/21. -// - -import Foundation - -@testable import KanaKanjiConverterModule -import XCTest - -final class InputGraphTests: XCTestCase { - func testBuildSimpleDirectInput() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "あ", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect), - .init(value: "う", inputStyle: .systemFlickDirect) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual(inputGraph.nodes.count, 4) // Root nodes - } - func testBuildSimpleDirectInput_あかう() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "あ", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect), - .init(value: "う", inputStyle: .systemFlickDirect) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual(inputGraph.nodes.count, 5) // Root nodes - } - - func testBuildSimpleDirectInput_たいか() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "た", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "だ"}), - .init(character: "だ", inputElementsRange: .range(0, 1), correction: .typo) - ) - } - - func testBuildSimpleRoman2KanaInput_1文字だけ() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "i", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "い"}), - .init(character: "い", inputElementsRange: .range(0, 1), correction: .none) - ) - } - func testBuildSimpleRoman2KanaInput_2文字_it() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "い"}), - .init(character: "い", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t"}), - .init(character: "t", inputElementsRange: .range(1, 2), correction: .none) - ) - } - func testBuildSimpleRoman2KanaInput_3文字_ita() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "い"}), - .init(character: "い", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .range(1, 3), correction: .none) - ) - } - func testBuildSimpleRoman2KanaInput_4文字_sits() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "し"}), - .init(character: "し", inputElementsRange: .range(0, 2), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && !$0.correction.isTypo}), - .init(character: "t", inputElementsRange: .range(2, 3), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .range(2, 4), correction: .typo) - ) - } - func testBuildSimpleRoman2KanaInput_3文字_its() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "い"}), - .init(character: "い", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(1, 2)}), - .init(character: "t", inputElementsRange: .range(1, 2), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "s"}), - .init(character: "s", inputElementsRange: .range(2, 3), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .startIndex(1)}), - .init(character: "t", inputElementsRange: .startIndex(1), correction: .typo) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .range(1, 3), correction: .typo) - ) - } - func testBuildSimpleRoman2KanaInput_4文字_itsa() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "い"}), - .init(character: "い", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(1, 2)}), - .init(character: "t", inputElementsRange: .range(1, 2), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "s"}), - .init(character: "s", inputElementsRange: .range(2, 3), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .startIndex(1)}), - .init(character: "t", inputElementsRange: .startIndex(1), correction: .typo) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .range(1, 3), correction: .typo) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "あ"}), - .init(character: "あ", inputElementsRange: .range(3, 4), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "つ"}), - .init(character: "つ", inputElementsRange: .startIndex(1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "ぁ"}), - .init(character: "ぁ", inputElementsRange: .endIndex(4), correction: .none) - ) - // 「さ」の生成は許されない - XCTAssertNil(inputGraph.nodes.first(where: {$0.character == "さ"})) - } - - func testBuildSimpleRoman2KanaInput_7文字_youshou() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "y", inputStyle: .systemRomanKana), - .init(value: "o", inputStyle: .systemRomanKana), - .init(value: "u", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "h", inputStyle: .systemRomanKana), - .init(value: "o", inputStyle: .systemRomanKana), - .init(value: "u", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "よ"}), - .init(character: "よ", inputElementsRange: .range(0, 2), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "う" && $0.inputElementsRange == .range(2, 3)}), - .init(character: "う", inputElementsRange: .range(2, 3), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "し"}), - .init(character: "し", inputElementsRange: .startIndex(3), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "ょ"}), - .init(character: "ょ", inputElementsRange: .endIndex(6), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "う" && $0.inputElementsRange == .range(6, 7)}), - .init(character: "う", inputElementsRange: .range(6, 7), correction: .none) - ) - - } - - func testBuildSimpleRoman2KanaInput_2文字_tt() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "っ"}), - .init(character: "っ", inputElementsRange: .startIndex(0), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(0, 1)}), - .init(character: "t", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(1, 2)}), - .init(character: "t", inputElementsRange: .range(1, 2), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .endIndex(2)}), - .init(character: "t", inputElementsRange: .endIndex(2), correction: .none) - ) - } - func testBuildSimpleRoman2KanaInput_3文字_tta() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "っ"}), - .init(character: "っ", inputElementsRange: .startIndex(0), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .endIndex(3), correction: .none) - ) - // [t(1)t(2) → っt(3)]なので、t(2)に対してaがついて「た」が生じてはならない。 - XCTAssertEqual(inputGraph.nodes.filter({$0.character == "た"}).count, 1) - } - func testBuildSimpleRoman2KanaInput_3文字_nta() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "n", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "ん"}), - .init(character: "ん", inputElementsRange: .startIndex(0), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .endIndex(3), correction: .none) - ) - } - func testBuildSimpleRoman2KanaInput_4文字_itta() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "い"}), - .init(character: "い", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "っ"}), - .init(character: "っ", inputElementsRange: .startIndex(1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .endIndex(4), correction: .none) - ) - } - - func testBuildSimpleRoman2KanaInput_5文字_sitsi() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "i", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "し"}), - .init(character: "し", inputElementsRange: .range(0, 2), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .range(2, 4), correction: .typo) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "い"}), - .init(character: "い", inputElementsRange: .range(4, 5), correction: .none) - ) - - } - - func testBuildSimpleRoman2KanaInput_3文字_tts() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "っ" && $0.correction == .none}), - .init(character: "っ", inputElementsRange: .startIndex(0), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "っ" && $0.correction == .typo}), - .init(character: "っ", inputElementsRange: .startIndex(0), correction: .typo) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "s"}), - .init(character: "s", inputElementsRange: .range(2, 3), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "た"}), - .init(character: "た", inputElementsRange: .endIndex(3), correction: .typo) - ) - } - - func testBuildSimpleRoman2KanaInput_4文字_tysa() throws { - // ちゃあ/tyさ - let correctGraph = CorrectGraph.build(input: [ - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "y", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana) - ]) - // cleanで壊れる - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t"}), - .init(character: "t", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "y" && !$0.correction.isTypo}), - .init(character: "y", inputElementsRange: .range(1, 2), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "さ"}), - .init(character: "さ", inputElementsRange: .range(2, 4), correction: .none) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "ち"}), - .init(character: "ち", inputElementsRange: .startIndex(0), correction: .typo) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "ゃ" && $0.correction == .typo}), - .init(character: "ゃ", inputElementsRange: .endIndex(3), correction: .typo) - ) - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "あ"}), - .init(character: "あ", inputElementsRange: .range(3, 4), correction: .none) - ) - } - - func testBuildMixedInput_2文字_ts() throws { - let correctGraph = CorrectGraph.build(input: [ - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemFlickDirect) - ]) - let inputGraph = InputGraph.build(input: correctGraph).clean() - XCTAssertEqual( - inputGraph.nodes.first(where: {$0.character == "t"}), - .init(character: "t", inputElementsRange: .range(0, 1), correction: .none) - ) - XCTAssertFalse(inputGraph.nodes.contains(.init(character: "た", inputElementsRange: .range(0, 2), correction: .typo))) - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraph.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraph.swift deleted file mode 100644 index 171acec..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraph.swift +++ /dev/null @@ -1,279 +0,0 @@ -// -// LookupGraph.swift -// -// -// Created by miwa on 2024/03/31. -// - -import Foundation -@testable import KanaKanjiConverterModule - -struct LookupGraph { - struct Node: Equatable { - var character: Character - var charId: UInt8 - var inputElementsRange: InputGraphRange - var correction: CorrectGraph.Correction = .none - } - - var nodes: [Node] = [ - // root node - Node(character: "\0", charId: 0x00, inputElementsRange: .endIndex(0)) - ] - /// 許可されたNextIndex - var allowedNextIndex: [Int: IndexSet] = [:] - /// 許可されたprevIndex - var allowedPrevIndex: [Int: IndexSet] = [:] - /// node indexから始まるloudsノードのindex - var loudsNodeIndex: [Int: [Int: Int]] = [:] - - static func build(input: consuming InputGraph, character2CharId: (Character) -> UInt8) -> Self { - let nodes = input.nodes.map { - Node(character: $0.character, charId: character2CharId($0.character), inputElementsRange: $0.inputElementsRange, correction: $0.correction) - } - return Self(nodes: nodes, allowedNextIndex: input.allowedNextIndex, allowedPrevIndex: input.allowedPrevIndex) - } - - func nextIndexWithMatch(_ nodeIndex: Int, cacheNodeIndex: Int, cacheGraph: borrowing LookupGraph) -> [(Int, Int?)] { - let seeds: [Int] = Array(self.allowedNextIndex[nodeIndex, default: []]) - let cached = cacheGraph.allowedNextIndex[cacheNodeIndex, default: []].map {($0, cacheGraph.nodes[$0])} - return seeds.map { seed in - if let first = cached.first(where: {$0.1.charId == self.nodes[seed].charId}) { - (seed, first.0) - } else { - (seed, nil) - } - } - } - - mutating func byfixNodeIndices(in louds: LOUDS, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [Int]]) { - var indexSet = IndexSet(integer: 1) - // loudsのノードとLookupGraphのノードの対応を取るための辞書 - var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:] - // loudsのノードとLookupGraphのノードの対応を取るための辞書 - var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:] - typealias SearchItem = ( - nodeIndex: Int, - lastLoudsNodeIndex: Int - ) - var stack: [SearchItem] = [(startGraphNodeIndex, 1)] - while let (cNodeIndex, cLastLoudsNodeIndex) = stack.popLast() { - let cNode = self.nodes[cNodeIndex] - // nextNodesを探索 - if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: cNode.charId) { - graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex - loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex) - indexSet.insert(loudsNodeIndex) - let nextIndices = self.allowedNextIndex[cNodeIndex, default: IndexSet()] - stack.append(contentsOf: nextIndices.compactMap { index in - let node = self.nodes[index] - // endIndexをチェックする - // endIndexは単調増加である必要がある - if let cInputElementsEndIndex = cNode.inputElementsRange.endIndex, - let nInputElementsEndIndex = node.inputElementsRange.endIndex { - guard cInputElementsEndIndex < nInputElementsEndIndex else { - return nil - } - } - return (index, loudsNodeIndex) - }) - } else { - continue - } - } - self.loudsNodeIndex[startGraphNodeIndex] = graphNodeEndIndexToLoudsNodeIndex - return (indexSet, loudsNodeIndex2GraphNodeEndIndices) - } - - mutating func differentialByfixSearch( - in louds: LOUDS, - cacheLookupGraph: LookupGraph, - graphNodeIndex: (start: Int, cache: Int), - lookupGraphMatch: inout [Int: Int] - ) -> (IndexSet, [Int: [Int]]) { - guard var graphNodeEndIndexToLoudsNodeIndex = cacheLookupGraph.loudsNodeIndex[graphNodeIndex.cache] else { - return self.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex.start) - } - // lookupGraph.current.nodes[graphNodeIndex.start]とlookupGraph.cache.nodes[graphNodeIndex.cache]はマッチする - - var indexSet = IndexSet(integer: 1) - // loudsのノードとLookupGraphのノードの対応を取るための辞書 - var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:] - typealias SearchItem = ( - nodeIndex: Int, - /// cache側のnodeIndex。ノードがマッチしていればnilではない、マッチしていなければnil - cacheNodeIndex: Int?, - lastLoudsNodeIndex: Int - ) - var stack: [SearchItem] = [(graphNodeIndex.start, graphNodeIndex.cache, 1)] - while let (cNodeIndex, cCacheNodeIndex, cLastLoudsNodeIndex) = stack.popLast() { - let cNode = self.nodes[cNodeIndex] - if let cCacheNodeIndex, let loudsNodeIndex = graphNodeEndIndexToLoudsNodeIndex[cCacheNodeIndex] { - loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex) - indexSet.insert(loudsNodeIndex) - // next nodesを確認する - stack.append(contentsOf: self.nextIndexWithMatch(cNodeIndex, cacheNodeIndex: cCacheNodeIndex, cacheGraph: cacheLookupGraph).map { - ($0.0, $0.1, loudsNodeIndex) - }) - // マッチ情報を更新する - lookupGraphMatch[cNodeIndex] = cCacheNodeIndex - } - // キャッシュが効かないケース - else if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: cNode.charId) { - graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex - loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex) - indexSet.insert(loudsNodeIndex) - let nextIndices = self.allowedNextIndex[cNodeIndex, default: IndexSet()] - stack.append(contentsOf: nextIndices.compactMap { index in - let node = self.nodes[index] - // endIndexをチェックする - // endIndexは単調増加である必要がある - if let cInputElementsEndIndex = cNode.inputElementsRange.endIndex, - let nInputElementsEndIndex = node.inputElementsRange.endIndex { - guard cInputElementsEndIndex < nInputElementsEndIndex else { - return nil - } - } - return (index, nil, loudsNodeIndex) - }) - } - } - self.loudsNodeIndex[graphNodeIndex.start] = graphNodeEndIndexToLoudsNodeIndex - return (indexSet, loudsNodeIndex2GraphNodeEndIndices) - } - -} - -extension DicdataStore { - func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> (LookupGraph, ConvertGraph) { - var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) }) - var stack = Array(lookupGraph.allowedNextIndex[0, default: []]) - var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:] - var processedIndexSet = IndexSet() - while let graphNodeIndex = stack.popLast() { - // 処理済みのノードは無視 - guard !processedIndexSet.contains(graphNodeIndex) else { - continue - } - let graphNode = lookupGraph.nodes[graphNodeIndex] - guard let louds = self.loadLOUDS(query: String(graphNode.character.toKatakana())) else { - continue - } - /// graphNodeIndexから始まる辞書エントリを列挙 - /// * loudsNodeIndices: loudsから得たloudstxt内のデータの位置 - /// * loudsNodeIndex2GraphNodeEndIndices: それぞれのloudsNodeIndexがどのgraphNodeIndexを終端とするか - let (indexSet, loudsNodeIndex2GraphNodeEndIndices) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex) - let dicdataWithIndex: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: String(graphNode.character.toKatakana()), indices: indexSet, option: option) - - // latticeNodesを構築する - var latticeNodes: [ConvertGraph.LatticeNode] = [] - for (loudsNodeIndex, dicdata) in dicdataWithIndex { - for endNodeIndex in loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []] { - let inputElementsRange = InputGraphRange( - startIndex: graphNode.inputElementsRange.startIndex, - endIndex: lookupGraph.nodes[endNodeIndex].inputElementsRange.endIndex - ) - if graphNode.inputElementsRange.startIndex == 0 { - latticeNodes.append(contentsOf: dicdata.map { - .init(data: $0, endNodeIndex: endNodeIndex, inputElementsRange: inputElementsRange, prevs: [.BOSNode()]) - }) - } else { - latticeNodes.append(contentsOf: dicdata.map { - .init(data: $0, endNodeIndex: endNodeIndex, inputElementsRange: inputElementsRange) - }) - } - } - } - graphNodeIndex2LatticeNodes[graphNodeIndex] = latticeNodes - - // 続くノードのindexを追加する - processedIndexSet.insert(graphNodeIndex) - stack.append(contentsOf: lookupGraph.allowedNextIndex[graphNodeIndex, default: []]) - } - return (lookupGraph, ConvertGraph(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes)) - } - - func buildConvertGraphDifferential( - inputGraph: consuming InputGraph, - cacheLookupGraph: LookupGraph, - option: ConvertRequestOptions - ) -> ( - lookupGraph: LookupGraph, - convertGraph: ConvertGraph, - lookupGraphMatch: [Int: Int] - ) { - var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) }) - typealias StackItem = ( - currentLookupGraphNodeIndex: Int, - cacheLookupGraphNodeIndex: Int? - ) - // BOS同士はマッチする - // BOSの次のノードのうちマッチするもの、しないものを確認 - var stack: [StackItem] = lookupGraph.nextIndexWithMatch(0, cacheNodeIndex: 0, cacheGraph: cacheLookupGraph) - var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:] - var processedIndexSet = IndexSet() - var lookupGraphMatch: [Int: Int] = [:] - - while let (graphNodeIndex, cacheGraphNodeIndex) = stack.popLast() { - // 処理済みのノードは無視 - guard !processedIndexSet.contains(graphNodeIndex) else { - continue - } - let graphNode = lookupGraph.nodes[graphNodeIndex] - guard let louds = self.loadLOUDS(query: String(graphNode.character.toKatakana())) else { - continue - } - /// graphNodeIndexから始まる辞書エントリを列挙 - /// * loudsNodeIndices: loudsから得たloudstxt内のデータの位置 - /// * loudsNodeIndex2GraphNodeEndIndices: それぞれのloudsNodeIndexがどのgraphNodeIndexを終端とするか - let (indexSet, loudsNodeIndex2GraphNodeEndIndices) = if let cacheGraphNodeIndex { - lookupGraph.differentialByfixSearch(in: louds, cacheLookupGraph: cacheLookupGraph, graphNodeIndex: (graphNodeIndex, cacheGraphNodeIndex), lookupGraphMatch: &lookupGraphMatch) - } else { - lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex) - } - let dicdataWithIndex: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: String(graphNode.character.toKatakana()), indices: indexSet, option: option) - - // latticeNodesを構築する - var latticeNodes: [ConvertGraph.LatticeNode] = [] - for (loudsNodeIndex, dicdata) in dicdataWithIndex { - for endNodeIndex in loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []] { - let inputElementsRange = InputGraphRange( - startIndex: graphNode.inputElementsRange.startIndex, - endIndex: lookupGraph.nodes[endNodeIndex].inputElementsRange.endIndex - ) - if graphNode.inputElementsRange.startIndex == 0 { - latticeNodes.append(contentsOf: dicdata.map { - .init(data: $0, endNodeIndex: endNodeIndex, inputElementsRange: inputElementsRange, prevs: [.BOSNode()]) - }) - } else { - latticeNodes.append(contentsOf: dicdata.map { - .init(data: $0, endNodeIndex: endNodeIndex, inputElementsRange: inputElementsRange) - }) - } - } - } - graphNodeIndex2LatticeNodes[graphNodeIndex] = latticeNodes - - // 続くノードのindexを追加する - processedIndexSet.insert(graphNodeIndex) - if let cacheGraphNodeIndex { - stack.append(contentsOf: lookupGraph.nextIndexWithMatch(graphNodeIndex, cacheNodeIndex: cacheGraphNodeIndex, cacheGraph: cacheLookupGraph)) - } else { - stack.append(contentsOf: lookupGraph.allowedNextIndex[graphNodeIndex, default: []].map {($0, nil)}) - } - } - return (lookupGraph, ConvertGraph(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes), lookupGraphMatch) - } - - func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] { - // split = 2048 - let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11}) - var data: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = [] - for (key, value) in dict { - // FIXME: use local option - // trueIndexはそのまま、keyIndexはsplit-1=2047で&したものを用いる - data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {(trueIndex: $0, keyIndex: $0 & 2047)}, option: option)) - } - return data - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraphTests.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraphTests.swift deleted file mode 100644 index a4aedff..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/LookupGraphTests.swift +++ /dev/null @@ -1,307 +0,0 @@ -// -// LookupGraphTests.swift -// -// -// Created by miwa on 2024/02/23. -// - -import XCTest -import Foundation -@testable import KanaKanjiConverterModule - -final class LookupGraphTests: XCTestCase { - func requestOptions() -> ConvertRequestOptions { - .withDefaultDictionary(requireJapanesePrediction: false, requireEnglishPrediction: false, keyboardLanguage: .ja_JP, learningType: .nothing, memoryDirectoryURL: URL(fileURLWithPath: ""), sharedContainerURL: URL(fileURLWithPath: ""), metadata: nil) - } - - func setup() -> (dicdataStore: DicdataStore, character2CharId: (Character) -> UInt8) { - let dicdataStore = DicdataStore(convertRequestOptions: requestOptions()) - let character2CharId: (Character) -> UInt8 = { dicdataStore.character2charId($0.toKatakana()) } - return (dicdataStore, character2CharId) - } - - func testByfixNodeIndices_しかい() throws { - let values = setup() - guard let louds = LOUDS.load("シ", option: requestOptions()) else { - XCTFail() - return - } - let correctGraph = CorrectGraph.build(input: [ - .init(value: "し", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect) - ]) - let inputGraph = InputGraph.build(input: correctGraph) - var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId) - let startNodeIndex = inputGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "し" }) - XCTAssertNotNil(startNodeIndex) - let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0) - let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "シ", indices: loudsNodeIndices, option: requestOptions()) - let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata } - // シ - XCTAssertTrue(dicdata.contains {$0.word == "死"}) - // シカ - XCTAssertTrue(dicdata.contains {$0.word == "鹿"}) - XCTAssertTrue(dicdata.contains {$0.word == "歯科"}) - // シガ - XCTAssertTrue(dicdata.contains {$0.word == "滋賀"}) - // シカイ - XCTAssertTrue(dicdata.contains {$0.word == "司会"}) - XCTAssertTrue(dicdata.contains {$0.word == "視界"}) - XCTAssertTrue(dicdata.contains {$0.word == "死界"}) - // シガイ - XCTAssertTrue(dicdata.contains {$0.word == "市外"}) - XCTAssertTrue(dicdata.contains {$0.word == "市街"}) - XCTAssertTrue(dicdata.contains {$0.word == "死骸"}) - - // all keys - XCTAssertEqual( - dicdata.mapSet {$0.ruby}.symmetricDifference(["シ", "シカ", "シカイ", "シガ", "シガイ"]), - [] - ) - } - - func testByfixNodeIndices_みらい() throws { - let values = setup() - guard let louds = LOUDS.load("ミ", option: requestOptions()) else { - XCTFail() - return - } - let correctGraph = CorrectGraph.build(input: [ - .init(value: "み", inputStyle: .systemFlickDirect), - .init(value: "ら", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect) - ]) - let inputGraph = InputGraph.build(input: correctGraph) - var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId) - let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "み" }) - XCTAssertNotNil(startNodeIndex) - let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0) - let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "ミ", indices: loudsNodeIndices, option: requestOptions()) - let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata } - // ミ - XCTAssertTrue(dicdata.contains {$0.word == "見"}) - // ミラ - XCTAssertTrue(dicdata.contains {$0.word == "ミラ"}) - // ミライ - XCTAssertTrue(dicdata.contains {$0.word == "未来"}) - - // all keys - XCTAssertEqual( - dicdata.mapSet {$0.ruby}.symmetricDifference(["ミ", "ミラ", "ミライ"]), - [] - ) - } - - func testByfixNodeIndices_たいかく() throws { - let values = setup() - guard let louds = LOUDS.load("タ", option: requestOptions()) else { - XCTFail() - return - } - let correctGraph = CorrectGraph.build(input: [ - .init(value: "た", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect), - .init(value: "く", inputStyle: .systemFlickDirect) - ]) - let inputGraph = InputGraph.build(input: correctGraph) - var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId) - let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "た" }) - XCTAssertNotNil(startNodeIndex) - let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0) - let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "タ", indices: loudsNodeIndices, option: requestOptions()) - let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata } - // タ - XCTAssertTrue(dicdata.contains {$0.word == "他"}) - // タイ - XCTAssertTrue(dicdata.contains {$0.word == "タイ"}) - XCTAssertTrue(dicdata.contains {$0.word == "他意"}) - // タイカ - XCTAssertTrue(dicdata.contains {$0.word == "対価"}) - // タイガ - XCTAssertTrue(dicdata.contains {$0.word == "大河"}) - // タイカク - XCTAssertTrue(dicdata.contains {$0.word == "体格"}) - // タイガク - XCTAssertTrue(dicdata.contains {$0.word == "退学"}) - // all keys - XCTAssertEqual( - dicdata.mapSet {$0.ruby}.symmetricDifference(["タ", "タイ", "タイカ", "タイガ", "タイカク", "タイガク"]), - [] - ) - } - - func testByfixNodeIndices_sittai() throws { - let values = setup() - guard let louds = LOUDS.load("シ", option: requestOptions()) else { - XCTFail() - return - } - // 「しっ」の候補が存在するかどうかを確認 - let correctGraph = CorrectGraph.build(input: [ - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "a", inputStyle: .systemRomanKana), - .init(value: "i", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph) - var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId) - let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "し" }) - XCTAssertNotNil(startNodeIndex) - let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0) - let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "シ", indices: loudsNodeIndices, option: requestOptions()) - let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata } - // シ - XCTAssertTrue(dicdata.contains {$0.word == "死"}) - // シッ - XCTAssertTrue(dicdata.contains {$0.word == "知っ"}) - XCTAssertTrue(dicdata.contains {$0.word == "しっ"}) - // シッタ - XCTAssertTrue(dicdata.contains {$0.word == "叱咤"}) - // シッタイ - XCTAssertTrue(dicdata.contains {$0.word == "失態"}) - // all keys - XCTAssertEqual( - dicdata.mapSet {$0.ruby}.symmetricDifference(["シ", "シッ", "シッタ", "シッタイ"]), - [] - ) - } - - func testByfixNodeIndices_sitsi() throws { - let values = setup() - guard let louds = LOUDS.load("シ", option: requestOptions()) else { - XCTFail() - return - } - // ts -> ta - let correctGraph = CorrectGraph.build(input: [ - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "i", inputStyle: .systemRomanKana), - .init(value: "t", inputStyle: .systemRomanKana), - .init(value: "s", inputStyle: .systemRomanKana), - .init(value: "i", inputStyle: .systemRomanKana) - ]) - let inputGraph = InputGraph.build(input: correctGraph) - var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId) - let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "し" }) - XCTAssertNotNil(startNodeIndex) - let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0) - let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "シ", indices: loudsNodeIndices, option: requestOptions()) - let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata } - // シ - XCTAssertTrue(dicdata.contains {$0.word == "死"}) - // [シツ]ィ - XCTAssertTrue(dicdata.contains {$0.word == "質"}) - XCTAssertTrue(dicdata.contains {$0.word == "室"}) - // シタ - XCTAssertTrue(dicdata.contains {$0.word == "下"}) - XCTAssertTrue(dicdata.contains {$0.word == "舌"}) - // シタイ - XCTAssertTrue(dicdata.contains {$0.word == "死体"}) - XCTAssertTrue(dicdata.contains {$0.word == "肢体"}) - // all keys - XCTAssertEqual( - dicdata.mapSet {$0.ruby}.symmetricDifference(["シ", "シツ", "シタ", "シタイ"]), - [] - ) - } - - func testByfixNodeIndices_たいか_add_く() throws { - let values = setup() - guard let louds = LOUDS.load("タ", option: requestOptions()) else { - XCTFail() - return - } - let correctGraph1 = CorrectGraph.build(input: [ - .init(value: "た", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect) - ]) - let inputGraph1 = InputGraph.build(input: correctGraph1) - var lookupGraph1 = LookupGraph.build(input: inputGraph1, character2CharId: values.character2CharId) - let startNodeIndex1 = lookupGraph1.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph1.nodes[$0].character == "た" }) - XCTAssertNotNil(startNodeIndex1) - _ = lookupGraph1.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex1 ?? 0) - - let correctGraph2 = CorrectGraph.build(input: [ - .init(value: "た", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect), - .init(value: "く", inputStyle: .systemFlickDirect) // added - ]) - let inputGraph2 = InputGraph.build(input: correctGraph2) - var lookupGraph2 = LookupGraph.build(input: inputGraph2, character2CharId: values.character2CharId) - let startNodeIndex2 = lookupGraph2.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph2.nodes[$0].character == "た" }) - XCTAssertNotNil(startNodeIndex2) - var matchInfo: [Int: Int] = [:] - let (loudsNodeIndices2, _) = lookupGraph2.differentialByfixSearch(in: louds, cacheLookupGraph: lookupGraph1, graphNodeIndex: (startNodeIndex2 ?? 0, startNodeIndex1 ?? 0), lookupGraphMatch: &matchInfo) - let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "タ", indices: loudsNodeIndices2, option: requestOptions()) - let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata } - // タ - XCTAssertTrue(dicdata.contains {$0.word == "他"}) - // タイ - XCTAssertTrue(dicdata.contains {$0.word == "タイ"}) - XCTAssertTrue(dicdata.contains {$0.word == "他意"}) - // タイカ - XCTAssertTrue(dicdata.contains {$0.word == "対価"}) - // タイガ - XCTAssertTrue(dicdata.contains {$0.word == "大河"}) - // タイカク - XCTAssertTrue(dicdata.contains {$0.word == "体格"}) - // タイガク - XCTAssertTrue(dicdata.contains {$0.word == "退学"}) - // all keys - XCTAssertEqual( - dicdata.mapSet {$0.ruby}.symmetricDifference(["タ", "タイ", "タイカ", "タイガ", "タイカク", "タイガク"]), - [] - ) - } - - func testByfixNodeIndices_たいか_remove_く() throws { - let values = setup() - guard let louds = LOUDS.load("タ", option: requestOptions()) else { - XCTFail() - return - } - let correctGraph1 = CorrectGraph.build(input: [ - .init(value: "た", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect), - .init(value: "か", inputStyle: .systemFlickDirect) - ]) - let inputGraph1 = InputGraph.build(input: correctGraph1) - var lookupGraph1 = LookupGraph.build(input: inputGraph1, character2CharId: values.character2CharId) - let startNodeIndex1 = lookupGraph1.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph1.nodes[$0].character == "た" }) - XCTAssertNotNil(startNodeIndex1) - _ = lookupGraph1.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex1 ?? 0) - - let correctGraph2 = CorrectGraph.build(input: [ - .init(value: "た", inputStyle: .systemFlickDirect), - .init(value: "い", inputStyle: .systemFlickDirect) - ]) - let inputGraph2 = InputGraph.build(input: correctGraph2) - var lookupGraph2 = LookupGraph.build(input: inputGraph2, character2CharId: values.character2CharId) - let startNodeIndex2 = lookupGraph2.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph2.nodes[$0].character == "た" }) - XCTAssertNotNil(startNodeIndex2) - var matchInfo: [Int: Int] = [:] - let (loudsNodeIndices2, _) = lookupGraph2.differentialByfixSearch(in: louds, cacheLookupGraph: lookupGraph1, graphNodeIndex: (startNodeIndex2 ?? 0, startNodeIndex1 ?? 0), lookupGraphMatch: &matchInfo) - let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "タ", indices: loudsNodeIndices2, option: requestOptions()) - let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata } - // タ - XCTAssertTrue(dicdata.contains {$0.word == "他"}) - // タイ - XCTAssertTrue(dicdata.contains {$0.word == "タイ"}) - XCTAssertTrue(dicdata.contains {$0.word == "他意"}) - // タイカ - XCTAssertFalse(dicdata.contains {$0.ruby == "タイカ"}) - // タイガ - XCTAssertFalse(dicdata.contains {$0.ruby == "タイガ"}) - // all keys - XCTAssertEqual( - dicdata.mapSet {$0.ruby}.symmetricDifference(["タ", "タイ"]), - [] - ) - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ReplaceSuffixTree.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ReplaceSuffixTree.swift deleted file mode 100644 index a54b0df..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/InputGraph/ReplaceSuffixTree.swift +++ /dev/null @@ -1,73 +0,0 @@ -// -// ReplaceSuffixTree.swift -// -// -// Created by miwa on 2024/02/23. -// - -import Foundation - -@testable import KanaKanjiConverterModule -import XCTest - -// 置換のためのsuffix tree -enum ReplaceSuffixTree { - - final class Node { - init(_ children: [Character: Node] = [:], character: Character = "\0", value: String? = nil) { - self.children = children - self.value = value - self.character = character - } - var children: [Character: Node] = [:] - var character: Character - var value: String? - func find(key: Character) -> Node? { - return children[key] - } - func insert(route: some Collection, value: consuming String, inputStyle: InputGraphInputStyle.ID) { - if let first = route.first { - if let tree = self.children[first] { - tree.insert(route: route.dropFirst(), value: consume value, inputStyle: inputStyle) - } else { - let tree = Node(character: first) - tree.insert(route: route.dropFirst(), value: consume value, inputStyle: inputStyle) - self.children[first] = tree - } - } else { - self.value = consume value - } - } - } - - static let roman2kana: Node = { - var tree = Node() - for item in KanaKanjiConverterModule.Roman2Kana.hiraganaChanges { - tree.insert(route: item.key.reversed(), value: String(item.value), inputStyle: .systemRomanKana) - } - // additionals - for item in ["bb", "cc", "dd", "ff", "gg", "hh", "jj", "kk", "ll", "mm", "pp", "qq", "rr", "ss", "tt", "vv", "ww", "xx", "yy", "zz"] { - tree.insert(route: Array(item.reversed()), value: "っ" + String(item.last!), inputStyle: .systemRomanKana) - } - // additionals - for item in ["nb", "nc", "nd", "nf", "ng", "nh", "nj", "nk", "nl", "nm", "np", "nq", "nr", "ns", "nt", "nv", "nw", "nx", "nz"] { - tree.insert(route: Array(item.reversed()), value: "ん" + String(item.last!), inputStyle: .systemRomanKana) - } - return tree - }() - static let direct: Node = Node() -} - -final class ReplaceTreeTests: XCTestCase { - func testRoman2Kana() throws { - let t = ReplaceSuffixTree.roman2kana.find(key: "t") - let tt = t?.find(key: "t") - XCTAssertEqual(tt?.value, "っt") - let t2 = ReplaceSuffixTree.roman2kana.find(key: "t") - let tt2 = t2?.find(key: "t") - XCTAssertEqual(tt2?.value, "っt") - let a = ReplaceSuffixTree.roman2kana.find(key: "a") - let ta = a?.find(key: "t") - XCTAssertEqual(ta?.value, "た") - } -} diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/README.md b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/README.md deleted file mode 100644 index 560a0db..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Experimental Tests - -実験的な実装をテスト駆動で開発するためのディレクトリ。 diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/extension Kana2Kanji+InputGraph.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/extension Kana2Kanji+InputGraph.swift deleted file mode 100644 index a9c6194..0000000 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/ExperimentalTests/extension Kana2Kanji+InputGraph.swift +++ /dev/null @@ -1,367 +0,0 @@ -// -// extension Kana2Kanji+InputGraph.swift -// -// -// Created by miwa on 2024/02/23. -// - -import Foundation -@testable import KanaKanjiConverterModule - -import XCTest - -extension Kana2Kanji { - struct Result { - var endNode: ConvertGraph.LatticeNode - var correctGraph: CorrectGraph - var inputGraph: InputGraph - var lookupGraph: LookupGraph - var convertGraph: ConvertGraph - } - func _experimental_all(_ inputData: ComposingTextV2, option: ConvertRequestOptions) -> Result { - // グラフ構築 - print(#file, "start") - let correctGraph = CorrectGraph.build(input: inputData.input) - let inputGraph = InputGraph.build(input: correctGraph) - // 辞書ルックアップによりconvertGraphを構築 - print(#file, "lookup", inputGraph) - let (lookupGraph, convertGraph) = self.dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: option) - print(#file, "convert") - let result = convertGraph.convertAll(option: option, dicdataStore: self.dicdataStore) - return Result(endNode: result, correctGraph: correctGraph, inputGraph: inputGraph, lookupGraph: lookupGraph, convertGraph: convertGraph) - } - - func _experimental_additional( - composingText: ComposingTextV2, - additionalInputsStartIndex: Int, - previousResult: consuming Result, - option: ConvertRequestOptions - ) -> Result { - // グラフ構築 - print(#file, "start") - var insertedIndexSet = IndexSet() - for i in additionalInputsStartIndex ..< composingText.input.endIndex { - insertedIndexSet.formUnion(previousResult.correctGraph.update(with: composingText.input[i], index: i, input: composingText.input)) - } - // MARK: inputGraphの差分ベースの構築は困難なため、普通に構築し直す - let inputGraph = InputGraph.build(input: previousResult.correctGraph) - // MARK: ここは差分ベース - print(#file, "lookup", previousResult.inputGraph) - var (lookupGraph, convertGraph, matchInfo) = self.dicdataStore.buildConvertGraphDifferential(inputGraph: inputGraph, cacheLookupGraph: previousResult.lookupGraph, option: option) - print(#file, "convert") - let result = convertGraph.convertAllDifferential(cacheConvertGraph: previousResult.convertGraph, option: option, dicdataStore: self.dicdataStore, lookupGraphMatchInfo: matchInfo) - return Result(endNode: result, correctGraph: previousResult.correctGraph, inputGraph: inputGraph, lookupGraph: lookupGraph, convertGraph: convertGraph) - } - - func _experimental_delete( - composingText: ComposingTextV2, - previousResult: consuming Result, - option: ConvertRequestOptions - ) -> Result { - // グラフ構築 - print(#file, "start") - // MARK: この部分の差分ベースの構築は困難なため、普通に構築し直す - let correctGraph = CorrectGraph.build(input: composingText.input) - let inputGraph = InputGraph.build(input: correctGraph) - // MARK: ここから差分ベースにする - print(#file, "lookup", previousResult.inputGraph) - var (lookupGraph, convertGraph, matchInfo) = self.dicdataStore.buildConvertGraphDifferential(inputGraph: inputGraph, cacheLookupGraph: previousResult.lookupGraph, option: option) - print(#file, "convert") - let result = convertGraph.convertAllDifferential(cacheConvertGraph: previousResult.convertGraph, option: option, dicdataStore: self.dicdataStore, lookupGraphMatchInfo: matchInfo) - return Result(endNode: result, correctGraph: previousResult.correctGraph, inputGraph: inputGraph, lookupGraph: lookupGraph, convertGraph: convertGraph) - } -} - -private extension ConvertGraph.LatticeNode { - func joinedPrevs() -> [String] { - var result: [String] = [] - for prev in self.prevs { - var words = [self.data.word, prev.data.word] - var curPrev: (any RegisteredNodeProtocol) = prev - while let newPrev = curPrev.prev { - words.append(newPrev.data.word) - curPrev = newPrev - } - result.append(words.reversed().joined()) - } - return result - } -} - -final class ExperimentalConversionTests: XCTestCase { - func requestOptions() -> ConvertRequestOptions { - .withDefaultDictionary(requireJapanesePrediction: false, requireEnglishPrediction: false, keyboardLanguage: .ja_JP, learningType: .nothing, memoryDirectoryURL: URL(fileURLWithPath: ""), sharedContainerURL: URL(fileURLWithPath: ""), metadata: nil) - } - - func testBuildConvertGraph_たいかく() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - var c = ComposingTextV2() - c.append("たいかく", inputStyle: .systemFlickDirect) - let correctGraph = CorrectGraph.build(input: c.input) - let inputGraph = InputGraph.build(input: consume correctGraph) - let (_, convertGraph) = dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: requestOptions()) - XCTAssertEqual( - convertGraph.nodes.first { - $0.latticeNodes.contains(where: {$0.data.word == "他"}) - }?.latticeNodes.mapSet {$0.data.ruby} - .symmetricDifference(["タ", "タイ", "タイカ", "タイガ", "タイカク", "タイガク"]), - [] - ) - } - - func testConversion_たい() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("たい", inputStyle: .systemFlickDirect) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("タイ")) // たい - XCTAssertTrue(result.endNode.joinedPrevs().contains("台")) // たい - } - - func testConversion_いか() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("いか", inputStyle: .systemFlickDirect) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("以下")) // いか - XCTAssertTrue(result.endNode.joinedPrevs().contains("伊賀")) // いが - } - - func testConversion_かかく() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("かかく", inputStyle: .systemFlickDirect) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("価格")) // かかく - XCTAssertTrue(result.endNode.joinedPrevs().contains("科学")) // かがく - XCTAssertTrue(result.endNode.joinedPrevs().contains("画角")) // がかく - XCTAssertTrue(result.endNode.joinedPrevs().contains("雅楽")) // ががく - } - - func testConversion_たいか() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("たいか", inputStyle: .systemFlickDirect) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("対価")) // たいか - XCTAssertTrue(result.endNode.joinedPrevs().contains("大河")) // たいが - } - - func testConversion_たいかく() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("たいかく", inputStyle: .systemFlickDirect) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("体格")) // たいかく - XCTAssertTrue(result.endNode.joinedPrevs().contains("退学")) // たいがく - } - - func testConversion_むらさき() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("むらさき", inputStyle: .systemFlickDirect) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("紫")) // むらさき - } - - func testBuildConvertGraph_youshouki() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - var c = ComposingTextV2() - c.append("youshouki", inputStyle: .systemRomanKana) - let correctGraph = CorrectGraph.build(input: c.input) - let inputGraph = InputGraph.build(input: consume correctGraph) - let (_, convertGraph) = dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: requestOptions()) - XCTAssertEqual( - convertGraph.nodes.first { - $0.latticeNodes.contains(where: {$0.data.word == "世"}) - }?.latticeNodes.mapSet {$0.data.ruby} - .symmetricDifference(["ヨ", "ヨウ", "ヨウシ", "ヨウショ", "ヨウショウ", "ヨウショウキ"]), - [] - ) - } - - func testConversion_youshouki() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("youshouki", inputStyle: .systemRomanKana) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("幼少期")) // ようしょうき - } - - func testConversion_みらいえいが() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - do { - var c = ComposingTextV2() - c.append("みらいえいが", inputStyle: .systemFlickDirect) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("未来映画")) - } - do { - var c = ComposingTextV2() - c.append("miraieiga", inputStyle: .systemRomanKana) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("未来映画")) - } - } - - func testConversion() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - do { - var c = ComposingTextV2() - c.append("sitta", inputStyle: .systemRomanKana) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("知った")) - } - do { - var c = ComposingTextV2() - c.append("unda", inputStyle: .systemRomanKana) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("産んだ")) - } - do { - var c = ComposingTextV2() - c.append("ixtsuta", inputStyle: .systemRomanKana) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("言った")) - } - do { - var c = ComposingTextV2() - c.append("its", inputStyle: .systemRomanKana) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("いた")) - } - do { - var c = ComposingTextV2() - c.append("itsi", inputStyle: .systemRomanKana) - let result = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(result.endNode.joinedPrevs().contains("痛い")) - } - } - - func testConversion_incremental_たい() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("たい", inputStyle: .systemFlickDirect) - let firstResult = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(firstResult.endNode.joinedPrevs().contains("タイ")) // たい - XCTAssertTrue(firstResult.endNode.joinedPrevs().contains("台")) // たい - c.append("こ", inputStyle: .systemFlickDirect) - let secondResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 2, - previousResult: firstResult, - option: requestOptions() - ) - XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太鼓")) // たいこ - XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太古")) // たいこ - c.append("く", inputStyle: .systemFlickDirect) - let thirdResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 3, - previousResult: secondResult, - option: requestOptions() - ) - XCTAssertTrue(thirdResult.endNode.joinedPrevs().contains("大国")) // たいこく - c.removeLast() - let forthResult = kana2kanji._experimental_delete( - composingText: c, - previousResult: thirdResult, - option: requestOptions() - ) - XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太鼓")) // たいこ - XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太古")) // たいこ - XCTAssertFalse(forthResult.endNode.joinedPrevs().contains("大国")) // たいこく - } - - func testConversion_incremental_intai() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("i", inputStyle: .systemRomanKana) - let firstResult = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(firstResult.endNode.joinedPrevs().contains("胃")) // い - c.append("n", inputStyle: .systemRomanKana) - let secondResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 1, - previousResult: firstResult, - option: requestOptions() - ) - print(secondResult.endNode.joinedPrevs()) - c.append("t", inputStyle: .systemRomanKana) - let thirdResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 2, - previousResult: secondResult, - option: requestOptions() - ) - print(thirdResult.endNode.joinedPrevs()) - c.append("a", inputStyle: .systemRomanKana) - let forthResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 3, - previousResult: thirdResult, - option: requestOptions() - ) - XCTAssertTrue(forthResult.endNode.joinedPrevs().contains("インタ")) // インタ - c.append("i", inputStyle: .systemRomanKana) - let fifthResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 4, - previousResult: forthResult, - option: requestOptions() - ) - XCTAssertTrue(fifthResult.endNode.joinedPrevs().contains("引退")) // インタイ - } - - func testConversion_incremental_intsi() throws { - let dicdataStore = DicdataStore(requestOptions: requestOptions()) - let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore) - var c = ComposingTextV2() - c.append("i", inputStyle: .systemRomanKana) - let firstResult = kana2kanji._experimental_all(c, option: requestOptions()) - XCTAssertTrue(firstResult.endNode.joinedPrevs().contains("胃")) // い - c.append("n", inputStyle: .systemRomanKana) - let secondResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 1, - previousResult: firstResult, - option: requestOptions() - ) - // XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("胃n")) // in - c.append("t", inputStyle: .systemRomanKana) - let thirdResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 2, - previousResult: secondResult, - option: requestOptions() - ) - // XCTAssertTrue(thirdResult.endNode.joinedPrevs().contains("インt")) // int - c.append("s", inputStyle: .systemRomanKana) - let forthResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 3, - previousResult: thirdResult, - option: requestOptions() - ) - XCTAssertTrue(forthResult.endNode.joinedPrevs().contains("インタ")) // インタ - c.append("i", inputStyle: .systemRomanKana) - let fifthResult = kana2kanji._experimental_additional( - composingText: c, - additionalInputsStartIndex: 4, - previousResult: forthResult, - option: requestOptions() - ) - XCTAssertTrue(fifthResult.endNode.joinedPrevs().contains("引退")) // インタイ - } -}