From f5037e393cc833b69c853620fb8871f7a48dcfcb Mon Sep 17 00:00:00 2001
From: Miwa <63481257+ensan-hcl@users.noreply.github.com>
Date: Fri, 27 Jun 2025 22:32:46 +0900
Subject: [PATCH] =?UTF-8?q?perf:=20=E5=90=8C=E3=81=98louds=E3=81=AB?=
 =?UTF-8?q?=E5=AF=BE=E3=81=99=E3=82=8B=E6=A4=9C=E7=B4=A2=E3=82=92=E3=83=90?=
 =?UTF-8?q?=E3=83=AB=E3=82=AF=E5=87=A6=E7=90=86=E3=81=99=E3=82=8B=E3=81=93?=
 =?UTF-8?q?=E3=81=A8=E3=81=AB=E3=82=88=E3=81=A3=E3=81=A6=E3=80=81=E5=87=A6?=
 =?UTF-8?q?=E7=90=86=E3=81=AE=E5=8A=B9=E7=8E=87=E5=8C=96=E3=82=92=E5=AE=9F?=
 =?UTF-8?q?=E7=8F=BE=20(#208)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* perf: 同じloudsに対する検索をバルク処理することによって、処理の効率化を実現

* fix: bug

* test: add typo correction test

* chore: finalize implementation
---
 .../DicdataStore/DicdataStore.swift           | 34 +++++++----
 .../LOUDS/LOUDS.swift                         | 56 +++++++++++++++++++
 .../DicdataStoreTests/DicdataStoreTests.swift | 20 +++++++
 3 files changed, 99 insertions(+), 11 deletions(-)
diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
index 0095226..c3ec2fd 100644
--- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
+++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
@@ -243,6 +243,23 @@ public final class DicdataStore {
         return Array(result[min(depth.lowerBound + 1, result.endIndex) ..< min(depth.upperBound + 1, result.endIndex)])
     }
 
+    /// Dictionary lookup helper that searches each LOUDS once for all of its targets (bulk search).
+    /// - Parameters:
+    ///   - group: Maps the string used as the file prefix (usually the first character; passed to `loadLOUDS(query:)`) to the `(characters, charIDs)` entries to search in that LOUDS.
+    ///   - depth: Depth range to search, forwarded as-is to `byfixNodeIndices(targets:depth:)`. NOTE(review): the original comment says `2..<4` yields only 2- and 3-character candidates; confirm against the zero-based check in that method.
+    /// - Returns: One `(key, indices)` pair per entry of `group`, holding every index found; `indices` is empty when `loadLOUDS` returns nil.
+    private func throughMatchLOUDS(group: [String: [([Character], [UInt8])]], depth: Range<Int>) -> [(key: String, indices: Set<Int>)] {
+        let indices: [(String, Set<Int>)] = group.map {dic in
+            guard let louds = self.loadLOUDS(query: dic.key) else {
+                return (dic.key, [])
+            }
+            // Call the bulk-processing implementation with only the char-ID arrays
+            let result = louds.byfixNodeIndices(targets: dic.value.map { $0.1 }, depth: depth)
+            return (dic.key, Set(result))
+        }
+        return indices
+    }
+
     private func prefixMatchLOUDS(query: String, charIDs: [UInt8], depth: Int = .max, maxCount: Int = .max) -> [Int] {
         guard let louds = self.loadLOUDS(query: query) else {
             return []
@@ -292,20 +309,15 @@ public final class DicdataStore {
         // MARK: 誤り訂正の対象を列挙する。非常に重い処理。
         var stringToInfo = inputData.getRangesWithTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
         // MARK: 検索対象を列挙していく。
-        let stringSet = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
+        let stringSet: [([Character], [UInt8])] = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
         let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
-        // 先頭の文字: そこで検索したい文字列の集合
-        let group = [Character: [([Character], [UInt8])]].init(grouping: stringSet, by: {$0.0.first!})
-
         let depth = minCharIDsCount - 1 ..< maxCharIDsCount
-        var indices: [(String, Set<Int>)] = group.map {dic in
-            let key = String(dic.key)
-            let set = dic.value.flatMapSet {(_, charIDs) in self.throughMatchLOUDS(query: key, charIDs: charIDs, depth: depth)}
-            return (key, set)
-        }
-        indices.append(("user", stringSet.flatMapSet {self.throughMatchLOUDS(query: "user", charIDs: $0.1, depth: depth)}))
+        let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)})
+        var indices = self.throughMatchLOUDS(group: group, depth: depth)
         if learningManager.enabled {
-            indices.append(("memory", stringSet.flatMapSet {self.throughMatchLOUDS(query: "memory", charIDs: $0.1, depth: depth)}))
+            indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet, "memory": stringSet], depth: depth))
+        } else {
+            indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet], depth: depth))
         }
         // MARK: 検索によって得たindicesから辞書データを実際に取り出していく
         var dicdata: [DicdataElement] = []
diff --git a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
index 0513d55..07d87ac 100644
--- a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
+++ b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
@@ -216,4 +216,60 @@ package struct LOUDS: Sendable {
         }
         return indices
     }
+
+    /// Lexicographic (dictionary-order) comparison: the first differing element decides; if one array is a prefix of the other, the shorter one sorts first.
+    private static func lexLessThan(_ lhs: [UInt8], _ rhs: [UInt8]) -> Bool {
+        let minCount = Swift.min(lhs.count, rhs.count)
+        for i in 0..<minCount {
+            let l = lhs[i]
+            let r = rhs[i]
+            if l != r {
+                return l < r
+            }
+        }
+        return lhs.count < rhs.count
+    }
+
+    /// Bulk partial-prefix search: for a target like "しかい", the nodes for "し" and "しか" are looked up as well as "しかい".
+    ///
+    /// - Parameter targets: Strings converted to char IDs; they are sorted lexicographically inside so consecutive targets can reuse shared prefixes.
+    /// - Parameter depth: Only nodes found at a zero-based character position contained in this range are returned.
+    /// - Returns: List of matching node indices (per the original note, indices within the corresponding loudstxt3 file).
+    /// - Note: A more appropriate name is wanted.
+    @inlinable func byfixNodeIndices(targets: [[UInt8]], depth: Range<Int>) -> [Int] {
+        // Sort in lexicographic order
+//        let targets = targets.sorted(by: Self.lexLessThan)
+        var targets = targets
+        targets.sort(by: Self.lexLessThan)
+        // Final output
+        var indices: [Int] = []
+        // Keeps the current search path: one (nodeIndex, char) entry per matched position
+        var stack: [(nodeIndex: Int, char: UInt8)] = []
+        for chars in targets {
+            // Search only at positions i below depth.upperBound
+            for (i, char) in chars.enumerated() where i < depth.upperBound {
+                if i < stack.count, stack[i].char == char {
+                    // Already explored
+                    continue
+                } else if i < stack.count, stack[i].char != char {
+                    // A different character was found: discard the stack from this position onward
+                    stack = Array(stack[..<i])
+                }
+                // When we get here, stack[i] does not exist.
+                assert(i >= stack.count, "stack[\(i)] must not exist for logical reason.")
+                // In this case, perform the search
+                // Take the last stack entry and search for the next char from its nodeIndex (from node 1 when the stack is empty)
+                if let nodeIndex = self.searchCharNodeIndex(from: stack.last?.nodeIndex ?? 1, char: char) {
+                    if depth.contains(i) {
+                        indices.append(nodeIndex)
+                    }
+                    stack.append((nodeIndex, char))
+                } else {
+                    // Not found: stop processing this target
+                    break
+                }
+            }
+        }
+        return indices
+    }
 }
diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift
index 8c16c9c..d738518 100644
--- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift
+++ b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift
@@ -155,6 +155,26 @@ final class DicdataStoreTests: XCTestCase {
         }
     }
 
+    /// Words for which typo correction must reliably succeed: each (typo input, expected word) pair must appear in the lookup result.
+    func testMustCorrectTypo() throws {
+        let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
+        let mustWords = [
+            ("タイカクセイ", "大学生"),
+            ("シヨック", "ショック"),
+            ("キヨクイン", "局員"),
+            ("シヨーク", "ジョーク"),
+            ("サリカニ", "ザリガニ"),
+            ("ノクチヒテヨ", "野口英世"),
+            ("オタノフナカ", "織田信長"),
+        ]
+        for (key, word) in mustWords {
+            var c = ComposingText()
+            c.insertAtCursorPosition(key, inputStyle: .direct)
+            let result = dicdataStore.getLOUDSData(inputData: c, from: 0, to: c.input.endIndex - 1, needTypoCorrection: true)
+            XCTAssertEqual(result.first(where: {$0.data.word == word})?.data.word, word)
+        }
+    }
+
     func testGetLOUDSDataInRange() throws {
         let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
         do {