From 74d4d412c39df0af45dc86bcd9aa4ec9187968c0 Mon Sep 17 00:00:00 2001
From: Miwa <63481257+ensan-hcl@users.noreply.github.com>
Date: Fri, 27 Jun 2025 23:21:11 +0900
Subject: [PATCH] =?UTF-8?q?refactor:=20=E8=BE=9E=E6=9B=B8=E6=A4=9C?=
 =?UTF-8?q?=E7=B4=A2=E9=96=A2=E9=80=A3=E3=81=AE=E9=96=A2=E6=95=B0=E3=81=AE?=
 =?UTF-8?q?=E5=AE=9F=E8=A3=85=E3=82=92=E7=B5=B1=E5=90=88=E3=81=97=E3=80=81?=
 =?UTF-8?q?=E5=90=8D=E5=89=8D=E3=82=92=E6=95=B4=E7=90=86=20(#209)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* perf: 同じloudsに対する検索をバルク処理することによって、処理の効率化を実現

* fix: bug

* test: add typo correction test

* chore: finalize imp;

* feat: update search-related impls
---
 .../DicdataStore/DicdataStore.swift           | 76 +++++++++++--------
 .../LearningMemoryTests.swift                 |  8 +-
 2 files changed, 49 insertions(+), 35 deletions(-)
diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
index c3ec2fd..f65b93b 100644
--- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
+++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
@@ -27,7 +27,7 @@ public final class DicdataStore {
     private var mmValue: [PValue] = []
 
     private var loudses: [String: LOUDS] = [:]
-    private var loudstxts: [String: Data] = [:]    
+    private var loudstxts: [String: Data] = [:]
     private var importedLoudses: Set<String> = []
     private var charsID: [Character: UInt8] = [:]
     private var learningManager = LearningManager()
@@ -227,28 +227,29 @@ public final class DicdataStore {
         }
     }
 
-    func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] {
+    /// 完全一致検索を行う関数。
+    /// - Parameters:
+    ///   - query: 対象とするLOUDS辞書の識別子（通常は先頭1文字や"user"など）。
+    ///   - charIDs: 検索する語を表す文字ID列。
+    /// - Returns: 与えられた文字ID列と完全に一致するノードインデックスの配列（存在すれば1件、存在しなければ空配列）。
+    ///
+    /// 入力の文字ID列がLOUDS内のノードと完全一致する場合、そのノードのインデックスを返す。
+    /// 一致しない場合は空の配列を返す。
+    func perfectMatchingSearch(query: String, charIDs: [UInt8]) -> [Int] {
         guard let louds = self.loadLOUDS(query: query) else {
             return []
         }
         return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
     }
 
-    private func throughMatchLOUDS(query: String, charIDs: [UInt8], depth: Range<Int>) -> [Int] {
-        guard let louds = self.loadLOUDS(query: query) else {
-            return []
-        }
-        let result = louds.byfixNodeIndices(chars: charIDs)
-        // result[1]から始まるので、例えば3..<5 (3文字と4文字)の場合は1文字ずつずらして4..<6の範囲をもらう
-        return Array(result[min(depth.lowerBound + 1, result.endIndex) ..< min(depth.upperBound + 1, result.endIndex)])
-    }
-
-    /// 辞書検索用関数
+    /// 入力の各prefix（ア、アイ、アイウ...）をすべて順にたどってLOUDS辞書から候補を検索する関数。
     /// - Parameters:
-    ///   - group: ファイルのプレフィックスとなる文字列（通常、最初の1文字）と、その文字列で始まる文字IDのプレフィックスの集合
-    ///   - depth: 検索対象となる深さ。`2..<4`の場合は2文字・3文字の候補のみ取りだす
-    /// - Returns: 発見されたすべてのインデックス
-    private func throughMatchLOUDS(group: [String: [([Character], [UInt8])]], depth: Range<Int>) -> [(key: String, indices: Set<Int>)] {
+    ///   - group: 検索対象の辞書をキーにした、検索語（Character配列とcharID配列のペア）のリスト。
+    ///   - depth: 各検索語のprefix深さの範囲。例: `2..<4` なら2文字・3文字のprefixを対象にする。
+    /// - Returns: 各辞書ごとに、見つかったノードインデックスの集合。
+    ///
+    /// 「アイウ」に対して「ア」「アイ」「アイウ」のすべてをLOUDSで検索するバルク処理を行う。
+    private func movingTowardPrefixSearch(group: [String: [([Character], [UInt8])]], depth: Range<Int>) -> [(key: String, indices: Set<Int>)] {
         let indices: [(String, Set<Int>)] = group.map {dic in
             guard let louds = self.loadLOUDS(query: dic.key) else {
                 return (dic.key, [])
@@ -260,7 +261,17 @@ public final class DicdataStore {
         return indices
     }
 
-    private func prefixMatchLOUDS(query: String, charIDs: [UInt8], depth: Int = .max, maxCount: Int = .max) -> [Int] {
+    /// prefixを起点として、それに続く語（prefix match）をLOUDS上で探索する関数。
+    /// - Parameters:
+    ///   - query: 辞書ファイルの識別子（通常は先頭1文字や"user"など）。
+    ///   - charIDs: 接頭辞を構成する文字ID列。
+    ///   - depth: 接頭辞から何文字先まで探索するかの上限。
+    ///   - maxCount: 最大取得件数。多すぎると性能劣化につながるため制限できる。
+    /// - Returns: 与えられたprefixで始まる語のノードインデックスのリスト。
+    ///
+    /// 入力のprefixにマッチする語をLOUDSから最大`maxCount`件、最大`depth`文字先まで探索する。
+    /// 「ABC」→「ABC」「ABCD」「ABCDE」などを対象とする検索。
+    private func startingFromPrefixSearch(query: String, charIDs: [UInt8], depth: Int = .max, maxCount: Int = .max) -> [Int] {
         guard let louds = self.loadLOUDS(query: query) else {
             return []
         }
@@ -313,11 +324,11 @@ public final class DicdataStore {
         let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
         let depth = minCharIDsCount - 1 ..< maxCharIDsCount
         let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)})
-        var indices = self.throughMatchLOUDS(group: group, depth: depth)
+        var indices = self.movingTowardPrefixSearch(group: group, depth: depth)
         if learningManager.enabled {
-            indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet, "memory": stringSet], depth: depth))
+            indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet, "memory": stringSet], depth: depth))
         } else {
-            indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet], depth: depth))
+            indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet], depth: depth))
         }
         // MARK: 検索によって得たindicesから辞書データを実際に取り出していく
         var dicdata: [DicdataElement] = []
@@ -421,15 +432,18 @@ public final class DicdataStore {
             return [characterNode]
         }
         let maxIDs = maxString.map(self.character2charId)
-        var keys = [String(stringToEndIndex.keys.first!.first!), "user"]
+        var group: [String: [([Character], [UInt8])]] = [
+            String(stringToEndIndex.keys.first!.first!): [(maxString, maxIDs)],
+            "user": [(maxString, maxIDs)],
+        ]
         if learningManager.enabled {
-            keys.append("memory")
+            group["memory"] = group["user"]
         }
         // MARK: 検索によって得たindicesから辞書データを実際に取り出していく
         var dicdata: [DicdataElement] = []
         let depth = minString.count - 1 ..< maxString.count
-        for identifier in keys {
-            dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: self.throughMatchLOUDS(query: identifier, charIDs: maxIDs, depth: depth)))
+        for (identifier, indices) in self.movingTowardPrefixSearch(group: group, depth: depth) {
+            dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: indices))
         }
         if learningManager.enabled {
             // temporalな学習結果にpenaltyを加えて追加する
@@ -485,19 +499,19 @@ public final class DicdataStore {
         var indices: [(String, Set<Int>)] = group.map {dic in
             let head = String(dic.key)
             let set = dic.value.flatMapSet { (_, charIDs) in
-                self.perfectMatchLOUDS(query: head, charIDs: charIDs)
+                self.perfectMatchingSearch(query: head, charIDs: charIDs)
             }
             return (head, set)
         }
         do {
             let set = strings.flatMapSet { (_, charIDs) in
-                self.perfectMatchLOUDS(query: "user", charIDs: charIDs)
+                self.perfectMatchingSearch(query: "user", charIDs: charIDs)
             }
             indices.append(("user", set))
         }
         if learningManager.enabled {
             let set = strings.flatMapSet { (_, charIDs) in
-                self.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
+                self.perfectMatchingSearch(query: "memory", charIDs: charIDs)
             }
             indices.append(("memory", set))
         }
@@ -591,16 +605,16 @@ public final class DicdataStore {
         } else {
             Int.max
         }
-        let prefixIndices = self.prefixMatchLOUDS(query: first, charIDs: charIDs, depth: depth, maxCount: maxCount)
+        let prefixIndices = self.startingFromPrefixSearch(query: first, charIDs: charIDs, depth: depth, maxCount: maxCount)
 
         result.append(
             contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices))
                 .filter { Self.predictionUsable[$0.rcid] }
         )
-        let userDictIndices = self.prefixMatchLOUDS(query: "user", charIDs: charIDs, maxCount: maxCount)
+        let userDictIndices = self.startingFromPrefixSearch(query: "user", charIDs: charIDs, maxCount: maxCount)
         result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices)))
         if learningManager.enabled {
-            let memoryDictIndices = self.prefixMatchLOUDS(query: "memory", charIDs: charIDs, maxCount: maxCount)
+            let memoryDictIndices = self.startingFromPrefixSearch(query: "memory", charIDs: charIDs, maxCount: maxCount)
             result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices)))
             result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
         }
@@ -1073,4 +1087,4 @@ public final class DicdataStore {
         "w": ["ワ", "ウィ", "ウェ", "ヲ"],
         "wy": ["ヰ", "ヱ"]
     ]
-}
+}
\ No newline at end of file
diff --git a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift
index a007137..573b474 100644
--- a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift
+++ b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift
@@ -78,7 +78,7 @@ final class LearningMemoryTests: XCTestCase {
         let dicdataStore = DicdataStore(requestOptions: options)
         dicdataStore.sendToDicdataStore(.setRequestOptions(options))
         let charIDs = "テスト".map { dicdataStore.character2charId($0) }
-        let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
+        let indices = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
         let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices)
         XCTAssertFalse(dicdata.isEmpty)
         XCTAssertTrue(dicdata.contains { $0.word == element.word && $0.ruby == element.ruby })
@@ -95,7 +95,7 @@ final class LearningMemoryTests: XCTestCase {
             )
         )
 
-        let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
+        let indices2 = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
         let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2)
         XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby })
     }
@@ -118,7 +118,7 @@ final class LearningMemoryTests: XCTestCase {
         let dicdataStore = DicdataStore(requestOptions: options)
         dicdataStore.sendToDicdataStore(.setRequestOptions(options))
         let charIDs = "テスト".map { dicdataStore.character2charId($0) }
-        let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
+        let indices = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
         let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices)
         XCTAssertFalse(dicdata.isEmpty)
         XCTAssertEqual(dicdata.count { $0.word == element.word && $0.ruby == element.ruby }, 2)
@@ -135,7 +135,7 @@ final class LearningMemoryTests: XCTestCase {
             )
         )
 
-        let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs)
+        let indices2 = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs)
         let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2)
         XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby })
     }