From 74d4d412c39df0af45dc86bcd9aa4ec9187968c0 Mon Sep 17 00:00:00 2001 From: Miwa <63481257+ensan-hcl@users.noreply.github.com> Date: Fri, 27 Jun 2025 23:21:11 +0900 Subject: [PATCH] =?UTF-8?q?refactor:=20=E8=BE=9E=E6=9B=B8=E6=A4=9C?= =?UTF-8?q?=E7=B4=A2=E9=96=A2=E9=80=A3=E3=81=AE=E9=96=A2=E6=95=B0=E3=81=AE?= =?UTF-8?q?=E5=AE=9F=E8=A3=85=E3=82=92=E7=B5=B1=E5=90=88=E3=81=97=E3=80=81?= =?UTF-8?q?=E5=90=8D=E5=89=8D=E3=82=92=E6=95=B4=E7=90=86=20(#209)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * perf: 同じloudsに対する検索をバルク処理することによって、処理の効率化を実現 * fix: bug * test: add typo correction test * chore: finalize imp; * feat: update search-related impls --- .../DicdataStore/DicdataStore.swift | 76 +++++++++++-------- .../LearningMemoryTests.swift | 8 +- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift index c3ec2fd..f65b93b 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift @@ -27,7 +27,7 @@ public final class DicdataStore { private var mmValue: [PValue] = [] private var loudses: [String: LOUDS] = [:] - private var loudstxts: [String: Data] = [:] + private var loudstxts: [String: Data] = [:] private var importedLoudses: Set = [] private var charsID: [Character: UInt8] = [:] private var learningManager = LearningManager() @@ -227,28 +227,29 @@ public final class DicdataStore { } } - func perfectMatchLOUDS(query: String, charIDs: [UInt8]) -> [Int] { + /// 完全一致検索を行う関数。 + /// - Parameters: + /// - query: 対象とするLOUDS辞書の識別子(通常は先頭1文字や"user"など)。 + /// - charIDs: 検索する語を表す文字ID列。 + /// - Returns: 与えられた文字ID列と完全に一致するノードインデックスの配列(存在すれば1件、存在しなければ空配列)。 + /// + /// 入力の文字ID列がLOUDS内のノードと完全一致する場合、そのノードのインデックスを返す。 + /// 一致しない場合は空の配列を返す。 + func perfectMatchingSearch(query: String, charIDs: [UInt8]) -> [Int] { guard let louds = self.loadLOUDS(query: query) else { return [] } return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0} } - private func throughMatchLOUDS(query: String, charIDs: [UInt8], depth: Range) -> [Int] { - guard let louds = self.loadLOUDS(query: query) else { - return [] - } - let result = louds.byfixNodeIndices(chars: charIDs) - // result[1]から始まるので、例えば3..<5 (3文字と4文字)の場合は1文字ずつずらして4..<6の範囲をもらう - return Array(result[min(depth.lowerBound + 1, result.endIndex) ..< min(depth.upperBound + 1, result.endIndex)]) - } - - /// 辞書検索用関数 + /// 入力の各prefix(ア、アイ、アイウ...)をすべて順にたどってLOUDS辞書から候補を検索する関数。 /// - Parameters: - /// - group: ファイルのプレフィックスとなる文字列(通常、最初の1文字)と、その文字列で始まる文字IDのプレフィックスの集合 - /// - depth: 検索対象となる深さ。`2..<4`の場合は2文字・3文字の候補のみ取りだす - /// - Returns: 発見されたすべてのインデックス - private func throughMatchLOUDS(group: [String: [([Character], [UInt8])]], depth: Range) -> [(key: String, indices: Set)] { + /// - group: 検索対象の辞書をキーにした、検索語(Character配列とcharID配列のペア)のリスト。 + /// - depth: 各検索語のprefix深さの範囲。例: `2..<4` なら2文字・3文字のprefixを対象にする。 + /// - Returns: 各辞書ごとに、見つかったノードインデックスの集合。 + /// + /// 「アイウ」に対して「ア」「アイ」「アイウ」のすべてをLOUDSで検索するバルク処理を行う。 + private func movingTowardPrefixSearch(group: [String: [([Character], [UInt8])]], depth: Range) -> [(key: String, indices: Set)] { let indices: [(String, Set)] = group.map {dic in guard let louds = self.loadLOUDS(query: dic.key) else { return (dic.key, []) @@ -260,7 +261,17 @@ public final class DicdataStore { return indices } - private func prefixMatchLOUDS(query: String, charIDs: [UInt8], depth: Int = .max, maxCount: Int = .max) -> [Int] { + /// prefixを起点として、それに続く語(prefix match)をLOUDS上で探索する関数。 + /// - Parameters: + /// - query: 辞書ファイルの識別子(通常は先頭1文字や"user"など)。 + /// - charIDs: 接頭辞を構成する文字ID列。 + /// - depth: 接頭辞から何文字先まで探索するかの上限。 + /// - maxCount: 最大取得件数。多すぎると性能劣化につながるため制限できる。 + /// - Returns: 与えられたprefixで始まる語のノードインデックスのリスト。 + /// + /// 入力のprefixにマッチする語をLOUDSから最大`maxCount`件、最大`depth`文字先まで探索する。 + /// 「ABC」→「ABC」「ABCD」「ABCDE」などを対象とする検索。 + private func startingFromPrefixSearch(query: String, charIDs: [UInt8], depth: Int = .max, maxCount: Int = .max) -> [Int] { guard let louds = self.loadLOUDS(query: query) else { return [] } @@ -313,11 +324,11 @@ public final class DicdataStore { let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1) let depth = minCharIDsCount - 1 ..< maxCharIDsCount let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)}) - var indices = self.throughMatchLOUDS(group: group, depth: depth) + var indices = self.movingTowardPrefixSearch(group: group, depth: depth) if learningManager.enabled { - indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet, "memory": stringSet], depth: depth)) + indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet, "memory": stringSet], depth: depth)) } else { - indices.append(contentsOf: self.throughMatchLOUDS(group: ["user": stringSet], depth: depth)) + indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet], depth: depth)) } // MARK: 検索によって得たindicesから辞書データを実際に取り出していく var dicdata: [DicdataElement] = [] @@ -421,15 +432,18 @@ public final class DicdataStore { return [characterNode] } let maxIDs = maxString.map(self.character2charId) - var keys = [String(stringToEndIndex.keys.first!.first!), "user"] + var group: [String: [([Character], [UInt8])]] = [ + String(stringToEndIndex.keys.first!.first!): [(maxString, maxIDs)], + "user": [(maxString, maxIDs)], + ] if learningManager.enabled { - keys.append("memory") + group["memory"] = group["user"] } // MARK: 検索によって得たindicesから辞書データを実際に取り出していく var dicdata: [DicdataElement] = [] let depth = minString.count - 1 ..< maxString.count - for identifier in keys { - dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: self.throughMatchLOUDS(query: identifier, charIDs: maxIDs, depth: depth))) + for (identifier, indices) in self.movingTowardPrefixSearch(group: group, depth: depth) { + dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: indices)) } if learningManager.enabled { // temporalな学習結果にpenaltyを加えて追加する @@ -485,19 +499,19 @@ public final class DicdataStore { var indices: [(String, Set)] = group.map {dic in let head = String(dic.key) let set = dic.value.flatMapSet { (_, charIDs) in - self.perfectMatchLOUDS(query: head, charIDs: charIDs) + self.perfectMatchingSearch(query: head, charIDs: charIDs) } return (head, set) } do { let set = strings.flatMapSet { (_, charIDs) in - self.perfectMatchLOUDS(query: "user", charIDs: charIDs) + self.perfectMatchingSearch(query: "user", charIDs: charIDs) } indices.append(("user", set)) } if learningManager.enabled { let set = strings.flatMapSet { (_, charIDs) in - self.perfectMatchLOUDS(query: "memory", charIDs: charIDs) + self.perfectMatchingSearch(query: "memory", charIDs: charIDs) } indices.append(("memory", set)) } @@ -591,16 +605,16 @@ public final class DicdataStore { } else { Int.max } - let prefixIndices = self.prefixMatchLOUDS(query: first, charIDs: charIDs, depth: depth, maxCount: maxCount) + let prefixIndices = self.startingFromPrefixSearch(query: first, charIDs: charIDs, depth: depth, maxCount: maxCount) result.append( contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices)) .filter { Self.predictionUsable[$0.rcid] } ) - let userDictIndices = self.prefixMatchLOUDS(query: "user", charIDs: charIDs, maxCount: maxCount) + let userDictIndices = self.startingFromPrefixSearch(query: "user", charIDs: charIDs, maxCount: maxCount) result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices))) if learningManager.enabled { - let memoryDictIndices = self.prefixMatchLOUDS(query: "memory", charIDs: charIDs, maxCount: maxCount) + let memoryDictIndices = self.startingFromPrefixSearch(query: "memory", charIDs: charIDs, maxCount: maxCount) result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices))) result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs)) } @@ -1073,4 +1087,4 @@ public final class DicdataStore { "w": ["ワ", "ウィ", "ウェ", "ヲ"], "wy": ["ヰ", "ヱ"] ] -} +} \ No newline at end of file diff --git a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift index a007137..573b474 100644 --- a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift @@ -78,7 +78,7 @@ final class LearningMemoryTests: XCTestCase { let dicdataStore = DicdataStore(requestOptions: options) dicdataStore.sendToDicdataStore(.setRequestOptions(options)) let charIDs = "テスト".map { dicdataStore.character2charId($0) } - let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs) + let indices = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs) let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices) XCTAssertFalse(dicdata.isEmpty) XCTAssertTrue(dicdata.contains { $0.word == element.word && $0.ruby == element.ruby }) @@ -95,7 +95,7 @@ final class LearningMemoryTests: XCTestCase { ) ) - let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs) + let indices2 = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs) let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2) XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby }) } @@ -118,7 +118,7 @@ final class LearningMemoryTests: XCTestCase { let dicdataStore = DicdataStore(requestOptions: options) dicdataStore.sendToDicdataStore(.setRequestOptions(options)) let charIDs = "テスト".map { dicdataStore.character2charId($0) } - let indices = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs) + let indices = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs) let dicdata = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices) XCTAssertFalse(dicdata.isEmpty) XCTAssertEqual(dicdata.count { $0.word == element.word && $0.ruby == element.ruby }, 2) @@ -135,7 +135,7 @@ final class LearningMemoryTests: XCTestCase { ) ) - let indices2 = dicdataStore.perfectMatchLOUDS(query: "memory", charIDs: charIDs) + let indices2 = dicdataStore.perfectMatchingSearch(query: "memory", charIDs: charIDs) let dicdata2 = dicdataStore.getDicdataFromLoudstxt3(identifier: "memory", indices: indices2) XCTAssertFalse(dicdata2.contains { $0.word == element.word && $0.ruby == element.ruby }) }