From 2e61ad163837f45c2c687fa3c1d9d5f17b34c066 Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Sun, 29 Jun 2025 15:01:20 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20depth=E9=96=A2=E9=80=A3=E3=81=AE?= =?UTF-8?q?=E5=AE=9F=E8=A3=85=E3=82=92=E9=81=A9=E5=88=87=E3=81=AB=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DicdataStore/DicdataStore.swift | 41 +++++++++++-------- .../DicdataStore/LearningMemory.swift | 14 +++---- .../LOUDS/LOUDS.swift | 29 +++++++------ .../TemporalLearningMemoryTrieTests.swift | 2 +- 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift index c3d02d7..7b9b103 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift @@ -275,7 +275,7 @@ public final class DicdataStore { var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:] var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = [] - var temporaryMemoryDicdata: [DicdataElement] = [] + var temporaryMemoryDicdata: [Int: [DicdataElement]] = [:] // ジェネレータを舐める while let (characters, info) = generator.next() { guard let firstCharacter = characters.first else { @@ -292,7 +292,7 @@ public final class DicdataStore { for key in keys { withMutableValue(&targetLOUDS[key]) { helper in if helper == nil, let louds = self.loadLOUDS(query: key) { - helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds, depth: 0 ..< .max) + helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds) } guard helper != nil else { return @@ -303,20 +303,22 @@ public final class DicdataStore { } } // 短期記憶についてはこの位置で処理する - let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs, depth: 0 ..< .max) + let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs) updated = updated || !(result.dicdata.isEmpty) availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex) - for data in result.dicdata { - if info.penalty.isZero { - temporaryMemoryDicdata.append(data) + for (depth, dicdata) in result.dicdata { + for data in dicdata { + if info.penalty.isZero { + temporaryMemoryDicdata[depth, default: []].append(data) + } + let ratio = Self.penaltyRatio[data.lcid] + let pUnit: PValue = Self.getPenalty(data: data) / 2 // 負の値 + let adjust = pUnit * info.penalty * ratio + if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) { + continue + } + temporaryMemoryDicdata[depth, default: []].append(data.adjustedData(adjust)) } - let ratio = Self.penaltyRatio[data.lcid] - let pUnit: PValue = Self.getPenalty(data: data) / 2 // 負の値 - let adjust = pUnit * info.penalty * ratio - if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) { - continue - } - temporaryMemoryDicdata.append(data.adjustedData(adjust)) } if availableMaxIndex < characters.endIndex - 1 { // 到達不可能だったパスを通知 @@ -326,11 +328,14 @@ public final class DicdataStore { stringToInfo.append((characters, info)) } } - + let minCount = stringToInfo.map {$0.0.count}.min() ?? 0 + print(#function, minCount, stringToInfo.map{$0.0}) return ( Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}), - targetLOUDS.map { ($0.key, $0.value.indices)}, - temporaryMemoryDicdata + targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )}, + temporaryMemoryDicdata.flatMap { + minCount < $0.key + 1 ? $0.value : [] + } ) } /// prefixを起点として、それに続く語(prefix match)をLOUDS上で探索する関数。 @@ -490,7 +495,9 @@ public final class DicdataStore { } if learningManager.enabled { // temporalな学習結果にpenaltyを加えて追加する - dicdata.append(contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata) + dicdata.append( + contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata.flatMap { $0.value } + ) } for (key, value) in stringToEndIndex { let convertTarget = String(key) diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift index bf27a2b..b1f697c 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift @@ -584,22 +584,22 @@ struct TemporalLearningMemoryTrie { return nodes[index].dataIndices.map {self.dicdata[$0]} } - func movingTowardPrefixSearch(chars: [UInt8], depth: Range) -> (dicdata: [DicdataElement], availableMaxIndex: Int) { + func movingTowardPrefixSearch(chars: [UInt8], depth: Range) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) { var index = 0 var availableMaxIndex = 0 - var indices: [Int] = [] + var indices: [Int: [Int]] = [:] for (offset, char) in chars.enumerated() { if let nextIndex = nodes[index].children[char] { availableMaxIndex = index index = nextIndex if depth.contains(offset) { - indices.append(contentsOf: nodes[index].dataIndices) + indices[offset] = nodes[index].dataIndices } } else { - return (indices.map {self.dicdata[$0]}, availableMaxIndex) + return (indices.mapValues { items in items.map { self.dicdata[$0] }}, availableMaxIndex) } } - return (indices.map {self.dicdata[$0]}, availableMaxIndex) + return (indices.mapValues { items in items.map { self.dicdata[$0] }}, availableMaxIndex) } func prefixMatch(chars: [UInt8]) -> [DicdataElement] { @@ -720,9 +720,9 @@ final class LearningManager { return self.temporaryMemory.perfectMatch(chars: charIDs) } - func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range) -> (dicdata: [DicdataElement], availableMaxIndex: Int) { + func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range = 0 ..< .max) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) { guard let options, options.learningType.needUsingMemory else { - return ([], 0) + return ([:], 0) } return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth) } diff --git a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift index fdfbe52..7f3e915 100644 --- a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift +++ b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift @@ -240,25 +240,30 @@ package struct LOUDS: Sendable { // 辞書順でソートする var targets = targets targets.sort(by: Self.lexLessThan) - var helper = MovingTowardPrefixSearchHelper(louds: self, depth: depth) + var helper = MovingTowardPrefixSearchHelper(louds: self) for target in targets { _ = helper.update(target: target) } - return helper.indices + return helper.indicesInDepth(depth: depth) } struct MovingTowardPrefixSearchHelper { - init(louds: LOUDS, depth: Range) { + init(louds: LOUDS) { self.louds = louds - self.depth = depth } let louds: LOUDS - let depth: Range // 最終出力となる - var indices: [Int] = [] + var indices: [(depth: Int, index: Int)] = [] // 現在の探索結果を保存しておく var stack: [(nodeIndex: Int, char: UInt8)] = [] - + + func indicesInDepth(depth: Range) -> [Int] { + return self.indices + .lazy + .filter { depth.contains($0.depth) } + .map { $0.index } + } + /// `target`を用いて更新する /// - Parameter target: 検索対象の`CharID`の列 /// - Returns: `updated`はこれによって`indices`の更新があったかどうか。`availableMaxIndex`はアクセスに成功した最大インデックス @@ -266,7 +271,7 @@ package struct LOUDS: Sendable { var updated = false var availableMaxIndex = 0 // iがupperBoundを超えない範囲で検索を行う - for (i, char) in target.enumerated() where i < self.depth.upperBound { + for (i, char) in target.enumerated() { if i < self.stack.count, self.stack[i].char == char { // すでに探索済み availableMaxIndex = i @@ -280,11 +285,9 @@ package struct LOUDS: Sendable { // このケースでは、探索を行う // 直前のstackを取り出し、そのnodeIndexから次のcharを探索する if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) { - if self.depth.contains(i) { - self.indices.append(nodeIndex) - updated = true - availableMaxIndex = i - } + self.indices.append((i, nodeIndex)) + updated = true + availableMaxIndex = i self.stack.append((nodeIndex, char)) } else { // 見つからなかった場合、打ち切る diff --git a/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift b/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift index f4e26f7..cc7f1b6 100644 --- a/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift @@ -27,7 +27,7 @@ final class TemporalLearningMemoryTrieTests: XCTestCase { XCTAssertEqual(result1.first?.word, element1.word) XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false) - let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..