From eb4e669b2b80b7fbd12e86bd313905716ba3a42d Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Sun, 29 Jun 2025 12:30:34 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20typo=20correction=E3=81=AE=E6=9E=9D?= =?UTF-8?q?=E5=88=88=E3=82=8A=E3=82=92=E5=AE=9F=E8=A3=85=E3=81=97=E3=80=81?= =?UTF-8?q?direct=E5=85=A5=E5=8A=9B=E3=81=AE=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=81=A7TestFullConversion=E3=81=AE=E5=AE=9F=E8=A1=8C=E9=80=9F?= =?UTF-8?q?=E5=BA=A6=E3=81=8C1.6=E5=80=8D=E3=81=AB=E5=90=91=E4=B8=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DicdataStore/DicdataStore.swift | 20 ++++++++++++++---- .../DicdataStore/LearningMemory.swift | 12 ++++++----- .../DicdataStore/TypoCorrection.swift | 21 +++++++++++++++++++ .../LOUDS/LOUDS.swift | 14 +++++++++---- .../TemporalLearningMemoryTrieTests.swift | 2 +- 5 files changed, 55 insertions(+), 14 deletions(-) diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift index d9fe5e6..c3d02d7 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift @@ -288,17 +288,25 @@ public final class DicdataStore { [String(firstCharacter), "user"] } var updated = false + var availableMaxIndex = 0 for key in keys { withMutableValue(&targetLOUDS[key]) { helper in if helper == nil, let louds = self.loadLOUDS(query: key) { helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds, depth: 0 ..< .max) } - let hasUpdate = helper?.update(target: charIDs) ?? 
false - updated = updated || hasUpdate + guard helper != nil else { + return + } + let result = helper!.update(target: charIDs) + updated = updated || result.updated + availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex) } } // 短期記憶についてはこの位置で処理する - for data in self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs, depth: 0 ..< .max) { + let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs, depth: 0 ..< .max) + updated = updated || !(result.dicdata.isEmpty) + availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex) + for data in result.dicdata { if info.penalty.isZero { temporaryMemoryDicdata.append(data) } @@ -310,6 +318,10 @@ public final class DicdataStore { } temporaryMemoryDicdata.append(data.adjustedData(adjust)) } + if availableMaxIndex < characters.endIndex - 1 { + // 到達不可能だったパスを通知 + generator.setUnreachablePath(target: characters[...(availableMaxIndex + 1)]) + } if updated { stringToInfo.append((characters, info)) } @@ -478,7 +490,7 @@ public final class DicdataStore { } if learningManager.enabled { // temporalな学習結果にpenaltyを加えて追加する - dicdata.append(contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth)) + dicdata.append(contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata) } for (key, value) in stringToEndIndex { let convertTarget = String(key) diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift index 08feec7..bf27a2b 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift @@ -584,20 +584,22 @@ struct TemporalLearningMemoryTrie { return nodes[index].dataIndices.map {self.dicdata[$0]} } - func movingTowardPrefixSearch(chars: [UInt8], 
depth: Range<Int>) -> [DicdataElement] { + func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) { var index = 0 + var availableMaxIndex = 0 var indices: [Int] = [] for (offset, char) in chars.enumerated() { if let nextIndex = nodes[index].children[char] { + availableMaxIndex = offset /* position within `chars`, not a trie node index */ index = nextIndex if depth.contains(offset) { indices.append(contentsOf: nodes[index].dataIndices) } } else { - return indices.map {self.dicdata[$0]} + return (indices.map {self.dicdata[$0]}, availableMaxIndex) } } - return indices.map {self.dicdata[$0]} + return (indices.map {self.dicdata[$0]}, availableMaxIndex) } func prefixMatch(chars: [UInt8]) -> [DicdataElement] { @@ -718,9 +720,9 @@ final class LearningManager { return self.temporaryMemory.perfectMatch(chars: charIDs) } - func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int>) -> [DicdataElement] { + func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) { guard let options, options.learningType.needUsingMemory else { - return [] + return ([], 0) } return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth) } diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/TypoCorrection.swift b/Sources/KanaKanjiConverterModule/DicdataStore/TypoCorrection.swift index 401f37b..aa2b865 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/TypoCorrection.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/TypoCorrection.swift @@ -42,6 +42,27 @@ struct TypoCorrectionGenerator { var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)] + /// Notifies the generator that paths starting with `target` are unreachable + mutating func setUnreachablePath(target: some Collection) { + self.stack = self.stack.filter { (convertTargetElements, lastElement, count, penalty) in + var stablePrefix: [Character] = [] + loop: 
for item in convertTargetElements { + switch item.inputStyle { + case .direct: + stablePrefix.append(contentsOf: item.string) + case .roman2kana: + // TODO: impl + break loop + } + // 安定なprefixが + if stablePrefix.hasPrefix(target) { + return false + } + } + return true + } + } + mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? { while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() { var result: ([Character], (endIndex: Int, penalty: PValue))? = nil diff --git a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift index cc49ea3..fdfbe52 100644 --- a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift +++ b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift @@ -242,7 +242,7 @@ package struct LOUDS: Sendable { targets.sort(by: Self.lexLessThan) var helper = MovingTowardPrefixSearchHelper(louds: self, depth: depth) for target in targets { - helper.update(target: target) + _ = helper.update(target: target) } return helper.indices } @@ -258,13 +258,18 @@ package struct LOUDS: Sendable { var indices: [Int] = [] // 現在の探索結果を保存しておく var stack: [(nodeIndex: Int, char: UInt8)] = [] - - @inlinable mutating func update(target: [UInt8]) -> Bool { + + /// `target`を用いて更新する + /// - Parameter target: 検索対象の`CharID`の列 + /// - Returns: `updated`はこれによって`indices`の更新があったかどうか。`availableMaxIndex`はアクセスに成功した最大インデックス + @inlinable mutating func update(target: [UInt8]) -> (updated: Bool, availableMaxIndex: Int) { var updated = false + var availableMaxIndex = 0 // iがupperBoundを超えない範囲で検索を行う for (i, char) in target.enumerated() where i < self.depth.upperBound { if i < self.stack.count, self.stack[i].char == char { // すでに探索済み + availableMaxIndex = i continue } else if i < self.stack.count, self.stack[i].char != char { // 異なる文字が見つかったら、その時点でそこから先のstackを破棄 @@ -278,6 +283,7 @@ package struct LOUDS: Sendable { if self.depth.contains(i) { self.indices.append(nodeIndex) updated = true + 
availableMaxIndex = i } self.stack.append((nodeIndex, char)) } else { @@ -285,7 +291,7 @@ package struct LOUDS: Sendable { break } } - return updated + return (updated, availableMaxIndex) } } } diff --git a/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift b/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift index 92a8ef8..f4e26f7 100644 --- a/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift @@ -27,7 +27,7 @@ final class TemporalLearningMemoryTrieTests: XCTestCase { XCTAssertEqual(result1.first?.word, element1.word) XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false) - let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..