feat: TypoCorrectionとMovingTowardPrefixSearchを同時実行する関数を追加

2025-12-03 02:58:27 +00:00 · 2025-06-29 01:51:08 +09:00
parent 04d7edffdd
commit d636926e83
3 changed files with 97 additions and 45 deletions
--- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
+++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
@@ -261,6 +261,66 @@ public final class DicdataStore {
        return indices
    }

+    /// Runs typo-correction candidate generation and the LOUDS prefix search in one pass.
+    ///
+    /// Every candidate yielded by `TypoCorrectionGenerator` is converted to character IDs and
+    /// fed to one `LOUDS.MovingTowardPrefixSearchHelper` per dictionary key. Helpers are created
+    /// lazily the first time a key is seen and are kept for the whole run. Matches from the
+    /// temporary (short-term) learning memory are collected in the same loop.
+    ///
+    /// - Parameters:
+    ///   - inputs: Input elements handed to the typo-correction generator.
+    ///   - leftIndex: Left index handed to the typo-correction generator.
+    ///   - rightIndexRange: Range of right indices handed to the typo-correction generator.
+    ///   - useMemory: When `true`, the `"memory"` dictionary is searched in addition to the
+    ///     first-character dictionary and the `"user"` dictionary.
+    /// - Returns: A tuple of
+    ///   `stringToInfo` (candidates for which at least one helper reported newly found nodes;
+    ///   on duplicate keys the entry whose penalty compares greater is kept),
+    ///   `indices` (the node indices accumulated by each helper, paired with its dictionary key),
+    ///   and `temporaryMemoryDicdata` (entries taken from the temporary learning memory).
+    func movingTowardPrefixSearch(
+        inputs: [ComposingText.InputElement],
+        leftIndex: Int,
+        rightIndexRange: Range<Int>,
+        useMemory: Bool
+    ) -> (
+        stringToInfo: [[Character]: (endIndex: Int, penalty: PValue)],
+        indices: [(key: String, indices: [Int])],
+        temporaryMemoryDicdata: [DicdataElement]
+    ) {
+        var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange)
+        // One search helper per dictionary key, created on first use.
+        var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
+        var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
+
+        var temporaryMemoryDicdata: [DicdataElement] = []
+        // Walk through every candidate the generator yields.
+        while let (characters, info) = generator.next() {
+            guard let firstCharacter = characters.first else {
+                continue
+            }
+            let charIDs = characters.map(self.character2charId(_:))
+            let keys: [String] = if useMemory {
+                [String(firstCharacter), "user", "memory"]
+            } else {
+                [String(firstCharacter), "user"]
+            }
+            var updated = false
+            for key in keys {
+                withMutableValue(&targetLOUDS[key]) { helper in
+                    // If no LOUDS can be loaded for this key the helper stays nil and the key is skipped.
+                    if helper == nil, let louds = self.loadLOUDS(query: key) {
+                        helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds, depth: 0 ..< .max)
+                    }
+                    let hasUpdate = helper?.update(target: charIDs) ?? false
+                    updated = updated || hasUpdate
+                }
+            }
+            // The short-term (temporary) memory is handled at this point.
+            for data in self.learningManager.temporaryThroughMatch(charIDs: consume charIDs, depth: 0 ..< .max) {
+                if info.penalty.isZero {
+                    // A zero-penalty match is kept as-is. Skip the penalty path below,
+                    // otherwise the same entry would be appended a second time with a zero adjustment.
+                    temporaryMemoryDicdata.append(data)
+                    continue
+                }
+                let ratio = Self.penaltyRatio[data.lcid]
+                let pUnit: PValue = Self.getPenalty(data: data) / 2   // negative value
+                let adjust = pUnit * info.penalty * ratio
+                if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
+                    continue
+                }
+                temporaryMemoryDicdata.append(data.adjustedData(adjust))
+            }
+            // Record the candidate only when some helper found nodes it had not seen before.
+            if updated {
+                stringToInfo.append((characters, info))
+            }
+        }
+
+        return (
+            Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
+            targetLOUDS.map { ($0.key, $0.value.indices)},
+            temporaryMemoryDicdata
+        )
+    }
    /// prefixを起点として、それに続く語（prefix match）をLOUDS上で探索する関数。
    /// - Parameters:
    ///   - query: 辞書ファイルの識別子（通常は先頭1文字や"user"など）。
@@ -318,20 +378,8 @@ public final class DicdataStore {
            segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
        }
        // MARK: 誤り訂正の対象を列挙する。非常に重い処理。
-        var stringToInfo = TypoCorrection.getRangesWithTypos(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
-        // MARK: 検索対象を列挙していく。
-        let stringSet: [([Character], [UInt8])] = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
-        let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
-        let depth = minCharIDsCount - 1 ..< maxCharIDsCount
-        let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)})
-        var indices = self.movingTowardPrefixSearch(group: group, depth: depth)
-        if learningManager.enabled {
-            indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet, "memory": stringSet], depth: depth))
-        } else {
-            indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet], depth: depth))
-        }
+        var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled)
        // MARK: 検索によって得たindicesから辞書データを実際に取り出していく
-        var dicdata: [DicdataElement] = []
        for (identifier, value) in indices {
            let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
                let rubyArray = Array(data.ruby)
@@ -349,23 +397,6 @@ public final class DicdataStore {
            }
            dicdata.append(contentsOf: result)
        }
-        // temporalな学習結果にpenaltyを加えて追加する
-        for (_, charIds) in consume stringSet {
-            for data in self.learningManager.temporaryThroughMatch(charIDs: consume charIds, depth: depth) {
-                let rubyArray = Array(data.ruby)
-                let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
-                if penalty.isZero {
-                    dicdata.append(data)
-                }
-                let ratio = Self.penaltyRatio[data.lcid]
-                let pUnit: PValue = Self.getPenalty(data: data) / 2   // 負の値
-                let adjust = pUnit * penalty * ratio
-                if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
-                    continue
-                }
-                dicdata.append(data.adjustedData(adjust))
-            }
-        }

        for i in toIndexLeft ..< toIndexRight {
            do {
@@ -1087,4 +1118,4 @@ public final class DicdataStore {
        "w": ["ワ", "ウィ", "ウェ", "ヲ"],
        "wy": ["ヰ", "ヱ"]
    ]
-}
+}
--- a/Sources/KanaKanjiConverterModule/DicdataStore/TypoCorrection.swift
+++ b/Sources/KanaKanjiConverterModule/DicdataStore/TypoCorrection.swift
@@ -42,7 +42,7 @@ struct TypoCorrectionGenerator {

    var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]

-    mutating func generate(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) -> ([Character], (endIndex: Int, penalty: PValue))? {
+    mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? {
        while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
            var result: ([Character], (endIndex: Int, penalty: PValue))? = nil
            if rightIndexRange.contains(count + left - 1) {
@@ -52,14 +52,18 @@ struct TypoCorrectionGenerator {
            }
            // エスケープ
            if self.nodes.endIndex <= count {
-                continue
+                if let result {
+                    return result
+                }
            }
            // 訂正数上限(3個)
            if penalty >= maxPenalty {
                var convertTargetElements = convertTargetElements
                let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
                if count + correct.count > self.nodes.endIndex {
-                    continue
+                    if let result {
+                        return result
+                    }
                }
                for element in correct {
                    ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
--- a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
+++ b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
@@ -240,35 +240,52 @@ package struct LOUDS: Sendable {
        // 辞書順でソートする
        var targets = targets
        targets.sort(by: Self.lexLessThan)
+        var helper = MovingTowardPrefixSearchHelper(louds: self, depth: depth)
+        for target in targets {
+            helper.update(target: target)
+        }
+        return helper.indices
+    }
+
+    struct MovingTowardPrefixSearchHelper {
+        init(louds: LOUDS, depth: Range<Int>) {
+            self.louds = louds
+            self.depth = depth
+        }
+        let louds: LOUDS
+        let depth: Range<Int>
        // 最終出力となる
        var indices: [Int] = []
        // 現在の探索結果を保存しておく
        var stack: [(nodeIndex: Int, char: UInt8)] = []
-        for chars in targets {
+
+        @inlinable mutating func update(target: [UInt8]) -> Bool {
+            var updated = false
            // iがupperBoundを超えない範囲で検索を行う
-            for (i, char) in chars.enumerated() where i < depth.upperBound {
-                if i < stack.count, stack[i].char == char {
+            for (i, char) in target.enumerated() where i < self.depth.upperBound {
+                if i < self.stack.count, self.stack[i].char == char {
                    // すでに探索済み
                    continue
-                } else if i < stack.count, stack[i].char != char {
+                } else if i < self.stack.count, self.stack[i].char != char {
                    // 異なる文字が見つかったら、その時点でそこから先のstackを破棄
-                    stack = Array(stack[..<i])
+                    self.stack = Array(self.stack[..<i])
                }
                // ここに到達する場合、stack[i]は存在しない。
-                assert(i >= stack.count, "stack[\(i)] must not exist for logical reason.")
+                assert(i >= self.stack.count, "stack[\(i)] must not exist for logical reason.")
                // このケースでは、探索を行う
                // 直前のstackを取り出し、そのnodeIndexから次のcharを探索する
-                if let nodeIndex = self.searchCharNodeIndex(from: stack.last?.nodeIndex ?? 1, char: char) {
-                    if depth.contains(i) {
-                        indices.append(nodeIndex)
+                if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) {
+                    if self.depth.contains(i) {
+                        self.indices.append(nodeIndex)
+                        updated = true
                    }
-                    stack.append((nodeIndex, char))
+                    self.stack.append((nodeIndex, char))
                } else {
                    // 見つからなかった場合、打ち切る
                    break
                }
            }
+            return updated
        }
-        return indices
    }
 }