refactor: Typo Correctionがオフのモードの場合に呼ばれていた実装のうち、不要なものを削除

This commit is contained in:
Miwa / Ensan
2025-06-29 19:15:14 +09:00
parent a90467c38a
commit 14fa82bee9
3 changed files with 8 additions and 213 deletions

View File

@ -242,36 +242,18 @@ public final class DicdataStore {
return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
}
/// Runs a prefix search against several LOUDS tries in one call.
/// - Parameters:
///   - group: Maps a LOUDS identifier (the value handed to `loadLOUDS(query:)`) to the search targets for that LOUDS. Only the `[UInt8]` half of each pair is used for the lookup.
///   - depth: Depth range forwarded unchanged to `byfixNodeIndices(targets:depth:)`.
/// - Returns: One entry per key of `group`, pairing the identifier with the de-duplicated node indices that were found.
///   The set is empty when the LOUDS for that identifier could not be loaded.
private func movingTowardPrefixSearch(group: [String: [([Character], [UInt8])]], depth: Range<Int>) -> [(key: String, indices: Set<Int>)] {
    var found: [(key: String, indices: Set<Int>)] = []
    found.reserveCapacity(group.count)
    for (identifier, targets) in group {
        if let louds = self.loadLOUDS(query: identifier) {
            // Only the character-ID form of each target is needed for the trie walk.
            let charIDTargets = targets.map { $0.1 }
            let nodeIndices = louds.byfixNodeIndices(targets: charIDTargets, depth: depth)
            found.append((key: identifier, indices: Set(nodeIndices)))
        } else {
            // No LOUDS available for this identifier: report it with no hits.
            found.append((key: identifier, indices: []))
        }
    }
    return found
}
func movingTowardPrefixSearch(
inputs: [ComposingText.InputElement],
leftIndex: Int,
rightIndexRange: Range<Int>,
useMemory: Bool
useMemory: Bool,
needTypoCorrection: Bool
) -> (
stringToInfo: [[Character]: (endIndex: Int, penalty: PValue)],
indices: [(key: String, indices: [Int])],
temporaryMemoryDicdata: [DicdataElement]
) {
var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange)
var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange, needTypoCorrection: needTypoCorrection)
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
@ -329,7 +311,6 @@ public final class DicdataStore {
}
}
let minCount = stringToInfo.map {$0.0.count}.min() ?? 0
print(#function, minCount, stringToInfo.map{$0.0})
return (
Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )},
@ -381,9 +362,6 @@ public final class DicdataStore {
/// - from:
/// - toIndexRange: `from ..< (toIndexRange)`
public func getLOUDSDataInRange(inputData: ComposingText, from fromIndex: Int, toIndexRange: Range<Int>? = nil, needTypoCorrection: Bool = true) -> [LatticeNode] {
if !needTypoCorrection {
return self.getFrozenLOUDSDataInRange(inputData: inputData, from: fromIndex, toIndexRange: toIndexRange)
}
let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
if fromIndex > toIndexLeft || toIndexLeft >= toIndexRight {
@ -395,7 +373,7 @@ public final class DicdataStore {
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
}
// MARK:
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled)
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled, needTypoCorrection: needTypoCorrection)
// MARK: indices
for (identifier, value) in indices {
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
@ -452,77 +430,6 @@ public final class DicdataStore {
}
}
/// Builds lattice nodes for the input starting at `fromIndex`, using the typo-free ranges
/// produced by `TypoCorrection.getRangesWithoutTypos`.
/// - Parameters:
///   - inputData: The composing text; its `input` elements are the characters to look up.
///   - fromIndex: Index into `inputData.input` where every returned node starts.
///   - toIndexRange: Range of candidate end positions. When `nil`, it starts at `fromIndex` and ends at `inputData.input.count`.
///     The upper bound is additionally capped at `fromIndex + self.maxlength`.
/// - Returns: Nodes for every dictionary entry whose ruby matches one of the enumerated strings, followed by a
///   single-character fallback node. Returns an empty array when the index range is invalid.
private func getFrozenLOUDSDataInRange(inputData: ComposingText, from fromIndex: Int, toIndexRange: Range<Int>? = nil) -> [LatticeNode] {
let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
debug(#function, fromIndex, toIndexRange?.description ?? "nil", toIndexLeft, toIndexRight)
// Reject ranges that start before `fromIndex` or are empty after capping.
if fromIndex > toIndexLeft || toIndexLeft >= toIndexRight {
debug(#function, "index is wrong")
return []
}
// Fallback node: the single katakana character at `fromIndex`, with a fixed value of -10.
let character = String(inputData.input[fromIndex].character.toKatakana())
// NOTE(review): `CIDData..cid` / `MIDData..mid` look like they lost an enum case name between the dots
// (probably a non-ASCII identifier dropped during extraction) — restore from the upstream source.
let characterNode = LatticeNode(data: DicdataElement(word: character, ruby: character, cid: CIDData..cid, mid: MIDData..mid, value: -10), inputRange: fromIndex ..< fromIndex + 1)
if fromIndex == .zero {
characterNode.prevs.append(.BOSNode())
}
// MARK: Enumerate the candidate strings (no typo correction) and the end index of each
let stringToEndIndex = TypoCorrection.getRangesWithoutTypos(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
// MARK: Pick the shortest and longest candidate; with no candidates only the fallback node is returned
guard let (minString, maxString) = stringToEndIndex.keys.minAndMax(by: {$0.count < $1.count}) else {
debug(#function, "minString/maxString is nil", stringToEndIndex)
return [characterNode]
}
let maxIDs = maxString.map(self.character2charId)
// The longest candidate is searched in the LOUDS named after the first character of a candidate and in "user".
// NOTE(review): the force-unwraps assume every key is a non-empty character array — confirm.
var group: [String: [([Character], [UInt8])]] = [
String(stringToEndIndex.keys.first!.first!): [(maxString, maxIDs)],
"user": [(maxString, maxIDs)],
]
// The "memory" LOUDS is searched with the same targets only while learning is enabled.
if learningManager.enabled {
group["memory"] = group["user"]
}
// MARK: Collect dictionary data for the node indices found at the relevant depths
var dicdata: [DicdataElement] = []
let depth = minString.count - 1 ..< maxString.count
for (identifier, indices) in self.movingTowardPrefixSearch(group: group, depth: depth) {
dicdata.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: identifier, indices: indices))
}
if learningManager.enabled {
// Also search the learning manager's temporary memory over the same depth range.
dicdata.append(
contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata.flatMap { $0.value }
)
}
// Add the per-string results of `getWiseDicdata` and of the dynamic user dictionary.
for (key, value) in stringToEndIndex {
let convertTarget = String(key)
dicdata.append(contentsOf: self.getWiseDicdata(convertTarget: convertTarget, inputData: inputData, inputRange: fromIndex ..< value + 1))
dicdata.append(contentsOf: self.getMatchDynamicUserDict(convertTarget))
}
// Keep only entries whose ruby is one of the enumerated strings; nodes starting at index 0 get a BOS predecessor.
if fromIndex == .zero {
return dicdata.compactMap {
guard let endIndex = stringToEndIndex[Array($0.ruby)] else {
return nil
}
let node = LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1)
node.prevs.append(RegisteredNode.BOSNode())
return node
} + [characterNode]
} else {
return dicdata.compactMap {
guard let endIndex = stringToEndIndex[Array($0.ruby)] else {
return nil
}
return LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1)
} + [characterNode]
}
}
/// kana2latticelouds
/// - Parameters:
/// - inputData:

View File

@ -1,7 +1,8 @@
import SwiftUtils
struct TypoCorrectionGenerator {
init(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) {
init(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>, needTypoCorrection: Bool) {
self.maxPenalty = needTypoCorrection ? 3.5 * 3 : 0
self.inputs = inputs
self.left = left
self.rightIndexRange = rightIndexRange
@ -14,7 +15,7 @@ struct TypoCorrectionGenerator {
if count <= j {
return []
}
return TypoCorrection.getTypo(inputs[left + i ... left + j])
return TypoCorrection.getTypo(inputs[left + i ... left + j], frozen: !needTypoCorrection)
}
}
//
@ -33,7 +34,7 @@ struct TypoCorrectionGenerator {
}
}
let maxPenalty: PValue = 3.5 * 3
let maxPenalty: PValue
let inputs: [ComposingText.InputElement]
let left: Int
let rightIndexRange: Range<Int>
@ -145,71 +146,6 @@ enum TypoCorrection {
return !CharacterUtils.isRomanLetter(first) && !DicdataStore.existLOUDS(for: first)
}
/// Enumerates the convert-target strings for the closed ranges `left...r`, for every `r` in `rightIndexRange`,
/// calling `getTypo` with `frozen: true` throughout.
/// Example: `left=4, rightIndexRange=6..<10` considers the ranges `4...6, 4...7, 4...8, 4...9`.
/// Precondition: `left <= rightIndexRange.startIndex`.
/// - Returns: A dictionary from the katakana-converted character array to the inclusive end index of the range
///   that produced it. When the same string is produced more than once, the larger end index is kept.
static func getRangesWithoutTypos(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) -> [[Character]: Int] {
let count = rightIndexRange.endIndex - left
debug(#function, left, rightIndexRange, count)
// nodes[i] holds the candidates that start at offset `i` from `left`, one lookup per length in `Self.lengths`.
let nodes = (0..<count).map {(i: Int) in
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
let j = i + k
if count <= j {
return []
}
// frozen: true — presumably restricts getTypo to the input as typed, without typo candidates (confirm in getTypo).
return Self.getTypo(inputs[left + i ... left + j], frozen: true)
}
}
// Performance tuning note: results are collected into an Array and turned into a Dictionary once at the end.
var stringToInfo: [([Character], Int)] = []
// Search stack, seeded with the candidates at offset 0 whose left side is valid.
// NOTE(review): `nodes[0]` assumes `count > 0` — confirm callers never pass an empty range.
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int)] = nodes[0].compactMap { typoCandidate in
guard let firstElement = typoCandidate.inputElements.first else {
return nil
}
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
var convertTargetElements = [ComposingText.ConvertTargetElement]()
for element in typoCandidate.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
return (convertTargetElements, typoCandidate.inputElements.last!, typoCandidate.inputElements.count)
}
return nil
}
while case .some((var convertTargetElements, let lastElement, let count)) = stack.popLast() {
// Record the string when the consumed length ends inside the requested range and the right side is valid.
if rightIndexRange.contains(count + left - 1) {
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
stringToInfo.append((convertTarget, (count + left - 1)))
}
}
// All input has been consumed; nothing left to extend this path with.
if nodes.endIndex <= count {
continue
}
// NOTE(review): `convertTargetElements` is mutated in place inside this closure, so elements appended for one
// candidate are still present when the next candidate of `nodes[count]` is processed — confirm this is intended.
stack.append(contentsOf: nodes[count].compactMap {
if count + $0.inputElements.count > nodes.endIndex {
return nil
}
for element in $0.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
if Self.shouldBeRemovedForDicdataStore(components: convertTargetElements) {
return nil
}
return (
convertTargetElements: convertTargetElements,
lastElement: $0.inputElements.last!,
count: count + $0.inputElements.count
)
})
}
// On duplicate strings keep the larger end index.
return Dictionary(stringToInfo, uniquingKeysWith: {$0 < $1 ? $1 : $0})
}
static func getRangeWithTypos(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndex right: Int) -> [[Character]: PValue] {
// i
// input = [d(), r(s), r(i), r(t), r(s), d(), d(), d()]

View File

@ -199,54 +199,6 @@ package struct LOUDS: Sendable {
return self.prefixNodeIndices(nodeIndex: nodeIndex, maxDepth: maxDepth, maxCount: maxCount)
}
/// Walks the trie one character at a time and collects every node index passed on the way.
///
/// The walk begins at node index `1` and stops at the first character that has no matching child.
///
/// - Parameter chars: The character IDs to follow, in order.
/// - Returns: The visited node indices, always starting with `1`, followed by one index per matched character.
/// - Note: Presumably these indices are used to look up entries in the loudstxt3 data — confirm against callers.
@inlinable func byfixNodeIndices(chars: [UInt8]) -> [Int] {
    var current = 1
    var visited = [current]
    for char in chars {
        guard let next = self.searchCharNodeIndex(from: current, char: char) else {
            // No child for this character: the remaining characters cannot match either.
            return visited
        }
        visited.append(next)
        current = next
    }
    return visited
}
/// Strict lexicographic "less than" for two byte sequences.
///
/// Elements are compared pairwise with `<`. When one sequence is a proper prefix of the other,
/// the shorter one orders first. Equal sequences are not less than each other.
/// - Parameters:
///   - lhs: Left-hand sequence.
///   - rhs: Right-hand sequence.
/// - Returns: `true` when `lhs` sorts strictly before `rhs`.
private static func lexLessThan(_ lhs: [UInt8], _ rhs: [UInt8]) -> Bool {
    // The standard library already implements exactly this ordering.
    return lhs.lexicographicallyPrecedes(rhs)
}
/// Searches several targets in one pass and returns the node indices found within a depth range.
///
/// The targets are visited in lexicographic order (per `lexLessThan`) and fed one by one to a
/// `MovingTowardPrefixSearchHelper`, which is then asked for its indices in `depth`.
///
/// - Parameters:
///   - targets: Character-ID sequences to search for; the caller's order does not matter.
///   - depth: Depth range handed to the helper's `indicesInDepth(depth:)`.
/// - Returns: The indices reported by the helper for `depth`.
/// - Note: Presumably these indices are used to look up entries in the loudstxt3 data — confirm against callers.
@inlinable func byfixNodeIndices(targets: [[UInt8]], depth: Range<Int>) -> [Int] {
    var searchHelper = MovingTowardPrefixSearchHelper(louds: self)
    // Feed the helper in sorted order; the value returned by `update` is not needed here.
    for orderedTarget in targets.sorted(by: Self.lexLessThan) {
        _ = searchHelper.update(target: orderedTarget)
    }
    return searchHelper.indicesInDepth(depth: depth)
}
struct MovingTowardPrefixSearchHelper {
init(louds: LOUDS) {
self.louds = louds