Files
AzooKeyKanaKanjiConverter/Sources/KanaKanjiConverterModule/DictionaryManagement/TypoCorrection.swift
Miwa 59cde2a2ca Merge pull request #224 from azooKey/refactor/remove_complicated_boundary_checker
refactor: 特定のローマ字かな変換テーブルを前提にした複雑な境界チェックを廃止する
2025-07-21 17:33:47 +09:00

292 lines
12 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import SwiftUtils
struct TypoCorrectionGenerator: Sendable {
/// Builds the per-position typo candidates for the processed span of `inputs` and seeds the search stack.
///
/// - Parameters:
///   - inputs: The complete sequence of input elements.
///   - range: `leftIndex` is the first processed index of `inputs`; the upper bound of
///     `rightIndexRange` is the exclusive end of the processed span.
///   - needTypoCorrection: When `false`, the penalty budget is `0` and `getTypo` is called in
///     frozen mode, so the typo tables are not consulted.
init(inputs: [ComposingText.InputElement], range: ProcessRange, needTypoCorrection: Bool) {
    self.maxPenalty = needTypoCorrection ? 3.5 * 3 : 0
    self.inputs = inputs
    self.range = range
    let count = range.rightIndexRange.endIndex - range.leftIndex
    self.count = count
    // nodes[i] collects the candidates for every run that starts at offset `i` (relative to
    // `range.leftIndex`) and covers `k + 1` elements for each `k` in `Self.lengths`.
    let nodes: [[TypoCandidate]] = (0 ..< count).map { (i: Int) in
        Self.lengths.flatMap { (k: Int) -> [TypoCandidate] in
            let j = i + k
            if count <= j {
                // The run would extend past the processed span.
                return []
            }
            return Self.getTypo(inputs[range.leftIndex + i ... range.leftIndex + j], frozen: !needTypoCorrection)
        }
    }
    self.nodes = nodes
    // Convert everything to the left of the processed span so that the seed candidates can be
    // checked against that context.
    var leftConvertTargetElements: [ComposingText.ConvertTargetElement] = []
    for element in inputs[0 ..< range.leftIndex] {
        ComposingText.updateConvertTargetElements(currentElements: &leftConvertTargetElements, newElement: element)
    }
    let actualLeftConvertTarget = leftConvertTargetElements.reduce(into: "") { $0 += $1.string }
    // `nodes` is empty when the processed span is empty; start with an empty stack in that case
    // instead of trapping on `nodes[0]`.
    self.stack = (nodes.first ?? []).compactMap { typoCandidate in
        var convertTargetElements: [ComposingText.ConvertTargetElement] = []
        var fullConvertTargetElements = leftConvertTargetElements
        for element in typoCandidate.inputElements {
            ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
            ComposingText.updateConvertTargetElements(currentElements: &fullConvertTargetElements, newElement: element)
        }
        let fullConvertTarget = fullConvertTargetElements.reduce(into: "") { $0 += $1.string }
        let convertTarget = convertTargetElements.reduce(into: "") { $0 += $1.string }
        // Keep the candidate only if converting it after the left context gives exactly the
        // left text followed by the candidate's own text.
        guard fullConvertTarget == actualLeftConvertTarget + convertTarget else {
            return nil
        }
        return (convertTargetElements, typoCandidate.inputElements.count, typoCandidate.weight)
    }
}
/// Penalty budget: `3.5 * 3` when typo correction was requested at initialisation, otherwise `0`.
let maxPenalty: PValue
/// The complete input sequence handed to the initialiser.
let inputs: [ComposingText.InputElement]
/// The part of `inputs` this generator processes.
let range: ProcessRange
/// `nodes[i]` holds the typo candidates for runs starting at offset `i` from `range.leftIndex`.
let nodes: [[TypoCandidate]]
/// Length of the processed span: `range.rightIndexRange.endIndex - range.leftIndex`.
let count: Int
/// Describes which part of the input sequence a `TypoCorrectionGenerator` works on.
struct ProcessRange: Sendable, Equatable {
    /// Index into the input sequence at which processing starts.
    var leftIndex: Int
    /// Input indices at which a yielded candidate may end; its upper bound is the exclusive end
    /// of the processed span.
    var rightIndexRange: Range<Int>
}
/// Pending search states, consumed last-in-first-out by `next()`. Each state carries the convert
/// target elements built so far, the number of input elements consumed from `range.leftIndex`,
/// and the accumulated penalty.
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], count: Int, penalty: PValue)]
/// Reports whether `leftConvertTargetElements` is a prefix of `rightConvertTargetElements`.
///
/// When the left side is shorter, each of its elements must equal the element at the same
/// position on the right. When both sides have the same length, all elements but the last must
/// be equal, and the last pair must share an input style with the right string starting with
/// the left string. Two empty arrays count as a match.
///
/// - Returns: `true` if the left elements form a prefix of the right elements in the sense above.
private static func check(
    _ leftConvertTargetElements: [ComposingText.ConvertTargetElement],
    isPrefixOf rightConvertTargetElements: [ComposingText.ConvertTargetElement]
) -> Bool {
    let leftCount = leftConvertTargetElements.count
    let rightCount = rightConvertTargetElements.count
    // A longer sequence can never be a prefix of a shorter one.
    guard leftCount <= rightCount else {
        return false
    }
    if leftCount < rightCount {
        // `zip` stops at the shorter (left) side, so this compares exactly the leading elements.
        return !zip(leftConvertTargetElements, rightConvertTargetElements).contains { $0 != $1 }
    }
    // Same length from here on.
    guard let leftLast = leftConvertTargetElements.last, let rightLast = rightConvertTargetElements.last else {
        // Both sides are empty.
        return true
    }
    // Everything before the final element must match exactly.
    let leadingMismatch = zip(leftConvertTargetElements.dropLast(), rightConvertTargetElements.dropLast())
        .contains { $0 != $1 }
    if leadingMismatch {
        return false
    }
    if leftLast.inputStyle != rightLast.inputStyle {
        return false
    }
    // The final element only needs to be a string prefix.
    return rightLast.string.hasPrefix(leftLast.string)
}
/// Removes pending search states whose stable leading text already begins with `target`.
///
/// For each state the stable text is accumulated element by element: `.direct` elements
/// contribute their whole string, `.roman2kana` elements contribute their whole string unless it
/// ends with one of `Roman2Kana.unstableSuffixes`. After each fully stable element the
/// accumulated text is tested against `target`.
///
/// - Parameter target: The character sequence that marks a state as unreachable when it is a
///   prefix of the state's stable text.
mutating func setUnreachablePath(target: some Collection<Character>) {
    self.stack.removeAll { state in
        var stablePrefix: [Character] = []
        for item in state.convertTargetElements {
            switch item.inputStyle {
            case .direct:
                stablePrefix.append(contentsOf: item.string)
            case .roman2kana:
                // Length of the longest unstable suffix this element ends with (0 if none).
                var unstableLength = 0
                for suffix in Roman2Kana.unstableSuffixes where item.string.hasSuffix(suffix) {
                    unstableLength = max(unstableLength, suffix.count)
                }
                if unstableLength == 0 {
                    stablePrefix.append(contentsOf: item.string)
                } else {
                    // Only the part before the unstable suffix is settled, and scanning stops here.
                    // NOTE(review): this partial prefix is collected but never compared with
                    // `target` before the state is kept — confirm whether that is intended.
                    stablePrefix.append(contentsOf: item.string[0 ..< item.string.endIndex - unstableLength])
                    return false
                }
            }
            // The stable text already starts with `target`, so this state is dropped.
            if stablePrefix.hasPrefix(target) {
                return true
            }
        }
        return false
    }
}
/// Advances the search until a state yields a candidate, pushing successor states on the way.
///
/// - Returns: The candidate's characters (each passed through `toKatakana()`) together with the
///   lattice index of the last consumed input element and the accumulated penalty, or `nil`
///   once the stack is exhausted.
mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
    while let state = self.stack.popLast() {
        let (elements, consumed, penalty) = state
        // Index in `inputs` of the first element this state has not consumed yet.
        let nextInputIndex = self.range.leftIndex + consumed
        let lastInputIndex = nextInputIndex - 1

        var yielded: ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))?
        if self.range.rightIndexRange.contains(lastInputIndex) {
            // The state is yielded only if feeding the next raw input leaves the current
            // elements as a prefix of the result (or if there is no further input).
            let survivesNextInput: Bool
            if nextInputIndex < self.inputs.endIndex {
                var lookahead = elements
                ComposingText.updateConvertTargetElements(currentElements: &lookahead, newElement: self.inputs[nextInputIndex])
                survivesNextInput = Self.check(elements, isPrefixOf: lookahead)
            } else {
                survivesNextInput = true
            }
            if survivesNextInput {
                let katakanaTarget: [Character] = elements.flatMap { element in
                    element.string.map { $0.toKatakana() }
                }
                yielded = (katakanaTarget, (.input(lastInputIndex), penalty))
            }
        }

        // Every node has been consumed, so there is nothing left to expand.
        guard consumed < self.nodes.endIndex else {
            if let yielded {
                return yielded
            }
            continue
        }

        if penalty >= self.maxPenalty {
            // Budget used up: continue with the next input as typed (katakana-normalised) only.
            // The guard above guarantees one unconsumed node, so this single element always fits.
            let source = self.inputs[nextInputIndex]
            let literal = ComposingText.InputElement(character: source.character.toKatakana(), inputStyle: source.inputStyle)
            var extended = elements
            ComposingText.updateConvertTargetElements(currentElements: &extended, newElement: literal)
            self.stack.append((convertTargetElements: extended, count: consumed + 1, penalty: penalty))
        } else {
            // Branch into every candidate starting here that stays within the node list.
            for candidate in self.nodes[consumed] where consumed + candidate.inputElements.count <= self.nodes.endIndex {
                var extended = elements
                for element in candidate.inputElements {
                    ComposingText.updateConvertTargetElements(currentElements: &extended, newElement: element)
                }
                self.stack.append((
                    convertTargetElements: extended,
                    count: consumed + candidate.inputElements.count,
                    penalty: penalty + candidate.weight
                ))
            }
        }

        // Successors are queued; hand back this state's candidate if it produced one.
        if let yielded {
            return yielded
        }
    }
    return nil
}
/// Returns the candidate replacements for a short run of input elements.
///
/// The lookup key is the run's characters passed through `toKatakana()`. A run is handled only
/// when all of its elements share one input style. A one-character key always receives an extra
/// zero-weight candidate that keeps the (katakana-normalised) character as it is.
///
/// - Parameters:
///   - elements: The run of input elements to look up.
///   - frozen: When `true`, the typo tables are not consulted.
/// - Returns: The table entries for the key plus, for one-character keys, the unchanged
///   candidate; an empty array for an empty key or a run with mixed input styles.
fileprivate static func getTypo(_ elements: some Collection<ComposingText.InputElement>, frozen: Bool = false) -> [TypoCandidate] {
    var key = ""
    for element in elements {
        key.append(element.character.toKatakana())
    }
    let keyLength = key.count
    // An empty key matches neither table and gets no unchanged candidate.
    guard keyLength > 0 else {
        return []
    }

    if elements.allSatisfy({ $0.inputStyle == .direct }) {
        var candidates: [TypoCandidate] = frozen ? [] : Self.directPossibleTypo[key, default: []]
        if keyLength == 1 {
            // Keeping the character as typed costs nothing.
            candidates.append(
                TypoCandidate(inputElements: key.map { ComposingText.InputElement(character: $0, inputStyle: .direct) }, weight: 0)
            )
        }
        return candidates
    }

    if elements.allSatisfy({ $0.inputStyle == .roman2kana }) {
        var candidates: [TypoCandidate] = frozen ? [] : Self.roman2KanaPossibleTypo[key, default: []]
        if keyLength == 1 {
            // Keeping the character as typed costs nothing.
            candidates.append(
                TypoCandidate(inputElements: key.map { ComposingText.InputElement(character: $0, inputStyle: .roman2kana) }, weight: 0)
            )
        }
        return candidates
    }

    // Mixed input styles are not corrected.
    return []
}
/// Offsets `k` used when building `nodes`: each run handed to `getTypo` spans `k + 1` input
/// elements, i.e. one or two elements.
fileprivate static let lengths = [0, 1]
/// Compact description of one replacement, used to write the typo table literals.
private struct TypoUnit: Equatable {
    /// The replacement text.
    var value: String
    /// The penalty charged for applying this replacement.
    var weight: PValue
    /// Creates a unit; `weight` defaults to `3.5` when omitted.
    init(_ value: String, weight: PValue = 3.5) {
        self.value = value
        self.weight = weight
    }
}
/// One possible reading of a run of input: the input elements to use and the penalty for using them.
struct TypoCandidate: Sendable, Equatable {
    /// The input elements that stand in for the original run.
    var inputElements: [ComposingText.InputElement]
    /// Penalty added to a search state that takes this candidate (`0` for the unchanged input).
    var weight: PValue
}
/// Typo table for directly entered kana (`inputStyle == .direct`).
///
/// Keys are katakana because `getTypo` builds its lookup key with `toKatakana()`. Each entry
/// maps a typed character to the characters it may have been meant as (voiced, semi-voiced or
/// small forms), with the penalty for each; `TypoUnit` supplies `3.5` when no weight is given.
///
/// NOTE(review): the kana literals in this table were empty strings in the reviewed text, which
/// made every key identical (a dictionary literal must not repeat a key) and every lookup miss.
/// They were restored from the structure of the entries; the weights and their positions are
/// unchanged. Confirm the characters against version control, in particular the order of the
/// two candidates for "ツ".
private static let directPossibleTypo: [String: [TypoCandidate]] = [
    "カ": [TypoUnit("ガ", weight: 7.0)],
    "キ": [TypoUnit("ギ")],
    "ク": [TypoUnit("グ")],
    "ケ": [TypoUnit("ゲ")],
    "コ": [TypoUnit("ゴ")],
    "サ": [TypoUnit("ザ")],
    "シ": [TypoUnit("ジ")],
    "ス": [TypoUnit("ズ")],
    "セ": [TypoUnit("ゼ")],
    "ソ": [TypoUnit("ゾ")],
    "タ": [TypoUnit("ダ", weight: 6.0)],
    "チ": [TypoUnit("ヂ")],
    "ツ": [TypoUnit("ッ", weight: 6.0), TypoUnit("ヅ", weight: 4.5)],
    "テ": [TypoUnit("デ", weight: 6.0)],
    "ト": [TypoUnit("ド", weight: 4.5)],
    "ハ": [TypoUnit("バ", weight: 4.5), TypoUnit("パ", weight: 6.0)],
    "ヒ": [TypoUnit("ビ"), TypoUnit("ピ", weight: 4.5)],
    "フ": [TypoUnit("ブ"), TypoUnit("プ", weight: 4.5)],
    "ヘ": [TypoUnit("ベ"), TypoUnit("ペ", weight: 4.5)],
    "ホ": [TypoUnit("ボ"), TypoUnit("ポ", weight: 4.5)],
    "バ": [TypoUnit("パ")],
    "ビ": [TypoUnit("ピ")],
    "ブ": [TypoUnit("プ")],
    "ベ": [TypoUnit("ペ")],
    "ボ": [TypoUnit("ポ")],
    "ヤ": [TypoUnit("ャ")],
    "ユ": [TypoUnit("ュ")],
    "ヨ": [TypoUnit("ョ")]
].mapValues {
    // Expand each compact unit into a candidate made of `.direct` input elements.
    $0.map {
        TypoCandidate(
            inputElements: $0.value.map {ComposingText.InputElement(character: $0, inputStyle: .direct)},
            weight: $0.weight
        )
    }
}
/// Typo table for romaji input (`inputStyle == .roman2kana`).
///
/// Maps a typed two-letter sequence to the sequences it may have been meant as. Every
/// replacement carries a penalty of `3.5`.
private static let roman2KanaPossibleTypo: [String: [TypoCandidate]] = {
    let replacementsByTypedKeys: [String: [String]] = [
        "bs": ["ba"],
        "no": ["bo"],
        "li": ["ki"],
        "lo": ["ko"],
        "lu": ["ku"],
        "my": ["mu"],
        "tp": ["to"],
        "ts": ["ta"],
        "wi": ["wo"],
        "pu": ["ou"]
    ]
    // Expand each replacement string into a candidate made of `.roman2kana` input elements.
    return replacementsByTypedKeys.mapValues { replacements in
        replacements.map { replacement in
            TypoCandidate(
                inputElements: replacement.map { letter in
                    ComposingText.InputElement(character: letter, inputStyle: .roman2kana)
                },
                weight: 3.5
            )
        }
    }
}()
}