mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 02:58:27 +00:00
feat: TypoCorrectionとMovingTowardPrefixSearchを同時実行する関数を追加
This commit is contained in:
@@ -261,6 +261,66 @@ public final class DicdataStore {
|
||||
return indices
|
||||
}
|
||||
|
||||
/// Runs typo correction and the LOUDS "moving toward prefix" search in a single pass.
///
/// Every candidate produced by `TypoCorrectionGenerator` is fed straight into one
/// `LOUDS.MovingTowardPrefixSearchHelper` per dictionary, and the temporary-memory
/// entries matching that candidate are collected in the same iteration.
///
/// - Parameters:
///   - inputs: Input elements handed to the typo-correction generator.
///   - leftIndex: Left index handed to the typo-correction generator.
///   - rightIndexRange: Range of right indices handed to the typo-correction generator.
///   - useMemory: When `true`, the `"memory"` dictionary is searched in addition to the
///     first-character dictionary and the `"user"` dictionary.
/// - Returns: A tuple of
///   - `stringToInfo`: the candidates for which at least one helper reported an update,
///     keyed by their characters; when a key occurs more than once the entry with the
///     larger penalty is kept.
///   - `indices`: for each dictionary that could be loaded, the node indices its helper collected.
///   - `temporaryMemoryDicdata`: temporary-memory entries matching the candidates, with the
///     typo penalty already applied.
func movingTowardPrefixSearch(
    inputs: [ComposingText.InputElement],
    leftIndex: Int,
    rightIndexRange: Range<Int>,
    useMemory: Bool
) -> (
    stringToInfo: [[Character]: (endIndex: Int, penalty: PValue)],
    indices: [(key: String, indices: [Int])],
    temporaryMemoryDicdata: [DicdataElement]
) {
    var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange)
    // One search helper per dictionary identifier, created lazily on first use.
    var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
    var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []

    var temporaryMemoryDicdata: [DicdataElement] = []
    // Drain the generator.
    while let (characters, info) = generator.next() {
        guard let firstCharacter = characters.first else {
            continue
        }
        let charIDs = characters.map(self.character2charId(_:))
        let keys: [String] = if useMemory {
            [String(firstCharacter), "user", "memory"]
        } else {
            [String(firstCharacter), "user"]
        }
        var updated = false
        for key in keys {
            withMutableValue(&targetLOUDS[key]) { helper in
                if helper == nil, let louds = self.loadLOUDS(query: key) {
                    helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds, depth: 0 ..< .max)
                }
                // A dictionary that could not be loaded leaves `helper` nil and contributes nothing.
                let hasUpdate = helper?.update(target: charIDs) ?? false
                updated = updated || hasUpdate
            }
        }
        // Short-term (temporary) memory is handled at this point.
        for data in self.learningManager.temporaryThroughMatch(charIDs: consume charIDs, depth: 0 ..< .max) {
            if info.penalty.isZero {
                // No typo penalty: keep the entry untouched. Skip the adjustment below,
                // otherwise the same entry would be appended a second time with a zero adjustment.
                temporaryMemoryDicdata.append(data)
                continue
            }
            let ratio = Self.penaltyRatio[data.lcid]
            let pUnit: PValue = Self.getPenalty(data: data) / 2 // negative value
            let adjust = pUnit * info.penalty * ratio
            if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
                continue
            }
            temporaryMemoryDicdata.append(data.adjustedData(adjust))
        }
        if updated {
            stringToInfo.append((characters, info))
        }
    }

    return (
        // On duplicate keys keep the entry with the larger penalty (the first one on ties).
        Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
        targetLOUDS.map { ($0.key, $0.value.indices)},
        temporaryMemoryDicdata
    )
}
|
||||
/// prefixを起点として、それに続く語(prefix match)をLOUDS上で探索する関数。
|
||||
/// - Parameters:
|
||||
/// - query: 辞書ファイルの識別子(通常は先頭1文字や"user"など)。
|
||||
@@ -318,20 +378,8 @@ public final class DicdataStore {
|
||||
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
|
||||
}
|
||||
// MARK: 誤り訂正の対象を列挙する。非常に重い処理。
|
||||
var stringToInfo = TypoCorrection.getRangesWithTypos(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
|
||||
// MARK: 検索対象を列挙していく。
|
||||
let stringSet: [([Character], [UInt8])] = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
|
||||
let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
|
||||
let depth = minCharIDsCount - 1 ..< maxCharIDsCount
|
||||
let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)})
|
||||
var indices = self.movingTowardPrefixSearch(group: group, depth: depth)
|
||||
if learningManager.enabled {
|
||||
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet, "memory": stringSet], depth: depth))
|
||||
} else {
|
||||
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet], depth: depth))
|
||||
}
|
||||
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled)
|
||||
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく
|
||||
var dicdata: [DicdataElement] = []
|
||||
for (identifier, value) in indices {
|
||||
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
|
||||
let rubyArray = Array(data.ruby)
|
||||
@@ -349,23 +397,6 @@ public final class DicdataStore {
|
||||
}
|
||||
dicdata.append(contentsOf: result)
|
||||
}
|
||||
// temporalな学習結果にpenaltyを加えて追加する
|
||||
for (_, charIds) in consume stringSet {
|
||||
for data in self.learningManager.temporaryThroughMatch(charIDs: consume charIds, depth: depth) {
|
||||
let rubyArray = Array(data.ruby)
|
||||
let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
|
||||
if penalty.isZero {
|
||||
dicdata.append(data)
|
||||
}
|
||||
let ratio = Self.penaltyRatio[data.lcid]
|
||||
let pUnit: PValue = Self.getPenalty(data: data) / 2 // 負の値
|
||||
let adjust = pUnit * penalty * ratio
|
||||
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
|
||||
continue
|
||||
}
|
||||
dicdata.append(data.adjustedData(adjust))
|
||||
}
|
||||
}
|
||||
|
||||
for i in toIndexLeft ..< toIndexRight {
|
||||
do {
|
||||
@@ -1087,4 +1118,4 @@ public final class DicdataStore {
|
||||
"w": ["ワ", "ウィ", "ウェ", "ヲ"],
|
||||
"wy": ["ヰ", "ヱ"]
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ struct TypoCorrectionGenerator {
|
||||
|
||||
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]
|
||||
|
||||
mutating func generate(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) -> ([Character], (endIndex: Int, penalty: PValue))? {
|
||||
mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? {
|
||||
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
|
||||
var result: ([Character], (endIndex: Int, penalty: PValue))? = nil
|
||||
if rightIndexRange.contains(count + left - 1) {
|
||||
@@ -52,14 +52,18 @@ struct TypoCorrectionGenerator {
|
||||
}
|
||||
// エスケープ
|
||||
if self.nodes.endIndex <= count {
|
||||
continue
|
||||
if let result {
|
||||
return result
|
||||
}
|
||||
}
|
||||
// 訂正数上限(3個)
|
||||
if penalty >= maxPenalty {
|
||||
var convertTargetElements = convertTargetElements
|
||||
let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||
if count + correct.count > self.nodes.endIndex {
|
||||
continue
|
||||
if let result {
|
||||
return result
|
||||
}
|
||||
}
|
||||
for element in correct {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
|
||||
@@ -240,35 +240,52 @@ package struct LOUDS: Sendable {
|
||||
// 辞書順でソートする
|
||||
var targets = targets
|
||||
targets.sort(by: Self.lexLessThan)
|
||||
var helper = MovingTowardPrefixSearchHelper(louds: self, depth: depth)
|
||||
for target in targets {
|
||||
helper.update(target: target)
|
||||
}
|
||||
return helper.indices
|
||||
}
|
||||
|
||||
/// Incremental state for the "moving toward prefix" search over a `LOUDS`.
///
/// Targets are fed one at a time through `update(target:)`. The path matched for the
/// previous target is kept in `stack`, so a prefix shared with the next target is not
/// searched again.
struct MovingTowardPrefixSearchHelper {
    init(louds: LOUDS, depth: Range<Int>) {
        self.louds = louds
        self.depth = depth
    }
    /// The LOUDS that is searched.
    let louds: LOUDS
    /// Character positions (0-based) whose node indices are recorded in `indices`.
    let depth: Range<Int>
    /// The final output: node indices collected so far.
    var indices: [Int] = []
    /// The current search path, one entry per matched character.
    var stack: [(nodeIndex: Int, char: UInt8)] = []

    /// Advances the search with one more target.
    ///
    /// - Parameter target: Character IDs of the string to search for.
    /// - Returns: `true` when at least one new node index was appended to `indices`.
    ///   The result may be ignored by callers that only need `indices`.
    @discardableResult
    @inlinable mutating func update(target: [UInt8]) -> Bool {
        var updated = false
        // Search only while `i` stays below the upper bound of `depth`.
        for (i, char) in target.enumerated() where i < self.depth.upperBound {
            if i < self.stack.count {
                if self.stack[i].char == char {
                    // Already searched.
                    continue
                }
                // A different character was found: discard the stack from this position on.
                self.stack.removeSubrange(i...)
            }
            // When this point is reached, stack[i] does not exist.
            assert(i >= self.stack.count, "stack[\(i)] must not exist for logical reason.")
            // Search for the next character, starting from the node on top of the stack
            // (or from node index 1 when the stack is empty).
            if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) {
                if self.depth.contains(i) {
                    self.indices.append(nodeIndex)
                    updated = true
                }
                self.stack.append((nodeIndex, char))
            } else {
                // Not found: stop here.
                break
            }
        }
        return updated
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user