mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
feat: typo correctionの枝刈りを実装し、direct入力のケースでTestFullConversionの実行速度が1.6倍に向上
This commit is contained in:
@ -288,17 +288,25 @@ public final class DicdataStore {
|
||||
[String(firstCharacter), "user"]
|
||||
}
|
||||
var updated = false
|
||||
var availableMaxIndex = 0
|
||||
for key in keys {
|
||||
withMutableValue(&targetLOUDS[key]) { helper in
|
||||
if helper == nil, let louds = self.loadLOUDS(query: key) {
|
||||
helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds, depth: 0 ..< .max)
|
||||
}
|
||||
let hasUpdate = helper?.update(target: charIDs) ?? false
|
||||
updated = updated || hasUpdate
|
||||
guard helper != nil else {
|
||||
return
|
||||
}
|
||||
let result = helper!.update(target: charIDs)
|
||||
updated = updated || result.updated
|
||||
availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
|
||||
}
|
||||
}
|
||||
// 短期記憶についてはこの位置で処理する
|
||||
for data in self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs, depth: 0 ..< .max) {
|
||||
let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs, depth: 0 ..< .max)
|
||||
updated = updated || !(result.dicdata.isEmpty)
|
||||
availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
|
||||
for data in result.dicdata {
|
||||
if info.penalty.isZero {
|
||||
temporaryMemoryDicdata.append(data)
|
||||
}
|
||||
@ -310,6 +318,10 @@ public final class DicdataStore {
|
||||
}
|
||||
temporaryMemoryDicdata.append(data.adjustedData(adjust))
|
||||
}
|
||||
if availableMaxIndex < characters.endIndex - 1 {
|
||||
// 到達不可能だったパスを通知
|
||||
generator.setUnreachablePath(target: characters[...(availableMaxIndex + 1)])
|
||||
}
|
||||
if updated {
|
||||
stringToInfo.append((characters, info))
|
||||
}
|
||||
@ -478,7 +490,7 @@ public final class DicdataStore {
|
||||
}
|
||||
if learningManager.enabled {
|
||||
// temporalな学習結果にpenaltyを加えて追加する
|
||||
dicdata.append(contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth))
|
||||
dicdata.append(contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata)
|
||||
}
|
||||
for (key, value) in stringToEndIndex {
|
||||
let convertTarget = String(key)
|
||||
|
@ -584,20 +584,22 @@ struct TemporalLearningMemoryTrie {
|
||||
return nodes[index].dataIndices.map {self.dicdata[$0]}
|
||||
}
|
||||
|
||||
/// Walks the trie along `chars`, starting from node `0`, and collects the entries stored at the visited nodes.
///
/// - Parameters:
///   - chars: The character ID sequence to follow.
///   - depth: Offsets into `chars` at which the entries of the reached node are collected.
/// - Returns: `dicdata` holds the collected entries. `availableMaxIndex` is the largest offset in `chars`
///   whose character could be followed in the trie (`0` when no character could be followed).
func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) {
    var index = 0
    var availableMaxIndex = 0
    var indices: [Int] = []
    for (offset, char) in chars.enumerated() {
        guard let nextIndex = nodes[index].children[char] else {
            // No child for this character, so nothing deeper can match.
            break
        }
        // Record the position inside `chars`, not the trie node index: callers compare this value
        // against positions of the input characters.
        availableMaxIndex = offset
        index = nextIndex
        if depth.contains(offset) {
            indices.append(contentsOf: nodes[index].dataIndices)
        }
    }
    return (indices.map { self.dicdata[$0] }, availableMaxIndex)
}
|
||||
|
||||
func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
|
||||
@ -718,9 +720,9 @@ final class LearningManager {
|
||||
return self.temporaryMemory.perfectMatch(chars: charIDs)
|
||||
}
|
||||
|
||||
/// Runs the moving-toward-prefix search against the temporary memory.
///
/// - Parameters:
///   - charIDs: The character ID sequence to search with.
///   - depth: The range of offsets passed through to the temporary memory search.
/// - Returns: The result of `temporaryMemory.movingTowardPrefixSearch`, or an empty `dicdata` with
///   `availableMaxIndex` of `0` when `options` is unset or its learning type does not use memory.
func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int>) -> (dicdata: [DicdataElement], availableMaxIndex: Int) {
    if let options, options.learningType.needUsingMemory {
        return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth)
    }
    // Memory is not in use: report no entries.
    return (dicdata: [], availableMaxIndex: 0)
}
|
||||
|
@ -42,6 +42,27 @@ struct TypoCorrectionGenerator {
|
||||
|
||||
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]
|
||||
|
||||
/// Reports that anything starting with `target` is unreachable and drops the matching stack entries.
///
/// An entry is dropped when the characters of its leading `.direct` elements start with `target`.
/// - Parameter target: The character sequence identifying the unreachable prefix.
mutating func setUnreachablePath(target: some Collection<Character>) {
    self.stack.removeAll { entry in
        // Characters accumulated from the leading run of `.direct` elements.
        var stablePrefix: [Character] = []
        for element in entry.convertTargetElements {
            switch element.inputStyle {
            case .direct:
                stablePrefix.append(contentsOf: element.string)
            case .roman2kana:
                // TODO: impl — roman2kana elements are not handled yet, so stop here and keep the entry.
                return false
            }
            // Once the stable prefix starts with `target`, this entry is unreachable.
            if stablePrefix.hasPrefix(target) {
                return true
            }
        }
        return false
    }
}
|
||||
|
||||
mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? {
|
||||
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
|
||||
var result: ([Character], (endIndex: Int, penalty: PValue))? = nil
|
||||
|
@ -242,7 +242,7 @@ package struct LOUDS: Sendable {
|
||||
targets.sort(by: Self.lexLessThan)
|
||||
var helper = MovingTowardPrefixSearchHelper(louds: self, depth: depth)
|
||||
for target in targets {
|
||||
helper.update(target: target)
|
||||
_ = helper.update(target: target)
|
||||
}
|
||||
return helper.indices
|
||||
}
|
||||
@ -258,13 +258,18 @@ package struct LOUDS: Sendable {
|
||||
var indices: [Int] = []
|
||||
// 現在の探索結果を保存しておく
|
||||
var stack: [(nodeIndex: Int, char: UInt8)] = []
|
||||
|
||||
@inlinable mutating func update(target: [UInt8]) -> Bool {
|
||||
|
||||
/// `target`を用いて更新する
|
||||
/// - Parameter target: 検索対象の`CharID`の列
|
||||
/// - Returns: `updated`はこれによって`indices`の更新があったかどうか。`availableMaxIndex`はアクセスに成功した最大インデックス
|
||||
@inlinable mutating func update(target: [UInt8]) -> (updated: Bool, availableMaxIndex: Int) {
|
||||
var updated = false
|
||||
var availableMaxIndex = 0
|
||||
// iがupperBoundを超えない範囲で検索を行う
|
||||
for (i, char) in target.enumerated() where i < self.depth.upperBound {
|
||||
if i < self.stack.count, self.stack[i].char == char {
|
||||
// すでに探索済み
|
||||
availableMaxIndex = i
|
||||
continue
|
||||
} else if i < self.stack.count, self.stack[i].char != char {
|
||||
// 異なる文字が見つかったら、その時点でそこから先のstackを破棄
|
||||
@ -278,6 +283,7 @@ package struct LOUDS: Sendable {
|
||||
if self.depth.contains(i) {
|
||||
self.indices.append(nodeIndex)
|
||||
updated = true
|
||||
availableMaxIndex = i
|
||||
}
|
||||
self.stack.append((nodeIndex, char))
|
||||
} else {
|
||||
@ -285,7 +291,7 @@ package struct LOUDS: Sendable {
|
||||
break
|
||||
}
|
||||
}
|
||||
return updated
|
||||
return (updated, availableMaxIndex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ final class TemporalLearningMemoryTrieTests: XCTestCase {
|
||||
XCTAssertEqual(result1.first?.word, element1.word)
|
||||
XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
|
||||
|
||||
let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count)
|
||||
let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count).dicdata
|
||||
XCTAssertEqual(result2.map { $0.word }, [element2.word])
|
||||
|
||||
let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))
|
||||
|
Reference in New Issue
Block a user