mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
Merge pull request #211 from azooKey/feat/unified_lookup
feat: 誤り訂正と辞書引きの統合処理の実装
This commit is contained in:
@ -261,6 +261,83 @@ public final class DicdataStore {
|
||||
return indices
|
||||
}
|
||||
|
||||
/// Enumerates typo-corrected readings and looks them up in the dictionaries in one pass.
///
/// Each candidate produced by `TypoCorrectionGenerator` is fed to one
/// `LOUDS.MovingTowardPrefixSearchHelper` per dictionary (the first-character dictionary,
/// `"user"`, and `"memory"` when `useMemory` is true) and to the temporary learning memory.
/// When no source can follow a candidate to its end, the generator is told the failing
/// prefix so that it can drop other stacked candidates sharing it.
///
/// - Parameters:
///   - inputs: The input elements to correct and look up.
///   - leftIndex: Index in `inputs` where every candidate starts.
///   - rightIndexRange: Range of indices in `inputs` where a candidate may end.
///   - useMemory: Whether the `"memory"` dictionary is searched as well.
/// - Returns:
///   - `stringToInfo`: candidates that updated at least one source, mapped to their end index
///     and penalty. For duplicate strings the entry with the larger penalty is kept.
///   - `indices`: per-dictionary node indices at depth `minCount - 1` or deeper, where
///     `minCount` is the length of the shortest recorded candidate.
///   - `temporaryMemoryDicdata`: temporary-memory entries in the same depth range, with the
///     typo penalty applied.
func movingTowardPrefixSearch(
    inputs: [ComposingText.InputElement],
    leftIndex: Int,
    rightIndexRange: Range<Int>,
    useMemory: Bool
) -> (
    stringToInfo: [[Character]: (endIndex: Int, penalty: PValue)],
    indices: [(key: String, indices: [Int])],
    temporaryMemoryDicdata: [DicdataElement]
) {
    var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange)
    var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
    var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
    // Temporary-memory hits, keyed by the character offset at which they were found.
    var temporaryMemoryDicdata: [Int: [DicdataElement]] = [:]

    // Drain the generator.
    while let (characters, info) = generator.next() {
        guard let firstCharacter = characters.first else {
            continue
        }
        let charIDs = characters.map(self.character2charId(_:))
        var keys = [String(firstCharacter), "user"]
        if useMemory {
            keys.append("memory")
        }
        var updated = false
        var availableMaxIndex = 0
        for key in keys {
            withMutableValue(&targetLOUDS[key]) { helper in
                // Create the helper the first time this dictionary is needed.
                if helper == nil, let louds = self.loadLOUDS(query: key) {
                    helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds)
                }
                // `nil` here means the dictionary could not be loaded.
                guard let result = helper?.update(target: charIDs) else {
                    return
                }
                if result.updated {
                    updated = true
                }
                availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
            }
        }
        // The temporary (short-term) memory is handled here.
        let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs)
        if !result.dicdata.isEmpty {
            updated = true
        }
        availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
        for (depth, dicdata) in result.dicdata {
            if info.penalty.isZero {
                // No typo was corrected: keep the entries as they are, exactly once.
                temporaryMemoryDicdata[depth, default: []].append(contentsOf: dicdata)
                continue
            }
            for data in dicdata {
                let ratio = Self.penaltyRatio[data.lcid]
                let pUnit: PValue = Self.getPenalty(data: data) / 2 // negative value
                let adjust = pUnit * info.penalty * ratio
                if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
                    continue
                }
                temporaryMemoryDicdata[depth, default: []].append(data.adjustedData(adjust))
            }
        }
        if availableMaxIndex < characters.endIndex - 1 {
            // Report the prefix that could not be reached.
            generator.setUnreachablePath(target: characters[...(availableMaxIndex + 1)])
        }
        if updated {
            stringToInfo.append((characters, info))
        }
    }

    let minCount = stringToInfo.map { $0.0.count }.min() ?? 0
    debug(#function, minCount, stringToInfo.map { $0.0 })
    return (
        Dictionary(stringToInfo, uniquingKeysWith: { $0.penalty < $1.penalty ? $1 : $0 }),
        targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max)) },
        // Same depth range as the LOUDS lookup above: offset >= minCount - 1.
        temporaryMemoryDicdata.flatMap {
            minCount <= $0.key + 1 ? $0.value : []
        }
    )
}
|
||||
/// prefixを起点として、それに続く語(prefix match)をLOUDS上で探索する関数。
|
||||
/// - Parameters:
|
||||
/// - query: 辞書ファイルの識別子(通常は先頭1文字や"user"など)。
|
||||
@ -318,20 +395,8 @@ public final class DicdataStore {
|
||||
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
|
||||
}
|
||||
// MARK: 誤り訂正の対象を列挙する。非常に重い処理。
|
||||
var stringToInfo = inputData.getRangesWithTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
|
||||
// MARK: 検索対象を列挙していく。
|
||||
let stringSet: [([Character], [UInt8])] = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
|
||||
let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
|
||||
let depth = minCharIDsCount - 1 ..< maxCharIDsCount
|
||||
let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)})
|
||||
var indices = self.movingTowardPrefixSearch(group: group, depth: depth)
|
||||
if learningManager.enabled {
|
||||
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet, "memory": stringSet], depth: depth))
|
||||
} else {
|
||||
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet], depth: depth))
|
||||
}
|
||||
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled)
|
||||
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく
|
||||
var dicdata: [DicdataElement] = []
|
||||
for (identifier, value) in indices {
|
||||
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
|
||||
let rubyArray = Array(data.ruby)
|
||||
@ -349,23 +414,6 @@ public final class DicdataStore {
|
||||
}
|
||||
dicdata.append(contentsOf: result)
|
||||
}
|
||||
// temporalな学習結果にpenaltyを加えて追加する
|
||||
for (_, charIds) in consume stringSet {
|
||||
for data in self.learningManager.temporaryThroughMatch(charIDs: consume charIds, depth: depth) {
|
||||
let rubyArray = Array(data.ruby)
|
||||
let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
|
||||
if penalty.isZero {
|
||||
dicdata.append(data)
|
||||
}
|
||||
let ratio = Self.penaltyRatio[data.lcid]
|
||||
let pUnit: PValue = Self.getPenalty(data: data) / 2 // 負の値
|
||||
let adjust = pUnit * penalty * ratio
|
||||
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
|
||||
continue
|
||||
}
|
||||
dicdata.append(data.adjustedData(adjust))
|
||||
}
|
||||
}
|
||||
|
||||
for i in toIndexLeft ..< toIndexRight {
|
||||
do {
|
||||
@ -425,7 +473,7 @@ public final class DicdataStore {
|
||||
}
|
||||
|
||||
// MARK: 誤り訂正なし
|
||||
let stringToEndIndex = inputData.getRangesWithoutTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
|
||||
let stringToEndIndex = TypoCorrection.getRangesWithoutTypos(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
|
||||
// MARK: 検索対象を列挙していく。
|
||||
guard let (minString, maxString) = stringToEndIndex.keys.minAndMax(by: {$0.count < $1.count}) else {
|
||||
debug(#function, "minString/maxString is nil", stringToEndIndex)
|
||||
@ -447,7 +495,9 @@ public final class DicdataStore {
|
||||
}
|
||||
if learningManager.enabled {
|
||||
// temporalな学習結果にpenaltyを加えて追加する
|
||||
dicdata.append(contentsOf: self.learningManager.temporaryThroughMatch(charIDs: consume maxIDs, depth: depth))
|
||||
dicdata.append(
|
||||
contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata.flatMap { $0.value }
|
||||
)
|
||||
}
|
||||
for (key, value) in stringToEndIndex {
|
||||
let convertTarget = String(key)
|
||||
@ -485,7 +535,7 @@ public final class DicdataStore {
|
||||
let segment = inputData.input[fromIndex...toIndex].reduce(into: "") {$0.append($1.character)}.toKatakana()
|
||||
|
||||
// TODO: 最適化の余地あり
|
||||
let string2penalty = inputData.getRangeWithTypos(fromIndex, toIndex).filter {
|
||||
let string2penalty = TypoCorrection.getRangeWithTypos(inputs: inputData.input, leftIndex: fromIndex, rightIndex: toIndex).filter {
|
||||
needTypoCorrection || $0.value == 0.0
|
||||
}
|
||||
|
||||
|
@ -584,20 +584,22 @@ struct TemporalLearningMemoryTrie {
|
||||
return nodes[index].dataIndices.map {self.dicdata[$0]}
|
||||
}
|
||||
|
||||
/// Walks the trie along `chars` and collects the entries stored at each visited node.
///
/// - Parameters:
///   - chars: The character IDs to follow from the root.
///   - depth: Only offsets inside this range contribute entries to the result.
/// - Returns: `dicdata` maps a character offset to the entries stored at the node reached
///   at that offset; `availableMaxIndex` is the largest offset in `chars` that was
///   successfully followed (0 when nothing was followed).
func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
    var index = 0
    var availableMaxIndex = 0
    var indices: [Int: [Int]] = [:]
    for (offset, char) in chars.enumerated() {
        guard let nextIndex = nodes[index].children[char] else {
            // No child for this character: the walk ends here.
            break
        }
        // Record the position in `chars`, not the trie node index.
        availableMaxIndex = offset
        index = nextIndex
        if depth.contains(offset) {
            indices[offset] = nodes[index].dataIndices
        }
    }
    return (indices.mapValues { items in items.map { self.dicdata[$0] } }, availableMaxIndex)
}
|
||||
|
||||
func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
|
||||
@ -718,11 +720,11 @@ final class LearningManager {
|
||||
return self.temporaryMemory.perfectMatch(chars: charIDs)
|
||||
}
|
||||
|
||||
/// Runs `movingTowardPrefixSearch` on the temporary learning memory.
///
/// Returns an empty result (`[:]`, `0`) when no options are set or when the learning type
/// does not use the memory.
/// - Parameters:
///   - charIDs: The character IDs to follow.
///   - depth: Offsets to collect entries for; defaults to every offset.
func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int> = 0 ..< .max) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
    let memoryIsUsed = options?.learningType.needUsingMemory ?? false
    if !memoryIsUsed {
        return ([:], 0)
    }
    return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth)
}
|
||||
|
||||
func temporaryPrefixMatch(charIDs: [UInt8]) -> [DicdataElement] {
|
||||
|
@ -1,50 +1,29 @@
|
||||
//
|
||||
// TypoCorrection.swift
|
||||
// Keyboard
|
||||
//
|
||||
// Created by ensan on 2022/12/18.
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
import SwiftUtils
|
||||
|
||||
// MARK: 誤り訂正用のAPI
|
||||
extension ComposingText {
|
||||
private func shouldBeRemovedForDicdataStore(components: [ConvertTargetElement]) -> Bool {
|
||||
// 判定に使うのは最初の1エレメントの最初の文字で十分
|
||||
guard let first = components.first?.string.first?.toKatakana() else {
|
||||
return false
|
||||
}
|
||||
return !CharacterUtils.isRomanLetter(first) && !DicdataStore.existLOUDS(for: first)
|
||||
}
|
||||
struct TypoCorrectionGenerator {
|
||||
init(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) {
|
||||
self.inputs = inputs
|
||||
self.left = left
|
||||
self.rightIndexRange = rightIndexRange
|
||||
|
||||
/// closedRangeでもらう
|
||||
/// getRangeWithTyposの複数版にあたる。`result`の計算が一回で済む分、高速になる。
|
||||
/// 例えば`left=4, rightIndexRange=6..<10`の場合、`4...6, 4...7, 4...8, 4...9`の範囲で計算する
|
||||
/// `left <= rightIndexRange.startIndex`が常に成り立つ
|
||||
func getRangesWithTypos(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: (endIndex: Int, penalty: PValue)] {
|
||||
let count = rightIndexRange.endIndex - left
|
||||
debug(#function, left, rightIndexRange, count)
|
||||
let nodes = (0..<count).map {(i: Int) in
|
||||
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
|
||||
self.count = count
|
||||
self.nodes = (0..<count).map {(i: Int) in
|
||||
TypoCorrection.lengths.flatMap {(k: Int) -> [TypoCorrection.TypoCandidate] in
|
||||
let j = i + k
|
||||
if count <= j {
|
||||
return []
|
||||
}
|
||||
return Self.getTypo(self.input[left + i ... left + j])
|
||||
return TypoCorrection.getTypo(inputs[left + i ... left + j])
|
||||
}
|
||||
}
|
||||
|
||||
let maxPenalty: PValue = 3.5 * 3
|
||||
// Performance Tuning Note:直接Dictionaryを作るのではなく、一度Arrayを作ってから最後にDictionaryに変換する方が、高速である
|
||||
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
|
||||
|
||||
// 深さ優先で列挙する
|
||||
var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
|
||||
self.stack = nodes[0].compactMap { typoCandidate in
|
||||
guard let firstElement = typoCandidate.inputElements.first else {
|
||||
return nil
|
||||
}
|
||||
if Self.isLeftSideValid(first: firstElement, of: self.input, from: left) {
|
||||
var convertTargetElements = [ConvertTargetElement]()
|
||||
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
|
||||
var convertTargetElements = [ComposingText.ConvertTargetElement]()
|
||||
for element in typoCandidate.inputElements {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
}
|
||||
@ -52,37 +31,91 @@ extension ComposingText {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
while let (convertTargetElements, lastElement, count, penalty) = stack.popLast() {
|
||||
}
|
||||
|
||||
let maxPenalty: PValue = 3.5 * 3
|
||||
let inputs: [ComposingText.InputElement]
|
||||
let left: Int
|
||||
let rightIndexRange: Range<Int>
|
||||
let nodes: [[TypoCorrection.TypoCandidate]]
|
||||
let count: Int
|
||||
|
||||
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]
|
||||
|
||||
/// Tells the generator that no candidate starting with `target` can be reached.
///
/// Every stacked entry whose stable prefix (the leading characters that can no longer
/// change) begins with `target` is removed from the stack.
mutating func setUnreachablePath(target: some Collection<Character>) {
    self.stack.removeAll { entry in
        var stablePrefix: [Character] = []
        scan: for element in entry.convertTargetElements {
            switch element.inputStyle {
            case .direct:
                stablePrefix.append(contentsOf: element.string)
            case .roman2kana:
                // TODO: impl
                // The stable part ends before the longest unstable suffix that matches.
                let fullLength = element.string.endIndex
                let stableEnd = Roman2Kana.unstableSuffixes
                    .lazy
                    .filter { element.string.hasSuffix($0) }
                    .map { fullLength - $0.count }
                    .min() ?? fullLength
                guard stableEnd == fullLength else {
                    // Part of this element is unstable, so stop scanning here.
                    // NOTE(review): this exit skips the prefix check below, so the
                    // characters appended here are never compared with `target`.
                    stablePrefix.append(contentsOf: element.string[0 ..< stableEnd])
                    break scan
                }
                stablePrefix.append(contentsOf: element.string)
            }
            // A stable prefix that starts with `target` makes this entry unreachable too.
            if stablePrefix.hasPrefix(target) {
                return true
            }
        }
        return false
    }
}
|
||||
|
||||
mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? {
|
||||
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
|
||||
var result: ([Character], (endIndex: Int, penalty: PValue))? = nil
|
||||
if rightIndexRange.contains(count + left - 1) {
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
stringToInfo.append((convertTarget, (count + left - 1, penalty)))
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
result = (convertTarget, (count + left - 1, penalty))
|
||||
}
|
||||
}
|
||||
// エスケープ
|
||||
if nodes.endIndex <= count {
|
||||
continue
|
||||
if self.nodes.endIndex <= count {
|
||||
if let result {
|
||||
return result
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
// 訂正数上限(3個)
|
||||
if penalty >= maxPenalty {
|
||||
var convertTargetElements = convertTargetElements
|
||||
let correct = [self.input[left + count]].map {InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||
if count + correct.count > nodes.endIndex {
|
||||
continue
|
||||
let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||
if count + correct.count > self.nodes.endIndex {
|
||||
if let result {
|
||||
return result
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
for element in correct {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
}
|
||||
stack.append((convertTargetElements, correct.last!, count + correct.count, penalty))
|
||||
} else {
|
||||
stack.append(contentsOf: nodes[count].compactMap {
|
||||
if count + $0.inputElements.count > nodes.endIndex {
|
||||
stack.append(contentsOf: self.nodes[count].compactMap {
|
||||
if count + $0.inputElements.count > self.nodes.endIndex {
|
||||
return nil
|
||||
}
|
||||
var convertTargetElements = convertTargetElements
|
||||
for element in $0.inputElements {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
}
|
||||
if shouldBeRemovedForDicdataStore(components: convertTargetElements) {
|
||||
if TypoCorrection.shouldBeRemovedForDicdataStore(components: convertTargetElements) {
|
||||
return nil
|
||||
}
|
||||
return (
|
||||
@ -93,14 +126,29 @@ extension ComposingText {
|
||||
)
|
||||
})
|
||||
}
|
||||
// このループで出力すべきものがある場合は出力する(yield)
|
||||
if let result {
|
||||
return result
|
||||
}
|
||||
}
|
||||
return Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0})
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: 誤り訂正用のAPI
|
||||
enum TypoCorrection {
|
||||
/// Returns whether a candidate can be dropped before the dictionary lookup.
///
/// Only the first character of the first element is inspected: the candidate is dropped
/// when that character (as katakana) is neither a roman letter nor a character for which
/// a LOUDS exists. An empty candidate is never dropped.
fileprivate static func shouldBeRemovedForDicdataStore(components: [ComposingText.ConvertTargetElement]) -> Bool {
    if let head = components.first?.string.first?.toKatakana() {
        return !(CharacterUtils.isRomanLetter(head) || DicdataStore.existLOUDS(for: head))
    }
    return false
}
|
||||
|
||||
/// closedRangeでもらう
|
||||
/// 例えば`left=4, rightIndexRange=6..<10`の場合、`4...6, 4...7, 4...8, 4...9`の範囲で計算する
|
||||
/// `left <= rightIndexRange.startIndex`が常に成り立つ
|
||||
func getRangesWithoutTypos(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: Int] {
|
||||
static func getRangesWithoutTypos(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) -> [[Character]: Int] {
|
||||
let count = rightIndexRange.endIndex - left
|
||||
debug(#function, left, rightIndexRange, count)
|
||||
let nodes = (0..<count).map {(i: Int) in
|
||||
@ -110,7 +158,7 @@ extension ComposingText {
|
||||
return []
|
||||
}
|
||||
// frozen: trueとしているため、typo候補は含まれない
|
||||
return Self.getTypo(self.input[left + i ... left + j], frozen: true)
|
||||
return Self.getTypo(inputs[left + i ... left + j], frozen: true)
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,12 +166,12 @@ extension ComposingText {
|
||||
var stringToInfo: [([Character], Int)] = []
|
||||
|
||||
// 深さ優先で列挙する
|
||||
var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int)] = nodes[0].compactMap { typoCandidate in
|
||||
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int)] = nodes[0].compactMap { typoCandidate in
|
||||
guard let firstElement = typoCandidate.inputElements.first else {
|
||||
return nil
|
||||
}
|
||||
if Self.isLeftSideValid(first: firstElement, of: self.input, from: left) {
|
||||
var convertTargetElements = [ConvertTargetElement]()
|
||||
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
|
||||
var convertTargetElements = [ComposingText.ConvertTargetElement]()
|
||||
for element in typoCandidate.inputElements {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
}
|
||||
@ -133,7 +181,7 @@ extension ComposingText {
|
||||
}
|
||||
while case .some((var convertTargetElements, let lastElement, let count)) = stack.popLast() {
|
||||
if rightIndexRange.contains(count + left - 1) {
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
stringToInfo.append((convertTarget, (count + left - 1)))
|
||||
}
|
||||
}
|
||||
@ -148,7 +196,7 @@ extension ComposingText {
|
||||
for element in $0.inputElements {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
}
|
||||
if shouldBeRemovedForDicdataStore(components: convertTargetElements) {
|
||||
if Self.shouldBeRemovedForDicdataStore(components: convertTargetElements) {
|
||||
return nil
|
||||
}
|
||||
return (
|
||||
@ -162,7 +210,7 @@ extension ComposingText {
|
||||
}
|
||||
|
||||
|
||||
func getRangeWithTypos(_ left: Int, _ right: Int) -> [[Character]: PValue] {
|
||||
static func getRangeWithTypos(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndex right: Int) -> [[Character]: PValue] {
|
||||
// 各iから始まる候補を列挙する
|
||||
// 例えばinput = [d(あ), r(s), r(i), r(t), r(s), d(は), d(は), d(れ)]の場合
|
||||
// nodes = [[d(あ)], [r(s)], [r(i)], [r(t), [r(t), r(a)]], [r(s)], [d(は), d(ば), d(ぱ)], [d(れ)]]
|
||||
@ -174,19 +222,19 @@ extension ComposingText {
|
||||
if count <= j {
|
||||
return []
|
||||
}
|
||||
return Self.getTypo(self.input[left + i ... left + j])
|
||||
return Self.getTypo(inputs[left + i ... left + j])
|
||||
}
|
||||
}
|
||||
|
||||
let maxPenalty: PValue = 3.5 * 3
|
||||
|
||||
// 深さ優先で列挙する
|
||||
var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
|
||||
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
|
||||
guard let firstElement = typoCandidate.inputElements.first else {
|
||||
return nil
|
||||
}
|
||||
if Self.isLeftSideValid(first: firstElement, of: self.input, from: left) {
|
||||
var convertTargetElements = [ConvertTargetElement]()
|
||||
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
|
||||
var convertTargetElements = [ComposingText.ConvertTargetElement]()
|
||||
for element in typoCandidate.inputElements {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
}
|
||||
@ -199,7 +247,7 @@ extension ComposingText {
|
||||
|
||||
while let (convertTargetElements, lastElement, count, penalty) = stack.popLast() {
|
||||
if count + left - 1 == right {
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
stringToPenalty.append((convertTarget, penalty))
|
||||
}
|
||||
continue
|
||||
@ -211,7 +259,7 @@ extension ComposingText {
|
||||
// 訂正数上限(3個)
|
||||
if penalty >= maxPenalty {
|
||||
var convertTargetElements = convertTargetElements
|
||||
let correct = [self.input[left + count]].map {InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||
let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||
if count + correct.count > nodes.endIndex {
|
||||
continue
|
||||
}
|
||||
@ -228,7 +276,7 @@ extension ComposingText {
|
||||
for element in $0.inputElements {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
}
|
||||
if shouldBeRemovedForDicdataStore(components: convertTargetElements) {
|
||||
if Self.shouldBeRemovedForDicdataStore(components: convertTargetElements) {
|
||||
return nil
|
||||
}
|
||||
return (
|
||||
@ -243,7 +291,7 @@ extension ComposingText {
|
||||
return Dictionary(stringToPenalty, uniquingKeysWith: max)
|
||||
}
|
||||
|
||||
private static func getTypo(_ elements: some Collection<InputElement>, frozen: Bool = false) -> [TypoCandidate] {
|
||||
fileprivate static func getTypo(_ elements: some Collection<ComposingText.InputElement>, frozen: Bool = false) -> [TypoCandidate] {
|
||||
let key = elements.reduce(into: "") {$0.append($1.character)}.toKatakana()
|
||||
|
||||
if (elements.allSatisfy {$0.inputStyle == .direct}) {
|
||||
@ -251,19 +299,19 @@ extension ComposingText {
|
||||
if key.count > 1 {
|
||||
return dictionary[key, default: []].map {
|
||||
TypoCandidate(
|
||||
inputElements: $0.value.map {InputElement(character: $0, inputStyle: .direct)},
|
||||
inputElements: $0.value.map {ComposingText.InputElement(character: $0, inputStyle: .direct)},
|
||||
weight: $0.weight
|
||||
)
|
||||
}
|
||||
} else if key.count == 1 {
|
||||
var result = dictionary[key, default: []].map {
|
||||
TypoCandidate(
|
||||
inputElements: $0.value.map {InputElement(character: $0, inputStyle: .direct)},
|
||||
inputElements: $0.value.map {ComposingText.InputElement(character: $0, inputStyle: .direct)},
|
||||
weight: $0.weight
|
||||
)
|
||||
}
|
||||
// そのまま
|
||||
result.append(TypoCandidate(inputElements: key.map {InputElement(character: $0, inputStyle: .direct)}, weight: 0))
|
||||
result.append(TypoCandidate(inputElements: key.map {ComposingText.InputElement(character: $0, inputStyle: .direct)}, weight: 0))
|
||||
return result
|
||||
}
|
||||
}
|
||||
@ -272,20 +320,20 @@ extension ComposingText {
|
||||
if key.count > 1 {
|
||||
return dictionary[key, default: []].map {
|
||||
TypoCandidate(
|
||||
inputElements: $0.map {InputElement(character: $0, inputStyle: .roman2kana)},
|
||||
inputElements: $0.map {ComposingText.InputElement(character: $0, inputStyle: .roman2kana)},
|
||||
weight: 3.5
|
||||
)
|
||||
}
|
||||
} else if key.count == 1 {
|
||||
var result = dictionary[key, default: []].map {
|
||||
TypoCandidate(
|
||||
inputElements: $0.map {InputElement(character: $0, inputStyle: .roman2kana)},
|
||||
inputElements: $0.map {ComposingText.InputElement(character: $0, inputStyle: .roman2kana)},
|
||||
weight: 3.5
|
||||
)
|
||||
}
|
||||
// そのまま
|
||||
result.append(
|
||||
TypoCandidate(inputElements: key.map {InputElement(character: $0, inputStyle: .roman2kana)}, weight: 0)
|
||||
TypoCandidate(inputElements: key.map {ComposingText.InputElement(character: $0, inputStyle: .roman2kana)}, weight: 0)
|
||||
)
|
||||
return result
|
||||
}
|
||||
@ -293,7 +341,7 @@ extension ComposingText {
|
||||
return []
|
||||
}
|
||||
|
||||
private static let lengths = [0, 1]
|
||||
fileprivate static let lengths = [0, 1]
|
||||
|
||||
private struct TypoUnit: Equatable {
|
||||
var value: String
|
||||
@ -306,7 +354,7 @@ extension ComposingText {
|
||||
}
|
||||
|
||||
struct TypoCandidate: Equatable {
|
||||
var inputElements: [InputElement]
|
||||
var inputElements: [ComposingText.InputElement]
|
||||
var weight: PValue
|
||||
}
|
||||
|
||||
|
@ -238,38 +238,63 @@ package struct LOUDS: Sendable {
|
||||
/// - Note: より適切な名前に変更したい
|
||||
@inlinable func byfixNodeIndices(targets: [[UInt8]], depth: Range<Int>) -> [Int] {
    // Feed the targets to one helper in lexicographic order, then read back the
    // node indices that fall inside `depth`.
    var searcher = MovingTowardPrefixSearchHelper(louds: self)
    for chars in targets.sorted(by: Self.lexLessThan) {
        _ = searcher.update(target: chars)
    }
    return searcher.indicesInDepth(depth: depth)
}
|
||||
|
||||
/// Incremental searcher that follows successive targets through a `LOUDS`, reusing the
/// path shared with the previous target.
struct MovingTowardPrefixSearchHelper {
    init(louds: LOUDS) {
        self.louds = louds
    }

    let louds: LOUDS
    // The final output: every node found so far, with the offset at which it was found.
    var indices: [(depth: Int, index: Int)] = []
    // The path followed for the most recent target.
    var stack: [(nodeIndex: Int, char: UInt8)] = []

    /// Returns the node indices recorded at an offset inside `depth`.
    func indicesInDepth(depth: Range<Int>) -> [Int] {
        self.indices.compactMap { entry in
            depth.contains(entry.depth) ? entry.index : nil
        }
    }

    /// Updates the helper with `target`.
    /// - Parameter target: The sequence of `CharID`s to search for.
    /// - Returns: `updated` tells whether `indices` gained an entry; `availableMaxIndex` is
    ///   the largest offset in `target` that was accessed successfully.
    @inlinable mutating func update(target: [UInt8]) -> (updated: Bool, availableMaxIndex: Int) {
        var updated = false
        var availableMaxIndex = 0
        for (i, char) in target.enumerated() {
            if i < self.stack.count {
                if self.stack[i].char == char {
                    // Already explored.
                    availableMaxIndex = i
                    continue
                }
                // A different character was found: discard the stack from here on.
                self.stack.removeSubrange(i...)
            }
            // When this point is reached, stack[i] does not exist.
            assert(i >= self.stack.count, "stack[\(i)] must not exist for logical reason.")
            // Search for the next character from the node on top of the stack.
            guard let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) else {
                // Not found: stop here.
                break
            }
            self.indices.append((i, nodeIndex))
            updated = true
            availableMaxIndex = i
            self.stack.append((nodeIndex, char))
        }
        return (updated, availableMaxIndex)
    }
}
|
||||
}
|
||||
|
@ -10,6 +10,11 @@ import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
enum Roman2Kana {
|
||||
static let unstableSuffixes: Set<[Character]> = hiraganaChanges.keys.flatMapSet { characters in
|
||||
characters.indices.map { i in
|
||||
Array(characters[...i])
|
||||
}
|
||||
}
|
||||
static let katakanaChanges: [String: String] = Dictionary(uniqueKeysWithValues: hiraganaChanges.map { (String($0.key), String($0.value).toKatakana()) })
|
||||
static let hiraganaChanges: [[Character]: [Character]] = Dictionary(uniqueKeysWithValues: [
|
||||
"a": "あ",
|
||||
|
@ -27,7 +27,7 @@ final class TemporalLearningMemoryTrieTests: XCTestCase {
|
||||
XCTAssertEqual(result1.first?.word, element1.word)
|
||||
XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
|
||||
|
||||
let result2 = trie.throughMatch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count)
|
||||
let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count).dicdata.flatMap { $0.value }
|
||||
XCTAssertEqual(result2.map { $0.word }, [element2.word])
|
||||
|
||||
let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))
|
||||
|
@ -52,6 +52,24 @@ final class ConverterTests: XCTestCase {
|
||||
let results = await converter.requestCandidates(c, options: requestOptions())
|
||||
XCTAssertEqual(results.mainResults.first?.text, "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Converts whole sentences typed in roman2kana style and checks the top candidate.
/// A fresh converter is created for each sentence.
func testRoman2KanaFullConversion() async throws {
    let cases: [(input: String, expected: String)] = [
        (
            "azuーkiーhasinjidainokiーboーdoapuridesu",
            "azooKeyは新時代のキーボードアプリです"
        ),
        (
            "youshoukikaratenisusuieiyakyuushourinjikenpounadosamazamanasupoーtuwokeikennsinagarasodatishougakkouzidaiharosanzerusukinkounitaizaisiteorigoruhuyatenisuwonaratteita",
            "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた"
        )
    ]
    for testCase in cases {
        let converter = await KanaKanjiConverter()
        var composingText = ComposingText()
        composingText.insertAtCursorPosition(testCase.input, inputStyle: .roman2kana)
        let results = await converter.requestCandidates(composingText, options: requestOptions())
        XCTAssertEqual(results.mainResults.first?.text, testCase.expected)
    }
}
|
||||
|
||||
// 1文字ずつ変換する
|
||||
|
Reference in New Issue
Block a user