Merge pull request #211 from azooKey/feat/unified_lookup

feat: 誤り訂正と辞書引きの統合処理の実装
This commit is contained in:
Miwa
2025-06-29 18:32:14 +09:00
committed by GitHub
7 changed files with 273 additions and 125 deletions

View File

@ -261,6 +261,83 @@ public final class DicdataStore {
return indices
}
/// Enumerates typo-corrected readings and looks each one up in the dictionaries as soon as it is produced.
///
/// Every reading yielded by `TypoCorrectionGenerator` is searched in the LOUDS keyed by its first
/// character, in the `"user"` LOUDS, in the `"memory"` LOUDS when `useMemory` is `true`, and in the
/// learning manager's temporary memory. When none of these can follow a reading to its last character,
/// the failing prefix is reported back through `setUnreachablePath(target:)` so the generator can discard
/// pending paths that start with it.
/// - Parameters:
///   - inputs: Input elements handed to the generator.
///   - leftIndex: Left index handed to the generator.
///   - rightIndexRange: Range of right indices handed to the generator.
///   - useMemory: Whether the `"memory"` LOUDS is searched as well.
/// - Returns:
///   - `stringToInfo`: every reading that added at least one LOUDS node or hit temporary memory, mapped to
///     its `(endIndex, penalty)`; when a reading occurs more than once the entry with the larger penalty is kept.
///   - `indices`: per LOUDS key, the node indices found at offset `minCount - 1` or deeper, where `minCount`
///     is the length of the shortest recorded reading.
///   - `temporaryMemoryDicdata`: temporary-memory entries found at offset `minCount - 1` or deeper.
func movingTowardPrefixSearch(
    inputs: [ComposingText.InputElement],
    leftIndex: Int,
    rightIndexRange: Range<Int>,
    useMemory: Bool
) -> (
    stringToInfo: [[Character]: (endIndex: Int, penalty: PValue)],
    indices: [(key: String, indices: [Int])],
    temporaryMemoryDicdata: [DicdataElement]
) {
    var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange)
    // One search helper per LOUDS key, created lazily and reused for all readings.
    var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
    var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
    // Temporary-memory hits grouped by the offset (0-based) in the reading at which they were found.
    var temporaryMemoryDicdata: [Int: [DicdataElement]] = [:]

    while let (characters, info) = generator.next() {
        guard let firstCharacter = characters.first else {
            continue
        }
        let charIDs = characters.map(self.character2charId(_:))
        let keys: [String] = if useMemory {
            [String(firstCharacter), "user", "memory"]
        } else {
            [String(firstCharacter), "user"]
        }
        var updated = false
        var availableMaxIndex = 0
        for key in keys {
            withMutableValue(&targetLOUDS[key]) { helper in
                if helper == nil, let louds = self.loadLOUDS(query: key) {
                    helper = LOUDS.MovingTowardPrefixSearchHelper(louds: louds)
                }
                // `helper` stays nil when no LOUDS could be loaded for this key; skip it in that case.
                guard let result = helper?.update(target: charIDs) else {
                    return
                }
                updated = updated || result.updated
                availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
            }
        }
        // Search the temporary memory with the same reading.
        let result = self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume charIDs)
        updated = updated || !(result.dicdata.isEmpty)
        availableMaxIndex = max(availableMaxIndex, result.availableMaxIndex)
        for (depth, dicdata) in result.dicdata {
            for data in dicdata {
                if info.penalty.isZero {
                    // A reading without any typo penalty is taken as is. Without this `continue` the same
                    // entry would be appended a second time below with a zero adjustment.
                    temporaryMemoryDicdata[depth, default: []].append(data)
                    continue
                }
                let ratio = Self.penaltyRatio[data.lcid]
                let pUnit: PValue = Self.getPenalty(data: data) / 2
                let adjust = pUnit * info.penalty * ratio
                if self.shouldBeRemoved(value: data.value() + adjust, wordCount: data.ruby.count) {
                    continue
                }
                temporaryMemoryDicdata[depth, default: []].append(data.adjustedData(adjust))
            }
        }
        if availableMaxIndex < characters.endIndex - 1 {
            // No source could follow the reading past `availableMaxIndex`, so the prefix that is one
            // character longer cannot lead to any entry.
            generator.setUnreachablePath(target: characters[...(availableMaxIndex + 1)])
        }
        if updated {
            stringToInfo.append((characters, info))
        }
    }

    let minCount = stringToInfo.map {$0.0.count}.min() ?? 0
    debug(#function, minCount, stringToInfo.map {$0.0})
    return (
        Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
        targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )},
        // Use the same lower bound as the LOUDS indices above: offset `minCount - 1` corresponds to a
        // reading of length `minCount`.
        temporaryMemoryDicdata.flatMap {
            $0.key >= minCount - 1 ? $0.value : []
        }
    )
}
/// prefixprefix matchLOUDS
/// - Parameters:
/// - query: 1"user"
@ -318,20 +395,8 @@ public final class DicdataStore {
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
}
// MARK:
var stringToInfo = inputData.getRangesWithTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
// MARK:
let stringSet: [([Character], [UInt8])] = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
let depth = minCharIDsCount - 1 ..< maxCharIDsCount
let group = [String: [([Character], [UInt8])]].init(grouping: stringSet, by: {String($0.0.first!)})
var indices = self.movingTowardPrefixSearch(group: group, depth: depth)
if learningManager.enabled {
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet, "memory": stringSet], depth: depth))
} else {
indices.append(contentsOf: self.movingTowardPrefixSearch(group: ["user": stringSet], depth: depth))
}
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled)
// MARK: indices
var dicdata: [DicdataElement] = []
for (identifier, value) in indices {
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
let rubyArray = Array(data.ruby)
@ -349,23 +414,6 @@ public final class DicdataStore {
}
dicdata.append(contentsOf: result)
}
// temporalpenalty
for (_, charIds) in consume stringSet {
for data in self.learningManager.temporaryThroughMatch(charIDs: consume charIds, depth: depth) {
let rubyArray = Array(data.ruby)
let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
if penalty.isZero {
dicdata.append(data)
}
let ratio = Self.penaltyRatio[data.lcid]
let pUnit: PValue = Self.getPenalty(data: data) / 2 //
let adjust = pUnit * penalty * ratio
if self.shouldBeRemoved(value: data.value() + adjust, wordCount: rubyArray.count) {
continue
}
dicdata.append(data.adjustedData(adjust))
}
}
for i in toIndexLeft ..< toIndexRight {
do {
@ -425,7 +473,7 @@ public final class DicdataStore {
}
// MARK:
let stringToEndIndex = inputData.getRangesWithoutTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
let stringToEndIndex = TypoCorrection.getRangesWithoutTypos(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
// MARK:
guard let (minString, maxString) = stringToEndIndex.keys.minAndMax(by: {$0.count < $1.count}) else {
debug(#function, "minString/maxString is nil", stringToEndIndex)
@ -447,7 +495,9 @@ public final class DicdataStore {
}
if learningManager.enabled {
// temporalpenalty
dicdata.append(contentsOf: self.learningManager.temporaryThroughMatch(charIDs: consume maxIDs, depth: depth))
dicdata.append(
contentsOf: self.learningManager.movingTowardPrefixSearchOnTemporaryMemory(charIDs: consume maxIDs, depth: depth).dicdata.flatMap { $0.value }
)
}
for (key, value) in stringToEndIndex {
let convertTarget = String(key)
@ -485,7 +535,7 @@ public final class DicdataStore {
let segment = inputData.input[fromIndex...toIndex].reduce(into: "") {$0.append($1.character)}.toKatakana()
// TODO:
let string2penalty = inputData.getRangeWithTypos(fromIndex, toIndex).filter {
let string2penalty = TypoCorrection.getRangeWithTypos(inputs: inputData.input, leftIndex: fromIndex, rightIndex: toIndex).filter {
needTypoCorrection || $0.value == 0.0
}

View File

@ -584,20 +584,22 @@ struct TemporalLearningMemoryTrie {
return nodes[index].dataIndices.map {self.dicdata[$0]}
}
/// Walks the trie along `chars` and collects the entries stored at the nodes that are reached.
/// - Parameters:
///   - chars: Character IDs to follow, starting from node index 0.
///   - depth: Offsets into `chars` whose nodes contribute entries to the result.
/// - Returns: `dicdata` maps an offset in `chars` (only offsets contained in `depth`) to the entries stored
///   at the node reached at that offset. `availableMaxIndex` is the largest offset in `chars` for which a
///   child node existed, or `0` when not even the first character matched.
func movingTowardPrefixSearch(chars: [UInt8], depth: Range<Int>) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
    var index = 0
    var availableMaxIndex = 0
    var indices: [Int: [Int]] = [:]
    for (offset, char) in chars.enumerated() {
        guard let nextIndex = nodes[index].children[char] else {
            // The walk cannot continue past this character.
            break
        }
        // Record the position inside `chars`, not the trie node index: callers compare this value with
        // positions in the searched reading.
        availableMaxIndex = offset
        index = nextIndex
        if depth.contains(offset) {
            indices[offset] = nodes[index].dataIndices
        }
    }
    return (indices.mapValues { items in items.map { self.dicdata[$0] }}, availableMaxIndex)
}
func prefixMatch(chars: [UInt8]) -> [DicdataElement] {
@ -718,11 +720,11 @@ final class LearningManager {
return self.temporaryMemory.perfectMatch(chars: charIDs)
}
func temporaryThroughMatch(charIDs: [UInt8], depth: Range<Int>) -> [DicdataElement] {
func movingTowardPrefixSearchOnTemporaryMemory(charIDs: [UInt8], depth: Range<Int> = 0 ..< .max) -> (dicdata: [Int: [DicdataElement]], availableMaxIndex: Int) {
guard let options, options.learningType.needUsingMemory else {
return []
return ([:], 0)
}
return self.temporaryMemory.throughMatch(chars: charIDs, depth: depth)
return self.temporaryMemory.movingTowardPrefixSearch(chars: charIDs, depth: depth)
}
func temporaryPrefixMatch(charIDs: [UInt8]) -> [DicdataElement] {

View File

@ -1,50 +1,29 @@
//
// TypoCorrection.swift
// Keyboard
//
// Created by ensan on 2022/12/18.
// Copyright © 2022 ensan. All rights reserved.
//
import SwiftUtils
// MARK: API
extension ComposingText {
private func shouldBeRemovedForDicdataStore(components: [ConvertTargetElement]) -> Bool {
// 使1
guard let first = components.first?.string.first?.toKatakana() else {
return false
}
return !CharacterUtils.isRomanLetter(first) && !DicdataStore.existLOUDS(for: first)
}
struct TypoCorrectionGenerator {
init(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) {
self.inputs = inputs
self.left = left
self.rightIndexRange = rightIndexRange
/// closedRange
/// getRangeWithTypos`result`
/// `left=4, rightIndexRange=6..<10``4...6, 4...7, 4...8, 4...9`
/// `left <= rightIndexRange.startIndex`
func getRangesWithTypos(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: (endIndex: Int, penalty: PValue)] {
let count = rightIndexRange.endIndex - left
debug(#function, left, rightIndexRange, count)
let nodes = (0..<count).map {(i: Int) in
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
self.count = count
self.nodes = (0..<count).map {(i: Int) in
TypoCorrection.lengths.flatMap {(k: Int) -> [TypoCorrection.TypoCandidate] in
let j = i + k
if count <= j {
return []
}
return Self.getTypo(self.input[left + i ... left + j])
return TypoCorrection.getTypo(inputs[left + i ... left + j])
}
}
let maxPenalty: PValue = 3.5 * 3
// Performance Tuning NoteDictionaryArrayDictionary
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
//
var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
self.stack = nodes[0].compactMap { typoCandidate in
guard let firstElement = typoCandidate.inputElements.first else {
return nil
}
if Self.isLeftSideValid(first: firstElement, of: self.input, from: left) {
var convertTargetElements = [ConvertTargetElement]()
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
var convertTargetElements = [ComposingText.ConvertTargetElement]()
for element in typoCandidate.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
@ -52,37 +31,91 @@ extension ComposingText {
}
return nil
}
while let (convertTargetElements, lastElement, count, penalty) = stack.popLast() {
}
/// Penalty threshold checked in `next()`: once a path's penalty has reached this value it is extended
/// only with the input element as typed, no longer with typo candidates.
/// NOTE(review): presumably three times the 3.5 weight used for roman-kana typo candidates — confirm.
let maxPenalty: PValue = 3.5 * 3
/// Input elements given to the initializer.
let inputs: [ComposingText.InputElement]
/// Left index given to the initializer; offsets into `nodes` are relative to it.
let left: Int
/// Right indices for which `next()` may yield a result (tested as `count + left - 1`).
let rightIndexRange: Range<Int>
/// Typo candidates per offset from `left`, built in the initializer with `TypoCorrection.getTypo`.
let nodes: [[TypoCorrection.TypoCandidate]]
/// `rightIndexRange.endIndex - left`, which is also the number of entries in `nodes`.
let count: Int
/// Pending search states. `next()` pops from the end and pushes extensions to the end.
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]
/// Discards every pending state on `stack` whose stable prefix starts with `target`.
///
/// The stable prefix of a state is built element by element: `.direct` elements contribute their whole
/// string, `.roman2kana` elements contribute their whole string unless the string ends with one of
/// `Roman2Kana.unstableSuffixes`. The prefix is compared with `target` after each fully contributed element.
/// - Parameter target: Prefix that is known not to lead to any dictionary entry.
mutating func setUnreachablePath(target: some Collection<Character>) {
    self.stack.removeAll { pending in
        var fixedPrefix: [Character] = []
        for element in pending.convertTargetElements {
            switch element.inputStyle {
            case .direct:
                fixedPrefix.append(contentsOf: element.string)
            case .roman2kana:
                // TODO: impl
                // Length of the longest unstable suffix the string ends with (0 when there is none).
                let unstableLength = Roman2Kana.unstableSuffixes.reduce(0) { longest, suffix in
                    element.string.hasSuffix(suffix) ? max(longest, suffix.count) : longest
                }
                guard unstableLength == 0 else {
                    // Scanning ends at the first element with an unstable tail and the state is kept.
                    // NOTE(review): the prefix shortened by the unstable tail is not compared with `target`.
                    return false
                }
                fixedPrefix.append(contentsOf: element.string)
            }
            // The state can only produce readings starting with `target`, so it is unreachable.
            if fixedPrefix.hasPrefix(target) {
                return true
            }
        }
        return false
    }
}
mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? {
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
var result: ([Character], (endIndex: Int, penalty: PValue))? = nil
if rightIndexRange.contains(count + left - 1) {
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
stringToInfo.append((convertTarget, (count + left - 1, penalty)))
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
result = (convertTarget, (count + left - 1, penalty))
}
}
//
if nodes.endIndex <= count {
continue
if self.nodes.endIndex <= count {
if let result {
return result
} else {
continue
}
}
// (3)
if penalty >= maxPenalty {
var convertTargetElements = convertTargetElements
let correct = [self.input[left + count]].map {InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
if count + correct.count > nodes.endIndex {
continue
let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
if count + correct.count > self.nodes.endIndex {
if let result {
return result
} else {
continue
}
}
for element in correct {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
stack.append((convertTargetElements, correct.last!, count + correct.count, penalty))
} else {
stack.append(contentsOf: nodes[count].compactMap {
if count + $0.inputElements.count > nodes.endIndex {
stack.append(contentsOf: self.nodes[count].compactMap {
if count + $0.inputElements.count > self.nodes.endIndex {
return nil
}
var convertTargetElements = convertTargetElements
for element in $0.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
if shouldBeRemovedForDicdataStore(components: convertTargetElements) {
if TypoCorrection.shouldBeRemovedForDicdataStore(components: convertTargetElements) {
return nil
}
return (
@ -93,14 +126,29 @@ extension ComposingText {
)
})
}
// yield
if let result {
return result
}
}
return Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0})
return nil
}
}
// MARK: API
enum TypoCorrection {
/// Tells whether a candidate can be dropped because of its first character.
/// - Parameter components: Convert-target elements of the candidate.
/// - Returns: `true` when the first character (converted with `toKatakana()`) is neither a roman letter
///   nor a character for which `DicdataStore.existLOUDS(for:)` reports a LOUDS; `false` otherwise,
///   including when there is no first character.
fileprivate static func shouldBeRemovedForDicdataStore(components: [ComposingText.ConvertTargetElement]) -> Bool {
    if let head = components.first?.string.first?.toKatakana() {
        let canBeLookedUp = CharacterUtils.isRomanLetter(head) || DicdataStore.existLOUDS(for: head)
        return !canBeLookedUp
    }
    return false
}
/// closedRange
/// `left=4, rightIndexRange=6..<10``4...6, 4...7, 4...8, 4...9`
/// `left <= rightIndexRange.startIndex`
func getRangesWithoutTypos(_ left: Int, rightIndexRange: Range<Int>) -> [[Character]: Int] {
static func getRangesWithoutTypos(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>) -> [[Character]: Int] {
let count = rightIndexRange.endIndex - left
debug(#function, left, rightIndexRange, count)
let nodes = (0..<count).map {(i: Int) in
@ -110,7 +158,7 @@ extension ComposingText {
return []
}
// frozen: truetypo
return Self.getTypo(self.input[left + i ... left + j], frozen: true)
return Self.getTypo(inputs[left + i ... left + j], frozen: true)
}
}
@ -118,12 +166,12 @@ extension ComposingText {
var stringToInfo: [([Character], Int)] = []
//
var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int)] = nodes[0].compactMap { typoCandidate in
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int)] = nodes[0].compactMap { typoCandidate in
guard let firstElement = typoCandidate.inputElements.first else {
return nil
}
if Self.isLeftSideValid(first: firstElement, of: self.input, from: left) {
var convertTargetElements = [ConvertTargetElement]()
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
var convertTargetElements = [ComposingText.ConvertTargetElement]()
for element in typoCandidate.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
@ -133,7 +181,7 @@ extension ComposingText {
}
while case .some((var convertTargetElements, let lastElement, let count)) = stack.popLast() {
if rightIndexRange.contains(count + left - 1) {
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
stringToInfo.append((convertTarget, (count + left - 1)))
}
}
@ -148,7 +196,7 @@ extension ComposingText {
for element in $0.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
if shouldBeRemovedForDicdataStore(components: convertTargetElements) {
if Self.shouldBeRemovedForDicdataStore(components: convertTargetElements) {
return nil
}
return (
@ -162,7 +210,7 @@ extension ComposingText {
}
func getRangeWithTypos(_ left: Int, _ right: Int) -> [[Character]: PValue] {
static func getRangeWithTypos(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndex right: Int) -> [[Character]: PValue] {
// i
// input = [d(), r(s), r(i), r(t), r(s), d(), d(), d()]
// nodes = [[d()], [r(s)], [r(i)], [r(t), [r(t), r(a)]], [r(s)], [d(), d(), d()], [d()]]
@ -174,19 +222,19 @@ extension ComposingText {
if count <= j {
return []
}
return Self.getTypo(self.input[left + i ... left + j])
return Self.getTypo(inputs[left + i ... left + j])
}
}
let maxPenalty: PValue = 3.5 * 3
//
var stack: [(convertTargetElements: [ConvertTargetElement], lastElement: InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)] = nodes[0].compactMap { typoCandidate in
guard let firstElement = typoCandidate.inputElements.first else {
return nil
}
if Self.isLeftSideValid(first: firstElement, of: self.input, from: left) {
var convertTargetElements = [ConvertTargetElement]()
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
var convertTargetElements = [ComposingText.ConvertTargetElement]()
for element in typoCandidate.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
@ -199,7 +247,7 @@ extension ComposingText {
while let (convertTargetElements, lastElement, count, penalty) = stack.popLast() {
if count + left - 1 == right {
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: self.input, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
stringToPenalty.append((convertTarget, penalty))
}
continue
@ -211,7 +259,7 @@ extension ComposingText {
// (3)
if penalty >= maxPenalty {
var convertTargetElements = convertTargetElements
let correct = [self.input[left + count]].map {InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
if count + correct.count > nodes.endIndex {
continue
}
@ -228,7 +276,7 @@ extension ComposingText {
for element in $0.inputElements {
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
}
if shouldBeRemovedForDicdataStore(components: convertTargetElements) {
if Self.shouldBeRemovedForDicdataStore(components: convertTargetElements) {
return nil
}
return (
@ -243,7 +291,7 @@ extension ComposingText {
return Dictionary(stringToPenalty, uniquingKeysWith: max)
}
private static func getTypo(_ elements: some Collection<InputElement>, frozen: Bool = false) -> [TypoCandidate] {
fileprivate static func getTypo(_ elements: some Collection<ComposingText.InputElement>, frozen: Bool = false) -> [TypoCandidate] {
let key = elements.reduce(into: "") {$0.append($1.character)}.toKatakana()
if (elements.allSatisfy {$0.inputStyle == .direct}) {
@ -251,19 +299,19 @@ extension ComposingText {
if key.count > 1 {
return dictionary[key, default: []].map {
TypoCandidate(
inputElements: $0.value.map {InputElement(character: $0, inputStyle: .direct)},
inputElements: $0.value.map {ComposingText.InputElement(character: $0, inputStyle: .direct)},
weight: $0.weight
)
}
} else if key.count == 1 {
var result = dictionary[key, default: []].map {
TypoCandidate(
inputElements: $0.value.map {InputElement(character: $0, inputStyle: .direct)},
inputElements: $0.value.map {ComposingText.InputElement(character: $0, inputStyle: .direct)},
weight: $0.weight
)
}
//
result.append(TypoCandidate(inputElements: key.map {InputElement(character: $0, inputStyle: .direct)}, weight: 0))
result.append(TypoCandidate(inputElements: key.map {ComposingText.InputElement(character: $0, inputStyle: .direct)}, weight: 0))
return result
}
}
@ -272,20 +320,20 @@ extension ComposingText {
if key.count > 1 {
return dictionary[key, default: []].map {
TypoCandidate(
inputElements: $0.map {InputElement(character: $0, inputStyle: .roman2kana)},
inputElements: $0.map {ComposingText.InputElement(character: $0, inputStyle: .roman2kana)},
weight: 3.5
)
}
} else if key.count == 1 {
var result = dictionary[key, default: []].map {
TypoCandidate(
inputElements: $0.map {InputElement(character: $0, inputStyle: .roman2kana)},
inputElements: $0.map {ComposingText.InputElement(character: $0, inputStyle: .roman2kana)},
weight: 3.5
)
}
//
result.append(
TypoCandidate(inputElements: key.map {InputElement(character: $0, inputStyle: .roman2kana)}, weight: 0)
TypoCandidate(inputElements: key.map {ComposingText.InputElement(character: $0, inputStyle: .roman2kana)}, weight: 0)
)
return result
}
@ -293,7 +341,7 @@ extension ComposingText {
return []
}
private static let lengths = [0, 1]
fileprivate static let lengths = [0, 1]
private struct TypoUnit: Equatable {
var value: String
@ -306,7 +354,7 @@ extension ComposingText {
}
struct TypoCandidate: Equatable {
var inputElements: [InputElement]
var inputElements: [ComposingText.InputElement]
var weight: PValue
}

View File

@ -238,38 +238,63 @@ package struct LOUDS: Sendable {
/// - Note:
@inlinable func byfixNodeIndices(targets: [[UInt8]], depth: Range<Int>) -> [Int] {
    // Order the targets with `Self.lexLessThan` before feeding them to the helper; the helper reuses the
    // part of its stack that matches the previous target.
    var orderedTargets = targets
    orderedTargets.sort(by: Self.lexLessThan)
    // Feed every target to a single helper; the per-call result of `update` is not needed here.
    let helper = orderedTargets.reduce(into: MovingTowardPrefixSearchHelper(louds: self)) { searchHelper, target in
        _ = searchHelper.update(target: target)
    }
    // Keep only the node indices that were found at an offset inside `depth`.
    return helper.indicesInDepth(depth: depth)
}
struct MovingTowardPrefixSearchHelper {
init(louds: LOUDS) {
self.louds = louds
}
let louds: LOUDS
//
var indices: [Int] = []
var indices: [(depth: Int, index: Int)] = []
//
var stack: [(nodeIndex: Int, char: UInt8)] = []
for chars in targets {
func indicesInDepth(depth: Range<Int>) -> [Int] {
return self.indices
.lazy
.filter { depth.contains($0.depth) }
.map { $0.index }
}
/// `target`
/// - Parameter target: `CharID`
/// - Returns: `updated``indices``availableMaxIndex`
@inlinable mutating func update(target: [UInt8]) -> (updated: Bool, availableMaxIndex: Int) {
var updated = false
var availableMaxIndex = 0
// iupperBound
for (i, char) in chars.enumerated() where i < depth.upperBound {
if i < stack.count, stack[i].char == char {
for (i, char) in target.enumerated() {
if i < self.stack.count, self.stack[i].char == char {
//
availableMaxIndex = i
continue
} else if i < stack.count, stack[i].char != char {
} else if i < self.stack.count, self.stack[i].char != char {
// stack
stack = Array(stack[..<i])
self.stack = Array(self.stack[..<i])
}
// stack[i]
assert(i >= stack.count, "stack[\(i)] must not exist for logical reason.")
assert(i >= self.stack.count, "stack[\(i)] must not exist for logical reason.")
//
// stacknodeIndexchar
if let nodeIndex = self.searchCharNodeIndex(from: stack.last?.nodeIndex ?? 1, char: char) {
if depth.contains(i) {
indices.append(nodeIndex)
}
stack.append((nodeIndex, char))
if let nodeIndex = self.louds.searchCharNodeIndex(from: self.stack.last?.nodeIndex ?? 1, char: char) {
self.indices.append((i, nodeIndex))
updated = true
availableMaxIndex = i
self.stack.append((nodeIndex, char))
} else {
//
break
}
}
return (updated, availableMaxIndex)
}
return indices
}
}

View File

@ -10,6 +10,11 @@ import Foundation
import SwiftUtils
enum Roman2Kana {
static let unstableSuffixes: Set<[Character]> = hiraganaChanges.keys.flatMapSet { characters in
characters.indices.map { i in
Array(characters[...i])
}
}
static let katakanaChanges: [String: String] = Dictionary(uniqueKeysWithValues: hiraganaChanges.map { (String($0.key), String($0.value).toKatakana()) })
static let hiraganaChanges: [[Character]: [Character]] = Dictionary(uniqueKeysWithValues: [
"a": "",

View File

@ -27,7 +27,7 @@ final class TemporalLearningMemoryTrieTests: XCTestCase {
XCTAssertEqual(result1.first?.word, element1.word)
XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
let result2 = trie.throughMatch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count)
let result2 = trie.movingTowardPrefixSearch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count).dicdata.flatMap { $0.value }
XCTAssertEqual(result2.map { $0.word }, [element2.word])
let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))

View File

@ -52,6 +52,24 @@ final class ConverterTests: XCTestCase {
let results = await converter.requestCandidates(c, options: requestOptions())
XCTAssertEqual(results.mainResults.first?.text, "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた")
}
}
/// Converts whole sentences typed with the roman2kana input style and checks the top candidate.
func testRoman2KanaFullConversion() async throws {
    // Each case uses a fresh converter and a fresh composing text.
    let cases: [(input: String, expected: String)] = [
        (
            "azuーkiーhasinjidainokiーboーdoapuridesu",
            "azooKeyは新時代のキーボードアプリです"
        ),
        (
            "youshoukikaratenisusuieiyakyuushourinjikenpounadosamazamanasupoーtuwokeikennsinagarasodatishougakkouzidaiharosanzerusukinkounitaizaisiteorigoruhuyatenisuwonaratteita",
            "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた"
        )
    ]
    for (input, expected) in cases {
        let converter = await KanaKanjiConverter()
        var composingText = ComposingText()
        composingText.insertAtCursorPosition(input, inputStyle: .roman2kana)
        let results = await converter.requestCandidates(composingText, options: requestOptions())
        XCTAssertEqual(results.mainResults.first?.text, expected)
    }
}
// 1