mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
wip: test is not passing, but commit/push it for
working in another env
This commit is contained in:
@ -31,10 +31,35 @@ extension Kana2Kanji {
|
||||
let inputCount: Int = inputData.input.count
|
||||
let surfaceCount = inputData.convertTarget.count
|
||||
let result: LatticeNode = LatticeNode.EOSNode
|
||||
let i2sMap = inputData.inputIndexToSurfaceIndexMap()
|
||||
var rawNodes = (.zero ..< inputCount).map {
|
||||
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = i2sMap[$0] {
|
||||
(sIndex, nil)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
return dicdataStore.getLOUDSDataInRange(
|
||||
inputData: inputData,
|
||||
from: $0,
|
||||
surfaceRange: surfaceRange,
|
||||
needTypoCorrection: needTypoCorrection
|
||||
)
|
||||
}
|
||||
for sIndex in 0 ..< inputData.convertTarget.count where !i2sMap.values.contains(sIndex) {
|
||||
// inputIndexの列挙でカバーできないsIndexについて、追加で辞書を引いてrawNodesに追加
|
||||
rawNodes.append(
|
||||
dicdataStore.getLOUDSDataInRange(
|
||||
inputData: inputData,
|
||||
from: nil,
|
||||
surfaceRange: (sIndex, nil),
|
||||
needTypoCorrection: needTypoCorrection
|
||||
)
|
||||
)
|
||||
}
|
||||
let lattice: Lattice = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)}
|
||||
rawNodes: rawNodes
|
||||
)
|
||||
// 「i文字目から始まるnodes」に対して
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
@ -55,8 +80,12 @@ extension Kana2Kanji {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex = node.range.endIndex
|
||||
// 後続ノードのindex(正規化する)
|
||||
let nextIndex: Lattice.LatticeIndex = switch node.range.endIndex {
|
||||
case .input(let index): if let sIndex = i2sMap[index] { .surface(sIndex) } else { node.range.endIndex }
|
||||
case .surface: node.range.endIndex
|
||||
}
|
||||
print(nextIndex, node.data.word, node.data.ruby, lattice[index: nextIndex].count)
|
||||
// 文字数がcountと等しい場合登録する
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
|
@ -1,8 +1,19 @@
|
||||
import Algorithms
|
||||
import SwiftUtils
|
||||
|
||||
/// The lattice nodes attached to a single lattice position, kept in two groups:
/// nodes addressed by input index and nodes addressed by surface index.
///
/// Iterating the value walks the sequence produced by chaining
/// `inputIndexedNodes` with `surfaceIndexedNodes`.
struct LatticeNodeArray: Sequence {
    typealias Element = LatticeNode

    /// Nodes whose position is expressed as an input index.
    var inputIndexedNodes: [LatticeNode]
    /// Nodes whose position is expressed as a surface index.
    var surfaceIndexedNodes: [LatticeNode]

    func makeIterator() -> Chain2Sequence<[LatticeNode], [LatticeNode]>.Iterator {
        let joined = self.inputIndexedNodes.chained(self.surfaceIndexedNodes)
        return joined.makeIterator()
    }
}
|
||||
|
||||
struct Lattice: Sequence {
|
||||
typealias Element = [LatticeNode]
|
||||
typealias Element = LatticeNodeArray
|
||||
|
||||
init() {
|
||||
self.inputIndexedNodes = []
|
||||
@ -15,11 +26,12 @@ struct Lattice: Sequence {
|
||||
|
||||
for nodes in rawNodes {
|
||||
guard let first = nodes.first else { continue }
|
||||
print(nodes.mapSet { $0.range.startIndex }, nodes.count)
|
||||
switch first.range.startIndex {
|
||||
case .surface(let i):
|
||||
self.surfaceIndexedNodes[i] = nodes
|
||||
self.surfaceIndexedNodes[i].append(contentsOf: nodes)
|
||||
case .input(let i):
|
||||
self.inputIndexedNodes[i] = nodes
|
||||
self.inputIndexedNodes[i].append(contentsOf: nodes)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -97,15 +109,42 @@ struct Lattice: Sequence {
|
||||
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })
|
||||
}
|
||||
|
||||
func makeIterator() -> Chain2Sequence<[[LatticeNode]], [[LatticeNode]]>.Iterator {
|
||||
self.inputIndexedNodes.chained(self.surfaceIndexedNodes).makeIterator()
|
||||
/// Iterator over a `Lattice` that yields one `LatticeNodeArray` per slot.
///
/// All surface-indexed slots are yielded first (with an empty input group),
/// followed by all input-indexed slots (with an empty surface group).
struct Iterator: IteratorProtocol {
    typealias Element = LatticeNodeArray

    /// The lattice being traversed.
    let lattice: Lattice
    /// Cursor and end position for each of the two slot arrays.
    var indices: (currentSurfaceIndex: Int, surfaceEndIndex: Int, currentInputIndex: Int, inputEndIndex: Int)

    init(lattice: Lattice) {
        self.lattice = lattice
        self.indices = (
            currentSurfaceIndex: 0,
            surfaceEndIndex: lattice.surfaceIndexedNodes.endIndex,
            currentInputIndex: 0,
            inputEndIndex: lattice.inputIndexedNodes.endIndex
        )
    }

    mutating func next() -> LatticeNodeArray? {
        // Phase 1: surface-indexed slots.
        let surfaceCursor = indices.currentSurfaceIndex
        if surfaceCursor < indices.surfaceEndIndex {
            let nodes = lattice.surfaceIndexedNodes[surfaceCursor]
            indices.currentSurfaceIndex = surfaceCursor + 1
            return LatticeNodeArray(inputIndexedNodes: [], surfaceIndexedNodes: nodes)
        }

        // Phase 2: input-indexed slots, once the surface slots are exhausted.
        let inputCursor = indices.currentInputIndex
        guard inputCursor < indices.inputEndIndex else {
            return nil
        }
        let nodes = lattice.inputIndexedNodes[inputCursor]
        indices.currentInputIndex = inputCursor + 1
        return LatticeNodeArray(inputIndexedNodes: nodes, surfaceIndexedNodes: [])
    }
}
|
||||
|
||||
/// Returns a fresh `Iterator` positioned at the start of this lattice.
func makeIterator() -> Iterator {
    return .init(lattice: self)
}
|
||||
|
||||
/// `true` only when both the input-indexed and the surface-indexed slot arrays are empty.
var isEmpty: Bool {
    guard self.inputIndexedNodes.isEmpty else {
        return false
    }
    return self.surfaceIndexedNodes.isEmpty
}
|
||||
|
||||
enum LatticeIndex: Sendable, Equatable {
|
||||
enum LatticeIndex: Sendable, Equatable, Hashable {
|
||||
case surface(Int)
|
||||
case input(Int)
|
||||
|
||||
@ -114,7 +153,7 @@ struct Lattice: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
enum LatticeRange: Sendable, Equatable {
|
||||
enum LatticeRange: Sendable, Equatable, Hashable {
|
||||
static var zero: Self {
|
||||
.input(from: 0, to: 0)
|
||||
}
|
||||
@ -149,7 +188,7 @@ struct Lattice: Sequence {
|
||||
}
|
||||
|
||||
func merged(with other: Self) -> Self? {
|
||||
switch (self, other) {
|
||||
return switch (self, other) {
|
||||
case (let .surface(l, ml), let .surface(mr, r)):
|
||||
if ml == mr {
|
||||
.surface(from: l, to: r)
|
||||
|
@ -40,6 +40,6 @@ public final class LatticeNode {
|
||||
/// - Returns: 文節単位の区切り情報を持った変換候補データのリスト。
|
||||
/// - Note: 最終的に`EOS`ノードにおいて実行する想定のAPIになっている。
|
||||
func getCandidateData() -> [CandidateData] {
    // Ask every registered predecessor for its candidate data exactly once;
    // the mapped array is the result (no discarded duplicate evaluation).
    self.prevs.map { $0.getCandidateData() }
}
|
||||
}
|
||||
|
@ -242,20 +242,93 @@ public final class DicdataStore {
|
||||
return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
|
||||
}
|
||||
|
||||
/// Combines the surface-based generator and the typo-correction generator behind
/// a single `next()` / `setUnreachablePath(target:)` interface.
///
/// Either generator may be absent. `next()` drains the surface generator first and
/// only then falls back to the typo-correction generator.
private struct UnifiedGenerator {
    /// Enumerates prefixes of `surface` that start at `range.leftIndex` and end at
    /// successive indices taken from `range.rightIndexRange`. Every yielded
    /// candidate carries a penalty of 0.
    struct SurfaceGenerator {
        /// The full surface string as characters.
        var surface: [Character]
        /// Start index and the range of (inclusive) end indices still to be visited.
        var range: TypoCorrectionGenerator.ProcessRange
        /// End index (inclusive) of the next candidate to yield.
        var currentIndex: Int

        init(surface: [Character], range: TypoCorrectionGenerator.ProcessRange) {
            self.surface = surface
            self.range = range
            self.currentIndex = range.rightIndexRange.lowerBound
        }

        /// Shrinks the end-index range when the surface (from `range.leftIndex`)
        /// starts with `target`, so that no candidate ending beyond the matched
        /// prefix is produced afterwards.
        /// - Parameter target: The prefix that has been reported as unreachable.
        mutating func setUnreachablePath<C: Collection<Character>>(target: C) where C.Indices == Range<Int> {
            // Slicing from outside `surface` would trap, so reject inconsistent ranges first.
            guard (self.surface.startIndex ... self.surface.endIndex).contains(self.range.leftIndex) else {
                return
            }
            guard self.surface[self.range.leftIndex...].hasPrefix(target) else {
                return
            }
            // Compute the new upper bound.
            // The matched span is `target.count` characters long. `target.indices.upperBound`
            // equals that length only for zero-based collections, so it is not used here:
            // an offset slice would otherwise yield a bound that is too large.
            let currentBounds = self.range.rightIndexRange
            let targetUpperBound = self.range.leftIndex + target.count
            self.range.rightIndexRange = min(currentBounds.lowerBound, targetUpperBound) ..< min(currentBounds.upperBound, targetUpperBound)
        }

        /// Yields the next surface prefix together with its inclusive surface end index,
        /// or `nil` once the end-index range (or the surface itself) is exhausted.
        mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
            guard self.surface.indices.contains(self.currentIndex),
                  self.currentIndex < self.range.rightIndexRange.upperBound,
                  // The closed range below traps unless leftIndex is valid and not past currentIndex.
                  self.surface.indices.contains(self.range.leftIndex),
                  self.range.leftIndex <= self.currentIndex
            else {
                return nil
            }
            let endIndex = self.currentIndex
            self.currentIndex += 1
            let characters = Array(self.surface[self.range.leftIndex ... endIndex])
            return (characters, (.surface(endIndex), 0))
        }
    }

    var typoCorrectionGenerator: TypoCorrectionGenerator? = nil
    var surfaceGenerator: SurfaceGenerator? = nil

    /// Installs (or replaces) the typo-correction generator.
    mutating func register(_ generator: TypoCorrectionGenerator) {
        self.typoCorrectionGenerator = generator
    }

    /// Installs (or replaces) the surface generator.
    mutating func register(_ generator: SurfaceGenerator) {
        self.surfaceGenerator = generator
    }

    /// Forwards the unreachable-prefix notification to every registered generator.
    mutating func setUnreachablePath<C: Collection<Character>>(target: C) where C.Indices == Range<Int> {
        self.typoCorrectionGenerator?.setUnreachablePath(target: target)
        self.surfaceGenerator?.setUnreachablePath(target: target)
    }

    /// Returns the next candidate: surface candidates first, then typo-correction
    /// candidates; `nil` when both generators are exhausted or absent.
    mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
        if let next = self.surfaceGenerator?.next() {
            return next
        }
        if let next = self.typoCorrectionGenerator?.next() {
            return next
        }
        return nil
    }
}
|
||||
|
||||
func movingTowardPrefixSearch(
|
||||
inputs: [ComposingText.InputElement],
|
||||
leftIndex: Int,
|
||||
rightIndexRange: Range<Int>,
|
||||
composingText: ComposingText,
|
||||
inputProcessRange: TypoCorrectionGenerator.ProcessRange?,
|
||||
surfaceProcessRange: TypoCorrectionGenerator.ProcessRange?,
|
||||
useMemory: Bool,
|
||||
needTypoCorrection: Bool
|
||||
) -> (
|
||||
stringToInfo: [[Character]: (endIndex: Int, penalty: PValue)],
|
||||
stringToInfo: [[Character]: (endIndex: Lattice.LatticeIndex, penalty: PValue)],
|
||||
indices: [(key: String, indices: [Int])],
|
||||
temporaryMemoryDicdata: [DicdataElement]
|
||||
) {
|
||||
var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange, needTypoCorrection: needTypoCorrection)
|
||||
var generator = UnifiedGenerator()
|
||||
if let surfaceProcessRange {
|
||||
let surfaceGenerator = UnifiedGenerator.SurfaceGenerator(
|
||||
surface: Array(composingText.convertTarget.toKatakana()),
|
||||
range: surfaceProcessRange
|
||||
)
|
||||
generator.register(surfaceGenerator)
|
||||
}
|
||||
if let inputProcessRange {
|
||||
let typoCorrectionGenerator = TypoCorrectionGenerator(
|
||||
inputs: composingText.input,
|
||||
range: inputProcessRange,
|
||||
needTypoCorrection: needTypoCorrection
|
||||
)
|
||||
generator.register(typoCorrectionGenerator)
|
||||
}
|
||||
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
|
||||
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
|
||||
var stringToInfo: [([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))] = []
|
||||
// 動的辞書(一時学習データ、動的ユーザ辞書)から取り出されたデータ
|
||||
var dynamicDicdata: [Int: [DicdataElement]] = [:]
|
||||
// ジェネレータを舐める
|
||||
@ -332,8 +405,25 @@ public final class DicdataStore {
|
||||
}
|
||||
let minCount = stringToInfo.map {$0.0.count}.min() ?? 0
|
||||
return (
|
||||
Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
|
||||
targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )},
|
||||
Dictionary(
|
||||
stringToInfo,
|
||||
uniquingKeysWith: { (lhs, rhs) in
|
||||
if lhs.penalty < rhs.penalty {
|
||||
return lhs
|
||||
} else if lhs.penalty == rhs.penalty {
|
||||
return switch (lhs.endIndex, rhs.endIndex) {
|
||||
case (.input, .input), (.surface, .surface): lhs // どっちでもいい
|
||||
case (.surface, .input): lhs // surfaceIndexを優先
|
||||
case (.input, .surface): rhs // surfaceIndexを優先
|
||||
}
|
||||
} else {
|
||||
return rhs
|
||||
}
|
||||
}
|
||||
),
|
||||
targetLOUDS.map {
|
||||
($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max))
|
||||
},
|
||||
dynamicDicdata.flatMap {
|
||||
minCount < $0.key + 1 ? $0.value : []
|
||||
}
|
||||
@ -381,24 +471,64 @@ public final class DicdataStore {
|
||||
/// - inputData: 入力データ
|
||||
/// - from: 起点
|
||||
/// - toIndexRange: `from ..< (toIndexRange)`の範囲で辞書ルックアップを行う。
|
||||
public func getLOUDSDataInRange(inputData: ComposingText, from fromIndex: Int, toIndexRange: Range<Int>? = nil, needTypoCorrection: Bool = true) -> [LatticeNode] {
|
||||
let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
|
||||
let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
|
||||
if fromIndex > toIndexLeft || toIndexLeft >= toIndexRight {
|
||||
public func getLOUDSDataInRange(
|
||||
inputData: ComposingText,
|
||||
from fromInputIndex: Int?,
|
||||
toIndexRange: Range<Int>? = nil,
|
||||
surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = nil,
|
||||
needTypoCorrection: Bool = true
|
||||
) -> [LatticeNode] {
|
||||
let inputProcessRange: TypoCorrectionGenerator.ProcessRange?
|
||||
|
||||
// TODO: make `fromInputIndex` optional later.
|
||||
if let fromInputIndex {
|
||||
let toInputIndexLeft = toIndexRange?.startIndex ?? fromInputIndex
|
||||
let toInputIndexRight = min(
|
||||
toIndexRange?.endIndex ?? inputData.input.count,
|
||||
fromInputIndex + self.maxlength
|
||||
)
|
||||
if fromInputIndex > toInputIndexLeft || toInputIndexLeft >= toInputIndexRight {
|
||||
debug(#function, "index is wrong")
|
||||
return []
|
||||
}
|
||||
inputProcessRange = .init(leftIndex: fromInputIndex, rightIndexRange: toInputIndexLeft ..< toInputIndexRight)
|
||||
} else {
|
||||
inputProcessRange = nil
|
||||
}
|
||||
|
||||
let segments = (fromIndex ..< toIndexRight).reduce(into: []) { (segments: inout [String], rightIndex: Int) in
|
||||
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
|
||||
let surfaceProcessRange: TypoCorrectionGenerator.ProcessRange?
|
||||
if let surfaceRange {
|
||||
let toSurfaceIndexLeft = surfaceRange.endIndexRange?.startIndex ?? surfaceRange.startIndex
|
||||
let toSurfaceIndexRight = min(
|
||||
surfaceRange.endIndexRange?.endIndex ?? inputData.convertTarget.count,
|
||||
surfaceRange.startIndex + self.maxlength
|
||||
)
|
||||
if surfaceRange.startIndex > toSurfaceIndexLeft || toSurfaceIndexLeft >= toSurfaceIndexRight {
|
||||
debug(#function, "index is wrong")
|
||||
return []
|
||||
}
|
||||
surfaceProcessRange = .init(leftIndex: surfaceRange.startIndex, rightIndexRange: toSurfaceIndexLeft ..< toSurfaceIndexRight)
|
||||
} else {
|
||||
surfaceProcessRange = nil
|
||||
}
|
||||
if inputProcessRange == nil && surfaceProcessRange == nil {
|
||||
debug(#function, "either of inputProcessRange and surfaceProcessRange must not be nil")
|
||||
return []
|
||||
}
|
||||
// MARK: 誤り訂正の対象を列挙する。非常に重い処理。
|
||||
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled, needTypoCorrection: needTypoCorrection)
|
||||
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(
|
||||
composingText: inputData,
|
||||
inputProcessRange: inputProcessRange,
|
||||
surfaceProcessRange: surfaceProcessRange,
|
||||
useMemory: self.learningManager.enabled,
|
||||
needTypoCorrection: needTypoCorrection
|
||||
)
|
||||
print(stringToInfo)
|
||||
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく
|
||||
for (identifier, value) in indices {
|
||||
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
|
||||
let rubyArray = Array(data.ruby)
|
||||
let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
|
||||
let penalty = stringToInfo[rubyArray]?.penalty ?? 0
|
||||
if penalty.isZero {
|
||||
return data
|
||||
}
|
||||
@ -413,34 +543,40 @@ public final class DicdataStore {
|
||||
dicdata.append(contentsOf: result)
|
||||
}
|
||||
|
||||
for i in toIndexLeft ..< toIndexRight {
|
||||
if let inputProcessRange {
|
||||
let segments = (inputProcessRange.leftIndex ..< inputProcessRange.rightIndexRange.endIndex).reduce(into: []) { (segments: inout [String], rightIndex: Int) in
|
||||
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
|
||||
}
|
||||
for i in inputProcessRange.rightIndexRange {
|
||||
do {
|
||||
let result = self.getWiseDicdata(convertTarget: segments[i - fromIndex], inputData: inputData, inputRange: fromIndex ..< i + 1)
|
||||
let result = self.getWiseDicdata(
|
||||
convertTarget: segments[i - inputProcessRange.leftIndex],
|
||||
inputData: inputData,
|
||||
inputRange: inputProcessRange.leftIndex ..< i + 1
|
||||
)
|
||||
for item in result {
|
||||
stringToInfo[Array(item.ruby)] = (i, 0)
|
||||
stringToInfo[Array(item.ruby)] = (.input(i), 0)
|
||||
}
|
||||
dicdata.append(contentsOf: result)
|
||||
}
|
||||
}
|
||||
if fromIndex == .zero {
|
||||
}
|
||||
let needBOS = fromInputIndex == .zero
|
||||
let result: [LatticeNode] = dicdata.compactMap {
|
||||
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
|
||||
return nil
|
||||
}
|
||||
let node = LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
|
||||
let range: Lattice.LatticeRange = switch endIndex {
|
||||
case .input(let endIndex): .input(from: fromInputIndex!, to: endIndex + 1)
|
||||
case .surface(let endIndex): .surface(from: (surfaceRange?.startIndex)!, to: endIndex + 1)
|
||||
}
|
||||
let node = LatticeNode(data: $0, range: range)
|
||||
if needBOS {
|
||||
node.prevs.append(RegisteredNode.BOSNode())
|
||||
}
|
||||
return node
|
||||
}
|
||||
return result
|
||||
} else {
|
||||
let result: [LatticeNode] = dicdata.compactMap {
|
||||
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
|
||||
return nil
|
||||
}
|
||||
return LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
func getZeroHintPredictionDicdata(lastRcid: Int) -> [DicdataElement] {
|
||||
|
@ -1,13 +1,12 @@
|
||||
import SwiftUtils
|
||||
|
||||
struct TypoCorrectionGenerator: Sendable {
|
||||
init(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>, needTypoCorrection: Bool) {
|
||||
init(inputs: [ComposingText.InputElement], range: ProcessRange, needTypoCorrection: Bool) {
|
||||
self.maxPenalty = needTypoCorrection ? 3.5 * 3 : 0
|
||||
self.inputs = inputs
|
||||
self.left = left
|
||||
self.rightIndexRange = rightIndexRange
|
||||
self.range = range
|
||||
|
||||
let count = rightIndexRange.endIndex - left
|
||||
let count = self.range.rightIndexRange.endIndex - range.leftIndex
|
||||
self.count = count
|
||||
self.nodes = (0..<count).map {(i: Int) in
|
||||
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
|
||||
@ -15,7 +14,7 @@ struct TypoCorrectionGenerator: Sendable {
|
||||
if count <= j {
|
||||
return []
|
||||
}
|
||||
return Self.getTypo(inputs[left + i ... left + j], frozen: !needTypoCorrection)
|
||||
return Self.getTypo(inputs[range.leftIndex + i ... range.leftIndex + j], frozen: !needTypoCorrection)
|
||||
}
|
||||
}
|
||||
// 深さ優先で列挙する
|
||||
@ -23,7 +22,7 @@ struct TypoCorrectionGenerator: Sendable {
|
||||
guard let firstElement = typoCandidate.inputElements.first else {
|
||||
return nil
|
||||
}
|
||||
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
|
||||
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: range.leftIndex) {
|
||||
var convertTargetElements = [ComposingText.ConvertTargetElement]()
|
||||
for element in typoCandidate.inputElements {
|
||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||
@ -36,11 +35,15 @@ struct TypoCorrectionGenerator: Sendable {
|
||||
|
||||
let maxPenalty: PValue
|
||||
let inputs: [ComposingText.InputElement]
|
||||
let left: Int
|
||||
let rightIndexRange: Range<Int>
|
||||
let range: ProcessRange
|
||||
let nodes: [[TypoCandidate]]
|
||||
let count: Int
|
||||
|
||||
/// The span a generator works on: a fixed left edge plus the range of
/// (inclusive) end indices at which a candidate is allowed to end.
struct ProcessRange: Sendable, Equatable {
    /// Index of the first element of every candidate.
    var leftIndex: Int
    /// Indices at which a candidate may end; a candidate is emitted only when
    /// its last element's index is contained in this range.
    var rightIndexRange: Range<Int>
}
|
||||
|
||||
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]
|
||||
|
||||
/// `target`で始まる場合は到達不可能であることを知らせる
|
||||
@ -75,12 +78,12 @@ struct TypoCorrectionGenerator: Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? {
|
||||
mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
|
||||
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
|
||||
var result: ([Character], (endIndex: Int, penalty: PValue))? = nil
|
||||
if rightIndexRange.contains(count + left - 1) {
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
result = (convertTarget, (count + left - 1, penalty))
|
||||
var result: ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? = nil
|
||||
if self.range.rightIndexRange.contains(count + self.range.leftIndex - 1) {
|
||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + self.range.leftIndex, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||
result = (convertTarget, (.input(count + self.range.leftIndex - 1), penalty))
|
||||
}
|
||||
}
|
||||
// エスケープ
|
||||
@ -94,7 +97,7 @@ struct TypoCorrectionGenerator: Sendable {
|
||||
// 訂正数上限(3個)
|
||||
if penalty >= maxPenalty {
|
||||
var convertTargetElements = convertTargetElements
|
||||
let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||
let correct = [inputs[self.range.leftIndex + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||
if count + correct.count > self.nodes.endIndex {
|
||||
if let result {
|
||||
return result
|
||||
|
@ -218,6 +218,7 @@ final class ComposingTextTests: XCTestCase {
|
||||
}
|
||||
|
||||
func testIndexMap() throws {
|
||||
do {
|
||||
var c = ComposingText()
|
||||
sequentialInput(&c, sequence: "kyouhaiitenkida", inputStyle: .roman2kana)
|
||||
let map = c.inputIndexToSurfaceIndexMap()
|
||||
@ -242,4 +243,19 @@ final class ComposingTextTests: XCTestCase {
|
||||
XCTAssertEqual(map[14], nil) // d
|
||||
XCTAssertEqual(map[15], 10) // a
|
||||
}
|
||||
do {
|
||||
var c = ComposingText()
|
||||
sequentialInput(&c, sequence: "sakujoshori", inputStyle: .roman2kana)
|
||||
let map = c.inputIndexToSurfaceIndexMap()
|
||||
let reversedMap = (0 ..< c.convertTarget.count + 1).compactMap {
|
||||
if map.values.contains($0) {
|
||||
String(c.convertTarget.prefix($0))
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
}
|
||||
XCTAssertFalse(reversedMap.contains("さくじ"))
|
||||
XCTAssertFalse(reversedMap.contains("さくじょし"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user