mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
wip: tests are not passing yet, but commit and push anyway
to continue working in another environment
This commit is contained in:
@ -31,10 +31,35 @@ extension Kana2Kanji {
|
|||||||
let inputCount: Int = inputData.input.count
|
let inputCount: Int = inputData.input.count
|
||||||
let surfaceCount = inputData.convertTarget.count
|
let surfaceCount = inputData.convertTarget.count
|
||||||
let result: LatticeNode = LatticeNode.EOSNode
|
let result: LatticeNode = LatticeNode.EOSNode
|
||||||
|
let i2sMap = inputData.inputIndexToSurfaceIndexMap()
|
||||||
|
var rawNodes = (.zero ..< inputCount).map {
|
||||||
|
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = i2sMap[$0] {
|
||||||
|
(sIndex, nil)
|
||||||
|
} else {
|
||||||
|
nil
|
||||||
|
}
|
||||||
|
return dicdataStore.getLOUDSDataInRange(
|
||||||
|
inputData: inputData,
|
||||||
|
from: $0,
|
||||||
|
surfaceRange: surfaceRange,
|
||||||
|
needTypoCorrection: needTypoCorrection
|
||||||
|
)
|
||||||
|
}
|
||||||
|
for sIndex in 0 ..< inputData.convertTarget.count where !i2sMap.values.contains(sIndex) {
|
||||||
|
// inputIndexの列挙でカバーできないsIndexについて、追加で辞書を引いてrawNodesに追加
|
||||||
|
rawNodes.append(
|
||||||
|
dicdataStore.getLOUDSDataInRange(
|
||||||
|
inputData: inputData,
|
||||||
|
from: nil,
|
||||||
|
surfaceRange: (sIndex, nil),
|
||||||
|
needTypoCorrection: needTypoCorrection
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
let lattice: Lattice = Lattice(
|
let lattice: Lattice = Lattice(
|
||||||
inputCount: inputCount,
|
inputCount: inputCount,
|
||||||
surfaceCount: surfaceCount,
|
surfaceCount: surfaceCount,
|
||||||
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)}
|
rawNodes: rawNodes
|
||||||
)
|
)
|
||||||
// 「i文字目から始まるnodes」に対して
|
// 「i文字目から始まるnodes」に対して
|
||||||
for (i, nodeArray) in lattice.indexedNodes() {
|
for (i, nodeArray) in lattice.indexedNodes() {
|
||||||
@ -55,8 +80,12 @@ extension Kana2Kanji {
|
|||||||
// valuesを更新する
|
// valuesを更新する
|
||||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||||
}
|
}
|
||||||
// 変換した文字数
|
// 後続ノードのindex(正規化する)
|
||||||
let nextIndex = node.range.endIndex
|
let nextIndex: Lattice.LatticeIndex = switch node.range.endIndex {
|
||||||
|
case .input(let index): if let sIndex = i2sMap[index] { .surface(sIndex) } else { node.range.endIndex }
|
||||||
|
case .surface: node.range.endIndex
|
||||||
|
}
|
||||||
|
print(nextIndex, node.data.word, node.data.ruby, lattice[index: nextIndex].count)
|
||||||
// 文字数がcountと等しい場合登録する
|
// 文字数がcountと等しい場合登録する
|
||||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||||
self.updateResultNode(with: node, resultNode: result)
|
self.updateResultNode(with: node, resultNode: result)
|
||||||
|
@ -1,8 +1,19 @@
|
|||||||
import Algorithms
|
import Algorithms
|
||||||
import SwiftUtils
|
import SwiftUtils
|
||||||
|
|
||||||
|
/// A pair of node lists that is traversed as a single sequence of `LatticeNode`s.
///
/// Iteration yields every element of `inputIndexedNodes` first and then every
/// element of `surfaceIndexedNodes`.
struct LatticeNodeArray: Sequence {
    typealias Element = LatticeNode

    /// Nodes belonging to the input-indexed side.
    var inputIndexedNodes: [LatticeNode]
    /// Nodes belonging to the surface-indexed side.
    var surfaceIndexedNodes: [LatticeNode]

    /// Returns an iterator that walks the input-indexed nodes followed by the surface-indexed nodes.
    func makeIterator() -> Chain2Sequence<[LatticeNode], [LatticeNode]>.Iterator {
        let allNodes = chain(inputIndexedNodes, surfaceIndexedNodes)
        return allNodes.makeIterator()
    }
}
|
||||||
|
|
||||||
struct Lattice: Sequence {
|
struct Lattice: Sequence {
|
||||||
typealias Element = [LatticeNode]
|
typealias Element = LatticeNodeArray
|
||||||
|
|
||||||
init() {
|
init() {
|
||||||
self.inputIndexedNodes = []
|
self.inputIndexedNodes = []
|
||||||
@ -15,11 +26,12 @@ struct Lattice: Sequence {
|
|||||||
|
|
||||||
for nodes in rawNodes {
|
for nodes in rawNodes {
|
||||||
guard let first = nodes.first else { continue }
|
guard let first = nodes.first else { continue }
|
||||||
|
print(nodes.mapSet { $0.range.startIndex }, nodes.count)
|
||||||
switch first.range.startIndex {
|
switch first.range.startIndex {
|
||||||
case .surface(let i):
|
case .surface(let i):
|
||||||
self.surfaceIndexedNodes[i] = nodes
|
self.surfaceIndexedNodes[i].append(contentsOf: nodes)
|
||||||
case .input(let i):
|
case .input(let i):
|
||||||
self.inputIndexedNodes[i] = nodes
|
self.inputIndexedNodes[i].append(contentsOf: nodes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -97,15 +109,42 @@ struct Lattice: Sequence {
|
|||||||
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })
|
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeIterator() -> Chain2Sequence<[[LatticeNode]], [[LatticeNode]]>.Iterator {
|
struct Iterator: IteratorProtocol {
|
||||||
self.inputIndexedNodes.chained(self.surfaceIndexedNodes).makeIterator()
|
init(lattice: Lattice) {
|
||||||
|
self.lattice = lattice
|
||||||
|
self.indices = (0, lattice.surfaceIndexedNodes.endIndex, 0, lattice.inputIndexedNodes.endIndex)
|
||||||
|
}
|
||||||
|
|
||||||
|
typealias Element = LatticeNodeArray
|
||||||
|
let lattice: Lattice
|
||||||
|
var indices: (currentSurfaceIndex: Int, surfaceEndIndex: Int, currentInputIndex: Int, inputEndIndex: Int)
|
||||||
|
|
||||||
|
mutating func next() -> LatticeNodeArray? {
|
||||||
|
if self.indices.currentSurfaceIndex < self.indices.surfaceEndIndex {
|
||||||
|
defer {
|
||||||
|
self.indices.currentSurfaceIndex += 1
|
||||||
|
}
|
||||||
|
return .init(inputIndexedNodes: [], surfaceIndexedNodes: self.lattice.surfaceIndexedNodes[self.indices.currentSurfaceIndex])
|
||||||
|
} else if self.indices.currentInputIndex < self.indices.inputEndIndex {
|
||||||
|
defer {
|
||||||
|
self.indices.currentInputIndex += 1
|
||||||
|
}
|
||||||
|
return .init(inputIndexedNodes: self.lattice.inputIndexedNodes[self.indices.currentInputIndex], surfaceIndexedNodes: [])
|
||||||
|
} else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeIterator() -> Iterator {
|
||||||
|
Iterator(lattice: self)
|
||||||
}
|
}
|
||||||
|
|
||||||
var isEmpty: Bool {
|
var isEmpty: Bool {
|
||||||
self.inputIndexedNodes.isEmpty && self.surfaceIndexedNodes.isEmpty
|
self.inputIndexedNodes.isEmpty && self.surfaceIndexedNodes.isEmpty
|
||||||
}
|
}
|
||||||
|
|
||||||
enum LatticeIndex: Sendable, Equatable {
|
enum LatticeIndex: Sendable, Equatable, Hashable {
|
||||||
case surface(Int)
|
case surface(Int)
|
||||||
case input(Int)
|
case input(Int)
|
||||||
|
|
||||||
@ -114,7 +153,7 @@ struct Lattice: Sequence {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum LatticeRange: Sendable, Equatable {
|
enum LatticeRange: Sendable, Equatable, Hashable {
|
||||||
static var zero: Self {
|
static var zero: Self {
|
||||||
.input(from: 0, to: 0)
|
.input(from: 0, to: 0)
|
||||||
}
|
}
|
||||||
@ -149,7 +188,7 @@ struct Lattice: Sequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func merged(with other: Self) -> Self? {
|
func merged(with other: Self) -> Self? {
|
||||||
switch (self, other) {
|
return switch (self, other) {
|
||||||
case (let .surface(l, ml), let .surface(mr, r)):
|
case (let .surface(l, ml), let .surface(mr, r)):
|
||||||
if ml == mr {
|
if ml == mr {
|
||||||
.surface(from: l, to: r)
|
.surface(from: l, to: r)
|
||||||
|
@ -40,6 +40,6 @@ public final class LatticeNode {
|
|||||||
/// - Returns: 文節単位の区切り情報を持った変換候補データのリスト。
|
/// - Returns: 文節単位の区切り情報を持った変換候補データのリスト。
|
||||||
/// - Note: 最終的に`EOS`ノードにおいて実行する想定のAPIになっている。
|
/// - Note: 最終的に`EOS`ノードにおいて実行する想定のAPIになっている。
|
||||||
func getCandidateData() -> [CandidateData] {
|
func getCandidateData() -> [CandidateData] {
|
||||||
self.prevs.map {$0.getCandidateData()}
|
return self.prevs.map {$0.getCandidateData()}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -242,20 +242,93 @@ public final class DicdataStore {
|
|||||||
return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
|
return [louds.searchNodeIndex(chars: charIDs)].compactMap {$0}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wraps an optional surface-based generator and an optional typo-correction generator
/// behind one `next()` / `setUnreachablePath(target:)` interface.
///
/// `next()` drains the surface generator first and only then falls back to the
/// typo-correction generator.
private struct UnifiedGenerator {
    /// Enumerates the substrings of `surface` that begin at `range.leftIndex` and end at
    /// successive indices starting from `range.rightIndexRange.lowerBound`.
    struct SurfaceGenerator {
        /// The characters to cut candidates from.
        var surface: [Character] = []
        /// Start index and the range of permitted (inclusive) end indices.
        var range: TypoCorrectionGenerator.ProcessRange
        /// Index of the last character of the candidate that the next call to `next()` produces.
        var currentIndex: Int

        init(surface: [Character], range: TypoCorrectionGenerator.ProcessRange) {
            self.surface = surface
            self.range = range
            self.currentIndex = range.rightIndexRange.lowerBound
        }

        /// Tells the generator that candidates beginning with `target` cannot be reached.
        ///
        /// When the surface (from `range.leftIndex` onwards) starts with `target`, the end-index
        /// range is clamped so that it does not extend past the end of `target`.
        /// - Parameter target: The prefix that is known to be unreachable.
        mutating func setUnreachablePath<C: Collection<Character>>(target: C) where C.Indices == Range<Int> {
            guard self.surface[self.range.leftIndex...].hasPrefix(target) else {
                return
            }
            // Compute the new upper bound.
            let currentLowerBound = self.range.rightIndexRange.lowerBound
            let currentUpperBound = self.range.rightIndexRange.upperBound
            // Use the length of `target`, not `indices.upperBound`: a slice keeps the indices of
            // its base collection, so its upper bound is not necessarily its element count.
            let targetUpperBound = self.range.leftIndex + target.indices.count
            self.range.rightIndexRange = min(currentLowerBound, targetUpperBound) ..< min(currentUpperBound, targetUpperBound)
        }

        /// Produces the next candidate, or `nil` once `currentIndex` leaves the surface or
        /// reaches the upper bound of `range.rightIndexRange`.
        /// - Returns: The characters `surface[range.leftIndex ... currentIndex]` together with
        ///   `.surface(currentIndex)` as the (inclusive) end index and a penalty of `0`.
        mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
            guard self.surface.indices.contains(self.currentIndex),
                  self.currentIndex < self.range.rightIndexRange.upperBound else {
                return nil
            }
            // Advance only after the return value has been built from the current index.
            defer {
                self.currentIndex += 1
            }
            let characters = Array(self.surface[self.range.leftIndex ... self.currentIndex])
            return (characters, (.surface(self.currentIndex), 0))
        }
    }

    /// Generator working on the input elements; `nil` until registered.
    var typoCorrectionGenerator: TypoCorrectionGenerator? = nil
    /// Generator working on the surface string; `nil` until registered.
    var surfaceGenerator: SurfaceGenerator? = nil

    /// Installs (or replaces) the typo-correction generator.
    mutating func register(_ generator: TypoCorrectionGenerator) {
        self.typoCorrectionGenerator = generator
    }

    /// Installs (or replaces) the surface generator.
    mutating func register(_ generator: SurfaceGenerator) {
        self.surfaceGenerator = generator
    }

    /// Forwards the unreachable prefix to every registered generator.
    mutating func setUnreachablePath<C: Collection<Character>>(target: C) where C.Indices == Range<Int> {
        self.typoCorrectionGenerator?.setUnreachablePath(target: target)
        self.surfaceGenerator?.setUnreachablePath(target: target)
    }

    /// Returns the next candidate: surface-based candidates first, then typo-correction
    /// candidates, and `nil` when both generators are exhausted or absent.
    mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
        if let next = self.surfaceGenerator?.next() {
            return next
        }
        if let next = self.typoCorrectionGenerator?.next() {
            return next
        }
        return nil
    }
}
|
||||||
|
|
||||||
func movingTowardPrefixSearch(
|
func movingTowardPrefixSearch(
|
||||||
inputs: [ComposingText.InputElement],
|
composingText: ComposingText,
|
||||||
leftIndex: Int,
|
inputProcessRange: TypoCorrectionGenerator.ProcessRange?,
|
||||||
rightIndexRange: Range<Int>,
|
surfaceProcessRange: TypoCorrectionGenerator.ProcessRange?,
|
||||||
useMemory: Bool,
|
useMemory: Bool,
|
||||||
needTypoCorrection: Bool
|
needTypoCorrection: Bool
|
||||||
) -> (
|
) -> (
|
||||||
stringToInfo: [[Character]: (endIndex: Int, penalty: PValue)],
|
stringToInfo: [[Character]: (endIndex: Lattice.LatticeIndex, penalty: PValue)],
|
||||||
indices: [(key: String, indices: [Int])],
|
indices: [(key: String, indices: [Int])],
|
||||||
temporaryMemoryDicdata: [DicdataElement]
|
temporaryMemoryDicdata: [DicdataElement]
|
||||||
) {
|
) {
|
||||||
var generator = TypoCorrectionGenerator(inputs: inputs, leftIndex: leftIndex, rightIndexRange: rightIndexRange, needTypoCorrection: needTypoCorrection)
|
var generator = UnifiedGenerator()
|
||||||
|
if let surfaceProcessRange {
|
||||||
|
let surfaceGenerator = UnifiedGenerator.SurfaceGenerator(
|
||||||
|
surface: Array(composingText.convertTarget.toKatakana()),
|
||||||
|
range: surfaceProcessRange
|
||||||
|
)
|
||||||
|
generator.register(surfaceGenerator)
|
||||||
|
}
|
||||||
|
if let inputProcessRange {
|
||||||
|
let typoCorrectionGenerator = TypoCorrectionGenerator(
|
||||||
|
inputs: composingText.input,
|
||||||
|
range: inputProcessRange,
|
||||||
|
needTypoCorrection: needTypoCorrection
|
||||||
|
)
|
||||||
|
generator.register(typoCorrectionGenerator)
|
||||||
|
}
|
||||||
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
|
var targetLOUDS: [String: LOUDS.MovingTowardPrefixSearchHelper] = [:]
|
||||||
var stringToInfo: [([Character], (endIndex: Int, penalty: PValue))] = []
|
var stringToInfo: [([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))] = []
|
||||||
// 動的辞書(一時学習データ、動的ユーザ辞書)から取り出されたデータ
|
// 動的辞書(一時学習データ、動的ユーザ辞書)から取り出されたデータ
|
||||||
var dynamicDicdata: [Int: [DicdataElement]] = [:]
|
var dynamicDicdata: [Int: [DicdataElement]] = [:]
|
||||||
// ジェネレータを舐める
|
// ジェネレータを舐める
|
||||||
@ -332,8 +405,25 @@ public final class DicdataStore {
|
|||||||
}
|
}
|
||||||
let minCount = stringToInfo.map {$0.0.count}.min() ?? 0
|
let minCount = stringToInfo.map {$0.0.count}.min() ?? 0
|
||||||
return (
|
return (
|
||||||
Dictionary(stringToInfo, uniquingKeysWith: {$0.penalty < $1.penalty ? $1 : $0}),
|
Dictionary(
|
||||||
targetLOUDS.map { ($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max) )},
|
stringToInfo,
|
||||||
|
uniquingKeysWith: { (lhs, rhs) in
|
||||||
|
if lhs.penalty < rhs.penalty {
|
||||||
|
return lhs
|
||||||
|
} else if lhs.penalty == rhs.penalty {
|
||||||
|
return switch (lhs.endIndex, rhs.endIndex) {
|
||||||
|
case (.input, .input), (.surface, .surface): lhs // どっちでもいい
|
||||||
|
case (.surface, .input): lhs // surfaceIndexを優先
|
||||||
|
case (.input, .surface): rhs // surfaceIndexを優先
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return rhs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
),
|
||||||
|
targetLOUDS.map {
|
||||||
|
($0.key, $0.value.indicesInDepth(depth: minCount - 1 ..< .max))
|
||||||
|
},
|
||||||
dynamicDicdata.flatMap {
|
dynamicDicdata.flatMap {
|
||||||
minCount < $0.key + 1 ? $0.value : []
|
minCount < $0.key + 1 ? $0.value : []
|
||||||
}
|
}
|
||||||
@ -381,24 +471,64 @@ public final class DicdataStore {
|
|||||||
/// - inputData: 入力データ
|
/// - inputData: 入力データ
|
||||||
/// - from: 起点
|
/// - from: 起点
|
||||||
/// - toIndexRange: `from ..< (toIndexRange)`の範囲で辞書ルックアップを行う。
|
/// - toIndexRange: `from ..< (toIndexRange)`の範囲で辞書ルックアップを行う。
|
||||||
public func getLOUDSDataInRange(inputData: ComposingText, from fromIndex: Int, toIndexRange: Range<Int>? = nil, needTypoCorrection: Bool = true) -> [LatticeNode] {
|
public func getLOUDSDataInRange(
|
||||||
let toIndexLeft = toIndexRange?.startIndex ?? fromIndex
|
inputData: ComposingText,
|
||||||
let toIndexRight = min(toIndexRange?.endIndex ?? inputData.input.count, fromIndex + self.maxlength)
|
from fromInputIndex: Int?,
|
||||||
if fromIndex > toIndexLeft || toIndexLeft >= toIndexRight {
|
toIndexRange: Range<Int>? = nil,
|
||||||
debug(#function, "index is wrong")
|
surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = nil,
|
||||||
return []
|
needTypoCorrection: Bool = true
|
||||||
|
) -> [LatticeNode] {
|
||||||
|
let inputProcessRange: TypoCorrectionGenerator.ProcessRange?
|
||||||
|
|
||||||
|
// TODO: make `fromInputIndex` optional later.
|
||||||
|
if let fromInputIndex {
|
||||||
|
let toInputIndexLeft = toIndexRange?.startIndex ?? fromInputIndex
|
||||||
|
let toInputIndexRight = min(
|
||||||
|
toIndexRange?.endIndex ?? inputData.input.count,
|
||||||
|
fromInputIndex + self.maxlength
|
||||||
|
)
|
||||||
|
if fromInputIndex > toInputIndexLeft || toInputIndexLeft >= toInputIndexRight {
|
||||||
|
debug(#function, "index is wrong")
|
||||||
|
return []
|
||||||
|
}
|
||||||
|
inputProcessRange = .init(leftIndex: fromInputIndex, rightIndexRange: toInputIndexLeft ..< toInputIndexRight)
|
||||||
|
} else {
|
||||||
|
inputProcessRange = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
let segments = (fromIndex ..< toIndexRight).reduce(into: []) { (segments: inout [String], rightIndex: Int) in
|
let surfaceProcessRange: TypoCorrectionGenerator.ProcessRange?
|
||||||
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
|
if let surfaceRange {
|
||||||
|
let toSurfaceIndexLeft = surfaceRange.endIndexRange?.startIndex ?? surfaceRange.startIndex
|
||||||
|
let toSurfaceIndexRight = min(
|
||||||
|
surfaceRange.endIndexRange?.endIndex ?? inputData.convertTarget.count,
|
||||||
|
surfaceRange.startIndex + self.maxlength
|
||||||
|
)
|
||||||
|
if surfaceRange.startIndex > toSurfaceIndexLeft || toSurfaceIndexLeft >= toSurfaceIndexRight {
|
||||||
|
debug(#function, "index is wrong")
|
||||||
|
return []
|
||||||
|
}
|
||||||
|
surfaceProcessRange = .init(leftIndex: surfaceRange.startIndex, rightIndexRange: toSurfaceIndexLeft ..< toSurfaceIndexRight)
|
||||||
|
} else {
|
||||||
|
surfaceProcessRange = nil
|
||||||
|
}
|
||||||
|
if inputProcessRange == nil && surfaceProcessRange == nil {
|
||||||
|
debug(#function, "either of inputProcessRange and surfaceProcessRange must not be nil")
|
||||||
|
return []
|
||||||
}
|
}
|
||||||
// MARK: 誤り訂正の対象を列挙する。非常に重い処理。
|
// MARK: 誤り訂正の対象を列挙する。非常に重い処理。
|
||||||
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(inputs: inputData.input, leftIndex: fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight, useMemory: self.learningManager.enabled, needTypoCorrection: needTypoCorrection)
|
var (stringToInfo, indices, dicdata) = self.movingTowardPrefixSearch(
|
||||||
|
composingText: inputData,
|
||||||
|
inputProcessRange: inputProcessRange,
|
||||||
|
surfaceProcessRange: surfaceProcessRange,
|
||||||
|
useMemory: self.learningManager.enabled,
|
||||||
|
needTypoCorrection: needTypoCorrection
|
||||||
|
)
|
||||||
|
print(stringToInfo)
|
||||||
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく
|
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく
|
||||||
for (identifier, value) in indices {
|
for (identifier, value) in indices {
|
||||||
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
|
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
|
||||||
let rubyArray = Array(data.ruby)
|
let rubyArray = Array(data.ruby)
|
||||||
let penalty = stringToInfo[rubyArray, default: (0, .zero)].penalty
|
let penalty = stringToInfo[rubyArray]?.penalty ?? 0
|
||||||
if penalty.isZero {
|
if penalty.isZero {
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
@ -413,34 +543,40 @@ public final class DicdataStore {
|
|||||||
dicdata.append(contentsOf: result)
|
dicdata.append(contentsOf: result)
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in toIndexLeft ..< toIndexRight {
|
if let inputProcessRange {
|
||||||
do {
|
let segments = (inputProcessRange.leftIndex ..< inputProcessRange.rightIndexRange.endIndex).reduce(into: []) { (segments: inout [String], rightIndex: Int) in
|
||||||
let result = self.getWiseDicdata(convertTarget: segments[i - fromIndex], inputData: inputData, inputRange: fromIndex ..< i + 1)
|
segments.append((segments.last ?? "") + String(inputData.input[rightIndex].character.toKatakana()))
|
||||||
for item in result {
|
}
|
||||||
stringToInfo[Array(item.ruby)] = (i, 0)
|
for i in inputProcessRange.rightIndexRange {
|
||||||
|
do {
|
||||||
|
let result = self.getWiseDicdata(
|
||||||
|
convertTarget: segments[i - inputProcessRange.leftIndex],
|
||||||
|
inputData: inputData,
|
||||||
|
inputRange: inputProcessRange.leftIndex ..< i + 1
|
||||||
|
)
|
||||||
|
for item in result {
|
||||||
|
stringToInfo[Array(item.ruby)] = (.input(i), 0)
|
||||||
|
}
|
||||||
|
dicdata.append(contentsOf: result)
|
||||||
}
|
}
|
||||||
dicdata.append(contentsOf: result)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if fromIndex == .zero {
|
let needBOS = fromInputIndex == .zero
|
||||||
let result: [LatticeNode] = dicdata.compactMap {
|
let result: [LatticeNode] = dicdata.compactMap {
|
||||||
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
|
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
let node = LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
|
let range: Lattice.LatticeRange = switch endIndex {
|
||||||
|
case .input(let endIndex): .input(from: fromInputIndex!, to: endIndex + 1)
|
||||||
|
case .surface(let endIndex): .surface(from: (surfaceRange?.startIndex)!, to: endIndex + 1)
|
||||||
|
}
|
||||||
|
let node = LatticeNode(data: $0, range: range)
|
||||||
|
if needBOS {
|
||||||
node.prevs.append(RegisteredNode.BOSNode())
|
node.prevs.append(RegisteredNode.BOSNode())
|
||||||
return node
|
|
||||||
}
|
}
|
||||||
return result
|
return node
|
||||||
} else {
|
|
||||||
let result: [LatticeNode] = dicdata.compactMap {
|
|
||||||
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
}
|
||||||
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
func getZeroHintPredictionDicdata(lastRcid: Int) -> [DicdataElement] {
|
func getZeroHintPredictionDicdata(lastRcid: Int) -> [DicdataElement] {
|
||||||
|
@ -1,13 +1,12 @@
|
|||||||
import SwiftUtils
|
import SwiftUtils
|
||||||
|
|
||||||
struct TypoCorrectionGenerator: Sendable {
|
struct TypoCorrectionGenerator: Sendable {
|
||||||
init(inputs: [ComposingText.InputElement], leftIndex left: Int, rightIndexRange: Range<Int>, needTypoCorrection: Bool) {
|
init(inputs: [ComposingText.InputElement], range: ProcessRange, needTypoCorrection: Bool) {
|
||||||
self.maxPenalty = needTypoCorrection ? 3.5 * 3 : 0
|
self.maxPenalty = needTypoCorrection ? 3.5 * 3 : 0
|
||||||
self.inputs = inputs
|
self.inputs = inputs
|
||||||
self.left = left
|
self.range = range
|
||||||
self.rightIndexRange = rightIndexRange
|
|
||||||
|
|
||||||
let count = rightIndexRange.endIndex - left
|
let count = self.range.rightIndexRange.endIndex - range.leftIndex
|
||||||
self.count = count
|
self.count = count
|
||||||
self.nodes = (0..<count).map {(i: Int) in
|
self.nodes = (0..<count).map {(i: Int) in
|
||||||
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
|
Self.lengths.flatMap {(k: Int) -> [TypoCandidate] in
|
||||||
@ -15,7 +14,7 @@ struct TypoCorrectionGenerator: Sendable {
|
|||||||
if count <= j {
|
if count <= j {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
return Self.getTypo(inputs[left + i ... left + j], frozen: !needTypoCorrection)
|
return Self.getTypo(inputs[range.leftIndex + i ... range.leftIndex + j], frozen: !needTypoCorrection)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 深さ優先で列挙する
|
// 深さ優先で列挙する
|
||||||
@ -23,7 +22,7 @@ struct TypoCorrectionGenerator: Sendable {
|
|||||||
guard let firstElement = typoCandidate.inputElements.first else {
|
guard let firstElement = typoCandidate.inputElements.first else {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: left) {
|
if ComposingText.isLeftSideValid(first: firstElement, of: inputs, from: range.leftIndex) {
|
||||||
var convertTargetElements = [ComposingText.ConvertTargetElement]()
|
var convertTargetElements = [ComposingText.ConvertTargetElement]()
|
||||||
for element in typoCandidate.inputElements {
|
for element in typoCandidate.inputElements {
|
||||||
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
ComposingText.updateConvertTargetElements(currentElements: &convertTargetElements, newElement: element)
|
||||||
@ -36,11 +35,15 @@ struct TypoCorrectionGenerator: Sendable {
|
|||||||
|
|
||||||
let maxPenalty: PValue
|
let maxPenalty: PValue
|
||||||
let inputs: [ComposingText.InputElement]
|
let inputs: [ComposingText.InputElement]
|
||||||
let left: Int
|
let range: ProcessRange
|
||||||
let rightIndexRange: Range<Int>
|
|
||||||
let nodes: [[TypoCandidate]]
|
let nodes: [[TypoCandidate]]
|
||||||
let count: Int
|
let count: Int
|
||||||
|
|
||||||
|
/// The span a generator works on: a fixed start index plus the range of end indices to try.
struct ProcessRange: Sendable, Equatable {
|
||||||
|
/// Index of the first element that is processed.
var leftIndex: Int
|
||||||
|
/// Range of end indices (each one inclusive, i.e. the index of the last element of a candidate).
var rightIndexRange: Range<Int>
|
||||||
|
}
|
||||||
|
|
||||||
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]
|
var stack: [(convertTargetElements: [ComposingText.ConvertTargetElement], lastElement: ComposingText.InputElement, count: Int, penalty: PValue)]
|
||||||
|
|
||||||
/// `target`で始まる場合は到達不可能であることを知らせる
|
/// `target`で始まる場合は到達不可能であることを知らせる
|
||||||
@ -75,12 +78,12 @@ struct TypoCorrectionGenerator: Sendable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func next() -> ([Character], (endIndex: Int, penalty: PValue))? {
|
mutating func next() -> ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? {
|
||||||
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
|
while let (convertTargetElements, lastElement, count, penalty) = self.stack.popLast() {
|
||||||
var result: ([Character], (endIndex: Int, penalty: PValue))? = nil
|
var result: ([Character], (endIndex: Lattice.LatticeIndex, penalty: PValue))? = nil
|
||||||
if rightIndexRange.contains(count + left - 1) {
|
if self.range.rightIndexRange.contains(count + self.range.leftIndex - 1) {
|
||||||
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + left, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
if let convertTarget = ComposingText.getConvertTargetIfRightSideIsValid(lastElement: lastElement, of: inputs, to: count + self.range.leftIndex, convertTargetElements: convertTargetElements)?.map({$0.toKatakana()}) {
|
||||||
result = (convertTarget, (count + left - 1, penalty))
|
result = (convertTarget, (.input(count + self.range.leftIndex - 1), penalty))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// エスケープ
|
// エスケープ
|
||||||
@ -94,7 +97,7 @@ struct TypoCorrectionGenerator: Sendable {
|
|||||||
// 訂正数上限(3個)
|
// 訂正数上限(3個)
|
||||||
if penalty >= maxPenalty {
|
if penalty >= maxPenalty {
|
||||||
var convertTargetElements = convertTargetElements
|
var convertTargetElements = convertTargetElements
|
||||||
let correct = [inputs[left + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
let correct = [inputs[self.range.leftIndex + count]].map {ComposingText.InputElement(character: $0.character.toKatakana(), inputStyle: $0.inputStyle)}
|
||||||
if count + correct.count > self.nodes.endIndex {
|
if count + correct.count > self.nodes.endIndex {
|
||||||
if let result {
|
if let result {
|
||||||
return result
|
return result
|
||||||
|
@ -218,28 +218,44 @@ final class ComposingTextTests: XCTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func testIndexMap() throws {
|
func testIndexMap() throws {
|
||||||
var c = ComposingText()
|
do {
|
||||||
sequentialInput(&c, sequence: "kyouhaiitenkida", inputStyle: .roman2kana)
|
var c = ComposingText()
|
||||||
let map = c.inputIndexToSurfaceIndexMap()
|
sequentialInput(&c, sequence: "kyouhaiitenkida", inputStyle: .roman2kana)
|
||||||
|
let map = c.inputIndexToSurfaceIndexMap()
|
||||||
|
|
||||||
// Note: 現在の実装では、アドホックな対処によってnが"ん"に切り替わる
|
// Note: 現在の実装では、アドホックな対処によってnが"ん"に切り替わる
|
||||||
XCTAssertEqual(c.input[10], .init(character: "ん", inputStyle: .direct))
|
XCTAssertEqual(c.input[10], .init(character: "ん", inputStyle: .direct))
|
||||||
|
|
||||||
XCTAssertEqual(map[0], 0) // ""
|
XCTAssertEqual(map[0], 0) // ""
|
||||||
XCTAssertEqual(map[1], nil) // k
|
XCTAssertEqual(map[1], nil) // k
|
||||||
XCTAssertEqual(map[2], nil) // y
|
XCTAssertEqual(map[2], nil) // y
|
||||||
XCTAssertEqual(map[3], 2) // o
|
XCTAssertEqual(map[3], 2) // o
|
||||||
XCTAssertEqual(map[4], 3) // u
|
XCTAssertEqual(map[4], 3) // u
|
||||||
XCTAssertEqual(map[5], nil) // h
|
XCTAssertEqual(map[5], nil) // h
|
||||||
XCTAssertEqual(map[6], 4) // a
|
XCTAssertEqual(map[6], 4) // a
|
||||||
XCTAssertEqual(map[7], 5) // i
|
XCTAssertEqual(map[7], 5) // i
|
||||||
XCTAssertEqual(map[8], 6) // i
|
XCTAssertEqual(map[8], 6) // i
|
||||||
XCTAssertEqual(map[9], nil) // t
|
XCTAssertEqual(map[9], nil) // t
|
||||||
XCTAssertEqual(map[10], 7) // e
|
XCTAssertEqual(map[10], 7) // e
|
||||||
XCTAssertEqual(map[11], 8) // n // アドホックな対処の影響。nの場合はnilであるべき。
|
XCTAssertEqual(map[11], 8) // n // アドホックな対処の影響。nの場合はnilであるべき。
|
||||||
XCTAssertEqual(map[12], nil) // k
|
XCTAssertEqual(map[12], nil) // k
|
||||||
XCTAssertEqual(map[13], 9) // i
|
XCTAssertEqual(map[13], 9) // i
|
||||||
XCTAssertEqual(map[14], nil) // d
|
XCTAssertEqual(map[14], nil) // d
|
||||||
XCTAssertEqual(map[15], 10) // a
|
XCTAssertEqual(map[15], 10) // a
|
||||||
|
}
|
||||||
|
do {
|
||||||
|
var c = ComposingText()
|
||||||
|
sequentialInput(&c, sequence: "sakujoshori", inputStyle: .roman2kana)
|
||||||
|
let map = c.inputIndexToSurfaceIndexMap()
|
||||||
|
let reversedMap = (0 ..< c.convertTarget.count + 1).compactMap {
|
||||||
|
if map.values.contains($0) {
|
||||||
|
String(c.convertTarget.prefix($0))
|
||||||
|
} else {
|
||||||
|
nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
XCTAssertFalse(reversedMap.contains("さくじ"))
|
||||||
|
XCTAssertFalse(reversedMap.contains("さくじょし"))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user