fix: now FullInputProcessing.swift natively works with new index system

This commit is contained in:
ensan-hcl
2025-07-14 21:17:33 +09:00
parent 7374b18eae
commit 16363be738
9 changed files with 103 additions and 70 deletions

View File

@ -32,17 +32,17 @@ extension Kana2Kanji {
let inputCount: Int = inputData.input.count let inputCount: Int = inputData.input.count
let surfaceCount = inputData.convertTarget.count let surfaceCount = inputData.convertTarget.count
let result: LatticeNode = LatticeNode.EOSNode let result: LatticeNode = LatticeNode.EOSNode
let i2sMap = inputData.inputIndexToSurfaceIndexMap() let i2sMap = LatticeDualIndexMap(inputData)
let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, inputIndexToSurfaceIndexMap: i2sMap) let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, map: i2sMap)
let rawNodes = latticeIndices.map { (iIndex, sIndex) in let rawNodes = latticeIndices.map { index in
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex { let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
(sIndex, nil) (sIndex, nil)
} else { } else {
nil nil
} }
return dicdataStore.getLOUDSDataInRange( return dicdataStore.getLOUDSDataInRange(
inputData: inputData, inputData: inputData,
from: iIndex, from: index.inputIndex,
surfaceRange: surfaceRange, surfaceRange: surfaceRange,
needTypoCorrection: needTypoCorrection needTypoCorrection: needTypoCorrection
) )
@ -72,11 +72,7 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue} node.values = node.prevs.map {$0.totalValue + wValue}
} }
// index // index
let nextIndex: (inputIndex: Int?, surfaceIndex: Int?) = switch node.range.endIndex { let nextIndex = i2sMap.dualIndex(for: node.range.endIndex)
case .input(let index): (index, i2sMap[index])
case .surface(let index): (i2sMap.filter { $0.value == index}.first?.key, index)
}
print(nextIndex, node.data.word, node.data.ruby)
// count // count
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount { if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
self.updateResultNode(with: node, resultNode: result) self.updateResultNode(with: node, resultNode: result)

View File

@ -36,7 +36,9 @@ struct Kana2Kanji {
let lastMid = data.clauses.last!.clause.mid let lastMid = data.clauses.last!.clause.mid
let composingCount: ComposingCount = data.clauses.reduce(into: .inputCount(0)) { let composingCount: ComposingCount = data.clauses.reduce(into: .inputCount(0)) {
$0 = .composite($0, $1.clause.range.count) for range in $1.clause.ranges {
$0 = .composite($0, range.count)
}
} }
return Candidate( return Candidate(
text: text, text: text,

View File

@ -12,6 +12,54 @@ struct LatticeNodeArray: Sequence {
} }
} }
/// Translates between the two index spaces used by the lattice: positions in
/// the input sequence and positions in the surface (convert target) string.
struct LatticeDualIndexMap {
    /// Lookup table from an input index to the surface index it corresponds to.
    /// Input indices with no surface counterpart are absent from the table.
    private(set) var inputIndexToSurfaceIndexMap: [Int: Int]

    /// Builds the map from the table reported by `composingText`.
    init(_ composingText: ComposingText) {
        self.inputIndexToSurfaceIndexMap = composingText.inputIndexToSurfaceIndexMap()
    }

    /// A lattice position expressed in the input space, the surface space, or both.
    enum DualIndex: Sendable, Equatable, Hashable {
        /// A position known only in the input space.
        case inputIndex(Int)
        /// A position known only in the surface space.
        case surfaceIndex(Int)
        /// A position that exists in both spaces.
        case bothIndex(inputIndex: Int, surfaceIndex: Int)

        /// The input-space component, or `nil` for a surface-only position.
        var inputIndex: Int? {
            switch self {
            case .inputIndex(let index), .bothIndex(let index, _):
                index
            case .surfaceIndex:
                nil
            }
        }

        /// The surface-space component, or `nil` for an input-only position.
        var surfaceIndex: Int? {
            switch self {
            case .inputIndex:
                nil
            case .surfaceIndex(let index), .bothIndex(_, let index):
                index
            }
        }
    }

    /// Resolves a single-space lattice index into a `DualIndex`.
    ///
    /// - Parameter latticeIndex: An index expressed in either the input or the surface space.
    /// - Returns: `.bothIndex` when the table links the given index to a position in the
    ///   other space; otherwise the one-sided case matching `latticeIndex`.
    func dualIndex(for latticeIndex: Lattice.LatticeIndex) -> DualIndex {
        switch latticeIndex {
        case .input(let iIndex):
            guard let sIndex = inputIndexToSurfaceIndexMap[iIndex] else {
                return .inputIndex(iIndex)
            }
            return .bothIndex(inputIndex: iIndex, surfaceIndex: sIndex)
        case .surface(let sIndex):
            // Reverse lookup: scan for an entry whose value is `sIndex`.
            // `first(where:)` stops at the first hit and does not allocate an
            // intermediate dictionary the way `filter { ... }.first` does.
            // NOTE(review): presumably at most one input index maps to a given
            // surface index; if several did, which one is returned is unspecified
            // (as it was before) — confirm against the map's producer.
            guard let iIndex = inputIndexToSurfaceIndexMap.first(where: { $0.value == sIndex })?.key else {
                return .surfaceIndex(sIndex)
            }
            return .bothIndex(inputIndex: iIndex, surfaceIndex: sIndex)
        }
    }
}
struct Lattice: Sequence { struct Lattice: Sequence {
typealias Element = LatticeNodeArray typealias Element = LatticeNodeArray
@ -44,22 +92,22 @@ struct Lattice: Sequence {
private var inputIndexedNodes: [[LatticeNode]] private var inputIndexedNodes: [[LatticeNode]]
private var surfaceIndexedNodes: [[LatticeNode]] private var surfaceIndexedNodes: [[LatticeNode]]
static func indices(inputCount: Int, surfaceCount: Int, inputIndexToSurfaceIndexMap: [Int: Int]) -> [(inputIndex: Int?, surfaceIndex: Int?)] { static func indices(inputCount: Int, surfaceCount: Int, map: LatticeDualIndexMap) -> [LatticeDualIndexMap.DualIndex] {
var indices: [(inputIndex: Int?, surfaceIndex: Int?)] = [] var indices: [LatticeDualIndexMap.DualIndex] = []
var sIndexPointer = 0 var sIndexPointer = 0
for i in 0 ..< inputCount { for i in 0 ..< inputCount {
if let sIndex = inputIndexToSurfaceIndexMap[i] { if let sIndex = map.inputIndexToSurfaceIndexMap[i] {
for j in sIndexPointer ..< sIndex { for j in sIndexPointer ..< sIndex {
indices.append((nil, j)) indices.append(.surfaceIndex(j))
} }
indices.append((i, sIndex)) indices.append(.bothIndex(inputIndex: i, surfaceIndex: sIndex))
sIndexPointer = sIndex + 1 sIndexPointer = sIndex + 1
} else { } else {
indices.append((i, nil)) indices.append(.inputIndex(i))
} }
} }
for j in sIndexPointer ..< surfaceCount { for j in sIndexPointer ..< surfaceCount {
indices.append((nil, j)) indices.append(.surfaceIndex(j))
} }
return indices return indices
} }
@ -124,7 +172,7 @@ struct Lattice: Sequence {
} }
} }
subscript(index index: (inputIndex: Int?, surfaceIndex: Int?)) -> LatticeNodeArray { subscript(index index: LatticeDualIndexMap.DualIndex) -> LatticeNodeArray {
get { get {
let iNodes: [LatticeNode] = if let iIndex = index.inputIndex { self.inputIndexedNodes[iIndex] } else { [] } let iNodes: [LatticeNode] = if let iIndex = index.inputIndex { self.inputIndexedNodes[iIndex] } else { [] }
let sNodes: [LatticeNode] = if let sIndex = index.surfaceIndex { self.surfaceIndexedNodes[sIndex] } else { [] } let sNodes: [LatticeNode] = if let sIndex = index.surfaceIndex { self.surfaceIndexedNodes[sIndex] } else { [] }
@ -132,7 +180,7 @@ struct Lattice: Sequence {
} }
} }
func indexedNodes(indices: [(inputIndex: Int?, surfaceIndex: Int?)]) -> some Sequence<(isHead: Bool, nodes: LatticeNodeArray)> { func indexedNodes(indices: [LatticeDualIndexMap.DualIndex]) -> some Sequence<(isHead: Bool, nodes: LatticeNodeArray)> {
indices.lazy.map { index in indices.lazy.map { index in
return (index.inputIndex == 0 && index.surfaceIndex == 0, self[index: index]) return (index.inputIndex == 0 && index.surfaceIndex == 0, self[index: index])
} }
@ -221,25 +269,6 @@ struct Lattice: Sequence {
} }
} }
/// Joins this range with `other` when the two are adjacent and of the same kind.
///
/// - Parameter other: The range expected to begin where this one ends.
/// - Returns: A range running from this range's start to `other`'s end when both are
///   `.surface` or both are `.input` and this range's end equals `other`'s start;
///   `nil` in every other combination.
func merged(with other: Self) -> Self? {
    switch (self, other) {
    case (.surface(let start, let joint), .surface(let otherStart, let end)) where joint == otherStart:
        return .surface(from: start, to: end)
    case (.input(let start, let joint), .input(let otherStart, let end)) where joint == otherStart:
        return .input(from: start, to: end)
    case (.surface, .surface), (.input, .input):
        // Same kind, but the ranges do not touch.
        return nil
    case (.surface, .input), (.input, .surface):
        // Ranges of different kinds are never merged.
        return nil
    }
}
func offseted(inputOffset: Int, surfaceOffset: Int) -> Self { func offseted(inputOffset: Int, surfaceOffset: Int) -> Self {
switch self { switch self {
case .surface(from: let from, to: let to): case .surface(from: let from, to: let to):

View File

@ -22,12 +22,15 @@ extension Kana2Kanji {
/// - note: /// - note:
/// ///
func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] { func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] {
debug("getPredictionCandidates", composingText, lastClause.range, lastClause.text) debug("getPredictionCandidates", composingText, lastClause.ranges, lastClause.text)
let lastRuby = switch lastClause.range { let lastRuby = lastClause.ranges.reduce(into: "") {
case let .input(left, right): let ruby = switch $1 {
ComposingText.getConvertTarget(for: composingText.input[left..<right]).toKatakana() case let .input(left, right):
case let .surface(left, right): ComposingText.getConvertTarget(for: composingText.input[left..<right]).toKatakana()
String(composingText.convertTarget.dropFirst(left).prefix(right - left)) case let .surface(left, right):
String(composingText.convertTarget.dropFirst(left).prefix(right - left))
}
$0.append(ruby)
} }
let lastRubyCount = lastRuby.count let lastRubyCount = lastRuby.count
let datas: [DicdataElement] let datas: [DicdataElement]

View File

@ -59,7 +59,7 @@ extension RegisteredNodeProtocol {
guard let prev else { guard let prev else {
let unit = ClauseDataUnit() let unit = ClauseDataUnit()
unit.mid = self.data.mid unit.mid = self.data.mid
unit.range = self.range unit.ranges = [self.range]
return CandidateData(clauses: [(clause: unit, value: .zero)], data: []) return CandidateData(clauses: [(clause: unit, value: .zero)], data: [])
} }
var lastcandidate = prev.getCandidateData() // registerd var lastcandidate = prev.getCandidateData() // registerd
@ -75,11 +75,7 @@ extension RegisteredNodeProtocol {
if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) { if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) {
// //
lastClause.text.append(self.data.word) lastClause.text.append(self.data.word)
if let newRange = lastClause.range.merged(with: self.range) { lastClause.ranges.append(self.range)
lastClause.range = newRange
} else {
fatalError("このケースは想定していません。")
}
// //
if (lastClause.mid == 500 && self.data.mid != 500) || DicdataStore.includeMMValueCalculation(self.data) { if (lastClause.mid == 500 && self.data.mid != 500) || DicdataStore.includeMMValueCalculation(self.data) {
lastClause.mid = self.data.mid lastClause.mid = self.data.mid
@ -92,7 +88,7 @@ extension RegisteredNodeProtocol {
else { else {
let unit = ClauseDataUnit() let unit = ClauseDataUnit()
unit.text = self.data.word unit.text = self.data.word
unit.range = self.range unit.ranges.append(self.range)
if DicdataStore.includeMMValueCalculation(self.data) { if DicdataStore.includeMMValueCalculation(self.data) {
unit.mid = self.data.mid unit.mid = self.data.mid
} }

View File

@ -17,32 +17,28 @@ final class ClauseDataUnit {
/// The text of the unit. /// The text of the unit.
var text: String = "" var text: String = ""
/// The range of the unit in input text. /// The range of the unit in input text.
var range: Lattice.LatticeRange = .zero var ranges: [Lattice.LatticeRange] = []
/// Merge the given unit to this unit. /// Merge the given unit to this unit.
/// - Parameter: /// - Parameter:
/// - unit: The unit to merge. /// - unit: The unit to merge.
func merge(with unit: ClauseDataUnit) { func merge(with unit: ClauseDataUnit) {
self.text.append(unit.text) self.text.append(unit.text)
if let newRange = self.range.merged(with: unit.range) { self.ranges.append(contentsOf: unit.ranges)
self.range = newRange
} else {
fatalError("このケースは想定していません。")
}
self.nextLcid = unit.nextLcid self.nextLcid = unit.nextLcid
} }
} }
extension ClauseDataUnit: Equatable { extension ClauseDataUnit: Equatable {
static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool { static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool {
lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.range == rhs.range lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.ranges == rhs.ranges
} }
} }
#if DEBUG #if DEBUG
extension ClauseDataUnit: CustomDebugStringConvertible { extension ClauseDataUnit: CustomDebugStringConvertible {
var debugDescription: String { var debugDescription: String {
"ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), range: \(range))" "ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), ranges: \(ranges))"
} }
} }
#endif #endif
@ -78,7 +74,18 @@ public enum ComposingCount: Equatable, Sendable {
case surfaceCount(Int) case surfaceCount(Int)
/// ///
indirect case composite(Self, Self) indirect case composite(lhs: Self, rhs: Self)
/// Combines two counts, collapsing them into a single case when possible.
///
/// - Parameters:
///   - lhs: The first count.
///   - rhs: The second count.
/// - Returns: `.inputCount` or `.surfaceCount` holding the sum when both operands are
///   of that same simple kind; otherwise a `.composite(lhs:rhs:)` wrapping both operands.
static func composite(_ lhs: Self, _ rhs: Self) -> Self {
    if case .inputCount(let left) = lhs, case .inputCount(let right) = rhs {
        return .inputCount(left + right)
    }
    if case .surfaceCount(let left) = lhs, case .surfaceCount(let right) = rhs {
        return .surfaceCount(left + right)
    }
    // Mixed kinds (or an operand that is already composite) are kept as a pair.
    return .composite(lhs: lhs, rhs: rhs)
}
} }
/// ///

View File

@ -472,7 +472,7 @@ import EfficientNGram
return Candidate( return Candidate(
text: first.clause.text, text: first.clause.text,
value: first.value, value: first.value,
composingCount: first.clause.range.count, composingCount: first.clause.ranges.reduce(into: .inputCount(0)) { $0 = .composite($0, $1.count) },
lastMid: first.clause.mid, lastMid: first.clause.mid,
data: Array(candidateData.data[0...count]) data: Array(candidateData.data[0...count])
) )

View File

@ -14,19 +14,19 @@ final class ClauseDataUnitTests: XCTestCase {
do { do {
let unit1 = ClauseDataUnit() let unit1 = ClauseDataUnit()
unit1.text = "僕が" unit1.text = "僕が"
unit1.range = .input(from: 0, to: 3) unit1.ranges = [.input(from: 0, to: 3)]
unit1.mid = 0 unit1.mid = 0
unit1.nextLcid = 0 unit1.nextLcid = 0
let unit2 = ClauseDataUnit() let unit2 = ClauseDataUnit()
unit2.text = "走る" unit2.text = "走る"
unit2.range = .input(from: 3, to: 6) unit2.ranges = [.input(from: 3, to: 6)]
unit2.mid = 1 unit2.mid = 1
unit2.nextLcid = 1 unit2.nextLcid = 1
unit1.merge(with: unit2) unit1.merge(with: unit2)
XCTAssertEqual(unit1.text, "僕が走る") XCTAssertEqual(unit1.text, "僕が走る")
XCTAssertEqual(unit1.range, .input(from: 0, to: 6)) XCTAssertEqual(unit1.ranges, [.input(from: 0, to: 3), .input(from: 3, to: 6)])
XCTAssertEqual(unit1.nextLcid, 1) XCTAssertEqual(unit1.nextLcid, 1)
XCTAssertEqual(unit1.mid, 0) XCTAssertEqual(unit1.mid, 0)
} }
@ -34,19 +34,19 @@ final class ClauseDataUnitTests: XCTestCase {
do { do {
let unit1 = ClauseDataUnit() let unit1 = ClauseDataUnit()
unit1.text = "君は" unit1.text = "君は"
unit1.range = .input(from: 0, to: 3) unit1.ranges = [.input(from: 0, to: 3)]
unit1.mid = 0 unit1.mid = 0
unit1.nextLcid = 0 unit1.nextLcid = 0
let unit2 = ClauseDataUnit() let unit2 = ClauseDataUnit()
unit2.text = "笑った" unit2.text = "笑った"
unit2.range = .input(from: 3, to: 7) unit2.ranges = [.input(from: 3, to: 7)]
unit2.mid = 3 unit2.mid = 3
unit2.nextLcid = 3 unit2.nextLcid = 3
unit1.merge(with: unit2) unit1.merge(with: unit2)
XCTAssertEqual(unit1.text, "君は笑った") XCTAssertEqual(unit1.text, "君は笑った")
XCTAssertEqual(unit1.range, .input(from: 0, to: 7)) XCTAssertEqual(unit1.ranges, [.input(from: 0, to: 3), .input(from: 3, to: 7)])
XCTAssertEqual(unit1.nextLcid, 3) XCTAssertEqual(unit1.nextLcid, 3)
XCTAssertEqual(unit1.mid, 0) XCTAssertEqual(unit1.mid, 0)
} }

View File

@ -58,13 +58,13 @@ final class RegisteredNodeTests: XCTestCase {
let clause1 = ClauseDataUnit() let clause1 = ClauseDataUnit()
clause1.text = "我輩は" clause1.text = "我輩は"
clause1.nextLcid = CIDData..cid clause1.nextLcid = CIDData..cid
clause1.range = .input(from: 0, to: 5) clause1.ranges = [.input(from: 0, to: 5)]
clause1.mid = 1 clause1.mid = 1
let clause2 = ClauseDataUnit() let clause2 = ClauseDataUnit()
clause2.text = "猫です" clause2.text = "猫です"
clause2.nextLcid = CIDData.EOS.cid clause2.nextLcid = CIDData.EOS.cid
clause2.range = .input(from: 5, to: 9) clause2.ranges = [.input(from: 5, to: 9)]
clause2.mid = 3 clause2.mid = 3
let expectedResult: CandidateData = CandidateData( let expectedResult: CandidateData = CandidateData(