mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
fix: now FullInputProcessing.swift natively works with new index system
This commit is contained in:
@ -32,17 +32,17 @@ extension Kana2Kanji {
|
||||
let inputCount: Int = inputData.input.count
|
||||
let surfaceCount = inputData.convertTarget.count
|
||||
let result: LatticeNode = LatticeNode.EOSNode
|
||||
let i2sMap = inputData.inputIndexToSurfaceIndexMap()
|
||||
let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, inputIndexToSurfaceIndexMap: i2sMap)
|
||||
let rawNodes = latticeIndices.map { (iIndex, sIndex) in
|
||||
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex {
|
||||
let i2sMap = LatticeDualIndexMap(inputData)
|
||||
let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, map: i2sMap)
|
||||
let rawNodes = latticeIndices.map { index in
|
||||
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
|
||||
(sIndex, nil)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
return dicdataStore.getLOUDSDataInRange(
|
||||
inputData: inputData,
|
||||
from: iIndex,
|
||||
from: index.inputIndex,
|
||||
surfaceRange: surfaceRange,
|
||||
needTypoCorrection: needTypoCorrection
|
||||
)
|
||||
@ -72,11 +72,7 @@ extension Kana2Kanji {
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 後続ノードのindex(正規化する)
|
||||
let nextIndex: (inputIndex: Int?, surfaceIndex: Int?) = switch node.range.endIndex {
|
||||
case .input(let index): (index, i2sMap[index])
|
||||
case .surface(let index): (i2sMap.filter { $0.value == index}.first?.key, index)
|
||||
}
|
||||
print(nextIndex, node.data.word, node.data.ruby)
|
||||
let nextIndex = i2sMap.dualIndex(for: node.range.endIndex)
|
||||
// 文字数がcountと等しい場合登録する
|
||||
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
|
@ -36,7 +36,9 @@ struct Kana2Kanji {
|
||||
let lastMid = data.clauses.last!.clause.mid
|
||||
|
||||
let composingCount: ComposingCount = data.clauses.reduce(into: .inputCount(0)) {
|
||||
$0 = .composite($0, $1.clause.range.count)
|
||||
for range in $1.clause.ranges {
|
||||
$0 = .composite($0, range.count)
|
||||
}
|
||||
}
|
||||
return Candidate(
|
||||
text: text,
|
||||
|
@ -12,6 +12,54 @@ struct LatticeNodeArray: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates between the two index spaces a lattice position can be expressed in:
/// an index into the input and an index into the surface string.
struct LatticeDualIndexMap {
    /// Lookup table from an input index to the surface index paired with it.
    /// An input index that is absent from the table has no surface counterpart.
    private(set) var inputIndexToSurfaceIndexMap: [Int: Int]

    /// Creates a map from the table returned by `composingText.inputIndexToSurfaceIndexMap()`.
    /// - Parameter composingText: The composing text whose index correspondence is captured.
    init(_ composingText: ComposingText) {
        self.inputIndexToSurfaceIndexMap = composingText.inputIndexToSurfaceIndexMap()
    }

    /// A lattice position expressed in the input space, the surface space, or both at once.
    enum DualIndex: Sendable, Equatable, Hashable {
        /// A position known only as an input index.
        case inputIndex(Int)
        /// A position known only as a surface index.
        case surfaceIndex(Int)
        /// A position for which both the input index and the surface index are known.
        case bothIndex(inputIndex: Int, surfaceIndex: Int)

        /// The input-side index, or `nil` for a surface-only position.
        var inputIndex: Int? {
            switch self {
            case .inputIndex(let index), .bothIndex(let index, _):
                return index
            case .surfaceIndex:
                return nil
            }
        }

        /// The surface-side index, or `nil` for an input-only position.
        var surfaceIndex: Int? {
            switch self {
            case .inputIndex:
                return nil
            case .surfaceIndex(let index), .bothIndex(_, let index):
                return index
            }
        }
    }

    /// Normalizes a single-space lattice index into a `DualIndex`, filling in the
    /// counterpart index whenever the table contains one.
    /// - Parameter latticeIndex: An index given in either the input or the surface space.
    /// - Returns: `.bothIndex` when a counterpart is found in the table; otherwise the
    ///   one-sided case matching the space `latticeIndex` was given in.
    func dualIndex(for latticeIndex: Lattice.LatticeIndex) -> DualIndex {
        switch latticeIndex {
        case .input(let iIndex):
            guard let sIndex = self.inputIndexToSurfaceIndexMap[iIndex] else {
                return .inputIndex(iIndex)
            }
            return .bothIndex(inputIndex: iIndex, surfaceIndex: sIndex)
        case .surface(let sIndex):
            // Reverse lookup is a linear scan over the table. `first(where:)` stops at the
            // first hit instead of materializing a filtered dictionary first.
            // NOTE(review): assumes at most one input index maps to a given surface index;
            // if several did, which one is returned would be unspecified — confirm.
            guard let iIndex = self.inputIndexToSurfaceIndexMap.first(where: { $0.value == sIndex })?.key else {
                return .surfaceIndex(sIndex)
            }
            return .bothIndex(inputIndex: iIndex, surfaceIndex: sIndex)
        }
    }
}
|
||||
|
||||
struct Lattice: Sequence {
|
||||
typealias Element = LatticeNodeArray
|
||||
|
||||
@ -44,22 +92,22 @@ struct Lattice: Sequence {
|
||||
private var inputIndexedNodes: [[LatticeNode]]
|
||||
private var surfaceIndexedNodes: [[LatticeNode]]
|
||||
|
||||
static func indices(inputCount: Int, surfaceCount: Int, inputIndexToSurfaceIndexMap: [Int: Int]) -> [(inputIndex: Int?, surfaceIndex: Int?)] {
|
||||
var indices: [(inputIndex: Int?, surfaceIndex: Int?)] = []
|
||||
static func indices(inputCount: Int, surfaceCount: Int, map: LatticeDualIndexMap) -> [LatticeDualIndexMap.DualIndex] {
|
||||
var indices: [LatticeDualIndexMap.DualIndex] = []
|
||||
var sIndexPointer = 0
|
||||
for i in 0 ..< inputCount {
|
||||
if let sIndex = inputIndexToSurfaceIndexMap[i] {
|
||||
if let sIndex = map.inputIndexToSurfaceIndexMap[i] {
|
||||
for j in sIndexPointer ..< sIndex {
|
||||
indices.append((nil, j))
|
||||
indices.append(.surfaceIndex(j))
|
||||
}
|
||||
indices.append((i, sIndex))
|
||||
indices.append(.bothIndex(inputIndex: i, surfaceIndex: sIndex))
|
||||
sIndexPointer = sIndex + 1
|
||||
} else {
|
||||
indices.append((i, nil))
|
||||
indices.append(.inputIndex(i))
|
||||
}
|
||||
}
|
||||
for j in sIndexPointer ..< surfaceCount {
|
||||
indices.append((nil, j))
|
||||
indices.append(.surfaceIndex(j))
|
||||
}
|
||||
return indices
|
||||
}
|
||||
@ -124,7 +172,7 @@ struct Lattice: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
subscript(index index: (inputIndex: Int?, surfaceIndex: Int?)) -> LatticeNodeArray {
|
||||
subscript(index index: LatticeDualIndexMap.DualIndex) -> LatticeNodeArray {
|
||||
get {
|
||||
let iNodes: [LatticeNode] = if let iIndex = index.inputIndex { self.inputIndexedNodes[iIndex] } else { [] }
|
||||
let sNodes: [LatticeNode] = if let sIndex = index.surfaceIndex { self.surfaceIndexedNodes[sIndex] } else { [] }
|
||||
@ -132,7 +180,7 @@ struct Lattice: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
func indexedNodes(indices: [(inputIndex: Int?, surfaceIndex: Int?)]) -> some Sequence<(isHead: Bool, nodes: LatticeNodeArray)> {
|
||||
func indexedNodes(indices: [LatticeDualIndexMap.DualIndex]) -> some Sequence<(isHead: Bool, nodes: LatticeNodeArray)> {
|
||||
indices.lazy.map { index in
|
||||
return (index.inputIndex == 0 && index.surfaceIndex == 0, self[index: index])
|
||||
}
|
||||
@ -221,25 +269,6 @@ struct Lattice: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
func merged(with other: Self) -> Self? {
|
||||
return switch (self, other) {
|
||||
case (let .surface(l, ml), let .surface(mr, r)):
|
||||
if ml == mr {
|
||||
.surface(from: l, to: r)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
case (let .input(l, ml), let .input(mr, r)):
|
||||
if ml == mr {
|
||||
.input(from: l, to: r)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
case (.surface, .input), (.input, .surface):
|
||||
nil
|
||||
}
|
||||
}
|
||||
|
||||
func offseted(inputOffset: Int, surfaceOffset: Int) -> Self {
|
||||
switch self {
|
||||
case .surface(from: let from, to: let to):
|
||||
|
@ -22,12 +22,15 @@ extension Kana2Kanji {
|
||||
/// - note:
|
||||
/// この関数の役割は意味連接の考慮にある。
|
||||
func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] {
|
||||
debug("getPredictionCandidates", composingText, lastClause.range, lastClause.text)
|
||||
let lastRuby = switch lastClause.range {
|
||||
case let .input(left, right):
|
||||
ComposingText.getConvertTarget(for: composingText.input[left..<right]).toKatakana()
|
||||
case let .surface(left, right):
|
||||
String(composingText.convertTarget.dropFirst(left).prefix(right - left))
|
||||
debug("getPredictionCandidates", composingText, lastClause.ranges, lastClause.text)
|
||||
let lastRuby = lastClause.ranges.reduce(into: "") {
|
||||
let ruby = switch $1 {
|
||||
case let .input(left, right):
|
||||
ComposingText.getConvertTarget(for: composingText.input[left..<right]).toKatakana()
|
||||
case let .surface(left, right):
|
||||
String(composingText.convertTarget.dropFirst(left).prefix(right - left))
|
||||
}
|
||||
$0.append(ruby)
|
||||
}
|
||||
let lastRubyCount = lastRuby.count
|
||||
let datas: [DicdataElement]
|
||||
|
@ -59,7 +59,7 @@ extension RegisteredNodeProtocol {
|
||||
guard let prev else {
|
||||
let unit = ClauseDataUnit()
|
||||
unit.mid = self.data.mid
|
||||
unit.range = self.range
|
||||
unit.ranges = [self.range]
|
||||
return CandidateData(clauses: [(clause: unit, value: .zero)], data: [])
|
||||
}
|
||||
var lastcandidate = prev.getCandidateData() // 自分に至るregisterdそれぞれのデータに処理
|
||||
@ -75,11 +75,7 @@ extension RegisteredNodeProtocol {
|
||||
if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) {
|
||||
// 文節ではないので、最後に追加する。
|
||||
lastClause.text.append(self.data.word)
|
||||
if let newRange = lastClause.range.merged(with: self.range) {
|
||||
lastClause.range = newRange
|
||||
} else {
|
||||
fatalError("このケースは想定していません。")
|
||||
}
|
||||
lastClause.ranges.append(self.range)
|
||||
// 最初だった場合を想定している
|
||||
if (lastClause.mid == 500 && self.data.mid != 500) || DicdataStore.includeMMValueCalculation(self.data) {
|
||||
lastClause.mid = self.data.mid
|
||||
@ -92,7 +88,7 @@ extension RegisteredNodeProtocol {
|
||||
else {
|
||||
let unit = ClauseDataUnit()
|
||||
unit.text = self.data.word
|
||||
unit.range = self.range
|
||||
unit.ranges.append(self.range)
|
||||
if DicdataStore.includeMMValueCalculation(self.data) {
|
||||
unit.mid = self.data.mid
|
||||
}
|
||||
|
@ -17,32 +17,28 @@ final class ClauseDataUnit {
|
||||
/// The text of the unit.
|
||||
var text: String = ""
|
||||
/// The range of the unit in input text.
|
||||
var range: Lattice.LatticeRange = .zero
|
||||
var ranges: [Lattice.LatticeRange] = []
|
||||
|
||||
/// Merge the given unit to this unit.
|
||||
/// - Parameter:
|
||||
/// - unit: The unit to merge.
|
||||
func merge(with unit: ClauseDataUnit) {
|
||||
self.text.append(unit.text)
|
||||
if let newRange = self.range.merged(with: unit.range) {
|
||||
self.range = newRange
|
||||
} else {
|
||||
fatalError("このケースは想定していません。")
|
||||
}
|
||||
self.ranges.append(contentsOf: unit.ranges)
|
||||
self.nextLcid = unit.nextLcid
|
||||
}
|
||||
}
|
||||
|
||||
extension ClauseDataUnit: Equatable {
|
||||
static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool {
|
||||
lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.range == rhs.range
|
||||
lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.ranges == rhs.ranges
|
||||
}
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
extension ClauseDataUnit: CustomDebugStringConvertible {
|
||||
var debugDescription: String {
|
||||
"ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), range: \(range))"
|
||||
"ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), ranges: \(ranges))"
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -78,7 +74,18 @@ public enum ComposingCount: Equatable, Sendable {
|
||||
case surfaceCount(Int)
|
||||
|
||||
/// 複数のカウントの連結
|
||||
indirect case composite(Self, Self)
|
||||
indirect case composite(lhs: Self, rhs: Self)
|
||||
|
||||
/// Joins two counts into one.
///
/// Two `.inputCount` values, or two `.surfaceCount` values, collapse into a single count
/// of the same kind holding their sum. Every other combination is kept as a
/// `.composite(lhs:rhs:)` pair.
/// - Parameters:
///   - lhs: The leading count.
///   - rhs: The count appended after `lhs`.
/// - Returns: The combined count.
static func composite(_ lhs: Self, _ rhs: Self) -> Self {
    if case .inputCount(let leading) = lhs, case .inputCount(let trailing) = rhs {
        return .inputCount(leading + trailing)
    }
    if case .surfaceCount(let leading) = lhs, case .surfaceCount(let trailing) = rhs {
        return .surfaceCount(leading + trailing)
    }
    // Mixed kinds (or already-composite operands) cannot be summed; keep them as a pair.
    return .composite(lhs: lhs, rhs: rhs)
}
|
||||
}
|
||||
|
||||
/// 変換候補のデータ
|
||||
|
@ -472,7 +472,7 @@ import EfficientNGram
|
||||
return Candidate(
|
||||
text: first.clause.text,
|
||||
value: first.value,
|
||||
composingCount: first.clause.range.count,
|
||||
composingCount: first.clause.ranges.reduce(into: .inputCount(0)) { $0 = .composite($0, $1.count) },
|
||||
lastMid: first.clause.mid,
|
||||
data: Array(candidateData.data[0...count])
|
||||
)
|
||||
|
@ -14,19 +14,19 @@ final class ClauseDataUnitTests: XCTestCase {
|
||||
do {
|
||||
let unit1 = ClauseDataUnit()
|
||||
unit1.text = "僕が"
|
||||
unit1.range = .input(from: 0, to: 3)
|
||||
unit1.ranges = [.input(from: 0, to: 3)]
|
||||
unit1.mid = 0
|
||||
unit1.nextLcid = 0
|
||||
|
||||
let unit2 = ClauseDataUnit()
|
||||
unit2.text = "走る"
|
||||
unit2.range = .input(from: 3, to: 6)
|
||||
unit2.ranges = [.input(from: 3, to: 6)]
|
||||
unit2.mid = 1
|
||||
unit2.nextLcid = 1
|
||||
|
||||
unit1.merge(with: unit2)
|
||||
XCTAssertEqual(unit1.text, "僕が走る")
|
||||
XCTAssertEqual(unit1.range, .input(from: 0, to: 6))
|
||||
XCTAssertEqual(unit1.ranges, [.input(from: 0, to: 3), .input(from: 3, to: 6)])
|
||||
XCTAssertEqual(unit1.nextLcid, 1)
|
||||
XCTAssertEqual(unit1.mid, 0)
|
||||
}
|
||||
@ -34,19 +34,19 @@ final class ClauseDataUnitTests: XCTestCase {
|
||||
do {
|
||||
let unit1 = ClauseDataUnit()
|
||||
unit1.text = "君は"
|
||||
unit1.range = .input(from: 0, to: 3)
|
||||
unit1.ranges = [.input(from: 0, to: 3)]
|
||||
unit1.mid = 0
|
||||
unit1.nextLcid = 0
|
||||
|
||||
let unit2 = ClauseDataUnit()
|
||||
unit2.text = "笑った"
|
||||
unit2.range = .input(from: 3, to: 7)
|
||||
unit2.ranges = [.input(from: 3, to: 7)]
|
||||
unit2.mid = 3
|
||||
unit2.nextLcid = 3
|
||||
|
||||
unit1.merge(with: unit2)
|
||||
XCTAssertEqual(unit1.text, "君は笑った")
|
||||
XCTAssertEqual(unit1.range, .input(from: 0, to: 7))
|
||||
XCTAssertEqual(unit1.ranges, [.input(from: 0, to: 3), .input(from: 3, to: 7)])
|
||||
XCTAssertEqual(unit1.nextLcid, 3)
|
||||
XCTAssertEqual(unit1.mid, 0)
|
||||
}
|
||||
|
@ -58,13 +58,13 @@ final class RegisteredNodeTests: XCTestCase {
|
||||
let clause1 = ClauseDataUnit()
|
||||
clause1.text = "我輩は"
|
||||
clause1.nextLcid = CIDData.一般名詞.cid
|
||||
clause1.range = .input(from: 0, to: 5)
|
||||
clause1.ranges = [.input(from: 0, to: 5)]
|
||||
clause1.mid = 1
|
||||
|
||||
let clause2 = ClauseDataUnit()
|
||||
clause2.text = "猫です"
|
||||
clause2.nextLcid = CIDData.EOS.cid
|
||||
clause2.range = .input(from: 5, to: 9)
|
||||
clause2.ranges = [.input(from: 5, to: 9)]
|
||||
clause2.mid = 3
|
||||
|
||||
let expectedResult: CandidateData = CandidateData(
|
||||
|
Reference in New Issue
Block a user