wip 前進

This commit is contained in:
Miwa / Ensan
2025-07-14 01:52:54 +09:00
parent 02fcdd4dc1
commit 7374b18eae
2 changed files with 47 additions and 22 deletions

View File

@ -6,6 +6,7 @@
// Copyright © 2020 ensan. All rights reserved.
//
import Algorithms
import Foundation
import SwiftUtils
@ -32,37 +33,27 @@ extension Kana2Kanji {
let surfaceCount = inputData.convertTarget.count
let result: LatticeNode = LatticeNode.EOSNode
let i2sMap = inputData.inputIndexToSurfaceIndexMap()
var rawNodes = (.zero ..< inputCount).map {
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = i2sMap[$0] {
let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, inputIndexToSurfaceIndexMap: i2sMap)
let rawNodes = latticeIndices.map { (iIndex, sIndex) in
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex {
(sIndex, nil)
} else {
nil
}
return dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: $0,
from: iIndex,
surfaceRange: surfaceRange,
needTypoCorrection: needTypoCorrection
)
}
for sIndex in 0 ..< inputData.convertTarget.count where !i2sMap.values.contains(sIndex) {
// inputIndexsIndexrawNodes
rawNodes.append(
dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: nil,
surfaceRange: (sIndex, nil),
needTypoCorrection: needTypoCorrection
)
)
}
let lattice: Lattice = Lattice(
inputCount: inputCount,
surfaceCount: surfaceCount,
rawNodes: rawNodes
)
// inodes
for (i, nodeArray) in lattice.indexedNodes() {
for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
// node
for node in nodeArray {
if node.prevs.isEmpty {
@ -73,7 +64,7 @@ extension Kana2Kanji {
}
//
let wValue: PValue = node.data.value()
if i.isZero {
if isHead {
// values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else {
@ -81,13 +72,13 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue}
}
// index
let nextIndex: Lattice.LatticeIndex = switch node.range.endIndex {
case .input(let index): if let sIndex = i2sMap[index] { .surface(sIndex) } else { node.range.endIndex }
case .surface: node.range.endIndex
let nextIndex: (inputIndex: Int?, surfaceIndex: Int?) = switch node.range.endIndex {
case .input(let index): (index, i2sMap[index])
case .surface(let index): (i2sMap.filter { $0.value == index}.first?.key, index)
}
print(nextIndex, node.data.word, node.data.ruby, lattice[index: nextIndex].count)
print(nextIndex, node.data.word, node.data.ruby)
// count
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
self.updateResultNode(with: node, resultNode: result)
} else {
self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)
@ -104,7 +95,7 @@ extension Kana2Kanji {
}
}
/// N-Best
func updateNextNodes(with node: LatticeNode, nextNodes: [LatticeNode], nBest: Int) {
func updateNextNodes(with node: LatticeNode, nextNodes: some Sequence<LatticeNode>, nBest: Int) {
for nextnode in nextNodes {
if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
continue

View File

@ -44,6 +44,26 @@ struct Lattice: Sequence {
private var inputIndexedNodes: [[LatticeNode]]
private var surfaceIndexedNodes: [[LatticeNode]]
/// Builds the ordered list of lattice positions, pairing each input index with the surface
/// index it maps to and interleaving the surface indices that no input index maps to.
///
/// Every input index in `0 ..< inputCount` is emitted exactly once, in increasing order.
/// When an input index has an entry in `inputIndexToSurfaceIndexMap`, all surface indices
/// that have not been emitted yet and lie below the mapped one are emitted first as
/// surface-only positions `(nil, s)`, followed by the combined position `(i, s)`.
/// Surface indices still pending after the last input index are appended at the end.
///
/// - Parameters:
///   - inputCount: Number of input indices to enumerate.
///   - surfaceCount: Number of surface indices to enumerate.
///   - inputIndexToSurfaceIndexMap: Surface index associated with an input index, where one exists.
/// - Returns: Positions in visiting order; a `nil` component means the position has no
///   counterpart on that side.
static func indices(inputCount: Int, surfaceCount: Int, inputIndexToSurfaceIndexMap: [Int: Int]) -> [(inputIndex: Int?, surfaceIndex: Int?)] {
    var indices: [(inputIndex: Int?, surfaceIndex: Int?)] = []
    // First surface index that has not been emitted yet.
    var nextSurfaceIndex = 0
    for inputIndex in 0 ..< inputCount {
        // A mapped surface index that was already emitted (a repeated or regressing map entry)
        // cannot anchor this input index again: `nextSurfaceIndex ..< mapped` would trap, and
        // re-emitting it would list the same surface position twice. Treat it as unmapped.
        guard let mappedSurfaceIndex = inputIndexToSurfaceIndexMap[inputIndex],
              mappedSurfaceIndex >= nextSurfaceIndex else {
            indices.append((inputIndex, nil))
            continue
        }
        for surfaceIndex in nextSurfaceIndex ..< mappedSurfaceIndex {
            indices.append((nil, surfaceIndex))
        }
        indices.append((inputIndex, mappedSurfaceIndex))
        nextSurfaceIndex = mappedSurfaceIndex + 1
    }
    // `stride` yields nothing when the start is already at or past `surfaceCount`,
    // whereas `nextSurfaceIndex ..< surfaceCount` would trap in that situation.
    for surfaceIndex in stride(from: nextSurfaceIndex, to: surfaceCount, by: 1) {
        indices.append((nil, surfaceIndex))
    }
    return indices
}
func prefix(inputCount: Int, surfaceCount: Int) -> Lattice {
let filterClosure: (LatticeNode) -> Bool = { (node: LatticeNode) -> Bool in
switch node.range.endIndex {
@ -104,6 +124,20 @@ struct Lattice: Sequence {
}
}
/// Collects the nodes registered at a combined (input, surface) position.
///
/// A `nil` component contributes no nodes. A non-`nil` component is used directly as an
/// index into the corresponding per-index node table, so it must be in bounds.
subscript(index index: (inputIndex: Int?, surfaceIndex: Int?)) -> LatticeNodeArray {
    let nodesAtInput: [LatticeNode] = index.inputIndex.map { self.inputIndexedNodes[$0] } ?? []
    let nodesAtSurface: [LatticeNode] = index.surfaceIndex.map { self.surfaceIndexedNodes[$0] } ?? []
    return LatticeNodeArray(inputIndexedNodes: nodesAtInput, surfaceIndexedNodes: nodesAtSurface)
}
/// Lazily pairs each given position with the nodes stored there.
///
/// - Parameter indices: Positions to visit, in the order they should be produced.
/// - Returns: A lazy sequence of `(isHead, nodes)`. `isHead` is `true` only for the position
///   whose input index and surface index are both `0`.
func indexedNodes(indices: [(inputIndex: Int?, surfaceIndex: Int?)]) -> some Sequence<(isHead: Bool, nodes: LatticeNodeArray)> {
    indices.lazy.map { position -> (isHead: Bool, nodes: LatticeNodeArray) in
        let startsLattice = position.inputIndex == 0 && position.surfaceIndex == 0
        return (isHead: startsLattice, nodes: self[index: position])
    }
}
func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> {
self.inputIndexedNodes.enumerated().lazy.map { (.input($0.offset), $0.element) }
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })