wip 前進

This commit is contained in:
Miwa / Ensan
2025-07-14 01:52:54 +09:00
parent 02fcdd4dc1
commit 7374b18eae
2 changed files with 47 additions and 22 deletions

View File

@@ -6,6 +6,7 @@
// Copyright © 2020 ensan. All rights reserved. // Copyright © 2020 ensan. All rights reserved.
// //
import Algorithms
import Foundation import Foundation
import SwiftUtils import SwiftUtils
@@ -32,37 +33,27 @@ extension Kana2Kanji {
let surfaceCount = inputData.convertTarget.count let surfaceCount = inputData.convertTarget.count
let result: LatticeNode = LatticeNode.EOSNode let result: LatticeNode = LatticeNode.EOSNode
let i2sMap = inputData.inputIndexToSurfaceIndexMap() let i2sMap = inputData.inputIndexToSurfaceIndexMap()
var rawNodes = (.zero ..< inputCount).map { let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, inputIndexToSurfaceIndexMap: i2sMap)
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = i2sMap[$0] { let rawNodes = latticeIndices.map { (iIndex, sIndex) in
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex {
(sIndex, nil) (sIndex, nil)
} else { } else {
nil nil
} }
return dicdataStore.getLOUDSDataInRange( return dicdataStore.getLOUDSDataInRange(
inputData: inputData, inputData: inputData,
from: $0, from: iIndex,
surfaceRange: surfaceRange, surfaceRange: surfaceRange,
needTypoCorrection: needTypoCorrection needTypoCorrection: needTypoCorrection
) )
} }
for sIndex in 0 ..< inputData.convertTarget.count where !i2sMap.values.contains(sIndex) {
// inputIndexsIndexrawNodes
rawNodes.append(
dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: nil,
surfaceRange: (sIndex, nil),
needTypoCorrection: needTypoCorrection
)
)
}
let lattice: Lattice = Lattice( let lattice: Lattice = Lattice(
inputCount: inputCount, inputCount: inputCount,
surfaceCount: surfaceCount, surfaceCount: surfaceCount,
rawNodes: rawNodes rawNodes: rawNodes
) )
// inodes // inodes
for (i, nodeArray) in lattice.indexedNodes() { for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
// node // node
for node in nodeArray { for node in nodeArray {
if node.prevs.isEmpty { if node.prevs.isEmpty {
@@ -73,7 +64,7 @@ extension Kana2Kanji {
} }
// //
let wValue: PValue = node.data.value() let wValue: PValue = node.data.value()
if i.isZero { if isHead {
// values // values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else { } else {
@@ -81,13 +72,13 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue} node.values = node.prevs.map {$0.totalValue + wValue}
} }
// index // index
let nextIndex: Lattice.LatticeIndex = switch node.range.endIndex { let nextIndex: (inputIndex: Int?, surfaceIndex: Int?) = switch node.range.endIndex {
case .input(let index): if let sIndex = i2sMap[index] { .surface(sIndex) } else { node.range.endIndex } case .input(let index): (index, i2sMap[index])
case .surface: node.range.endIndex case .surface(let index): (i2sMap.filter { $0.value == index}.first?.key, index)
} }
print(nextIndex, node.data.word, node.data.ruby, lattice[index: nextIndex].count) print(nextIndex, node.data.word, node.data.ruby)
// count // count
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) { if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
self.updateResultNode(with: node, resultNode: result) self.updateResultNode(with: node, resultNode: result)
} else { } else {
self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best) self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)
@@ -104,7 +95,7 @@ extension Kana2Kanji {
} }
} }
/// N-Best /// N-Best
func updateNextNodes(with node: LatticeNode, nextNodes: [LatticeNode], nBest: Int) { func updateNextNodes(with node: LatticeNode, nextNodes: some Sequence<LatticeNode>, nBest: Int) {
for nextnode in nextNodes { for nextnode in nextNodes {
if self.dicdataStore.shouldBeRemoved(data: nextnode.data) { if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
continue continue

View File

@@ -44,6 +44,26 @@ struct Lattice: Sequence {
private var inputIndexedNodes: [[LatticeNode]] private var inputIndexedNodes: [[LatticeNode]]
private var surfaceIndexedNodes: [[LatticeNode]] private var surfaceIndexedNodes: [[LatticeNode]]
/// Builds the ordered list of lattice positions to visit, interleaving input indices and surface indices.
///
/// Input indices `0 ..< inputCount` are walked in order. When an input index has an entry in
/// `inputIndexToSurfaceIndexMap`, every surface index not yet emitted and smaller than the mapped one
/// is emitted first as `(nil, surfaceIndex)`, followed by the paired `(inputIndex, surfaceIndex)` entry.
/// An input index without an entry is emitted as `(inputIndex, nil)`. Surface indices still not emitted
/// after the walk, up to `surfaceCount`, are appended as `(nil, surfaceIndex)`.
///
/// - Parameters:
///   - inputCount: Number of input indices to enumerate.
///   - surfaceCount: Exclusive upper bound of the surface indices to enumerate.
///   - inputIndexToSurfaceIndexMap: Mapping from an input index to its surface index.
/// - Returns: The positions in visiting order; at least one component of each tuple is non-nil.
/// - Note: The half-open ranges below are formed with `..<`, which traps when the lower bound exceeds
///   the upper bound. This presumably relies on mapped surface indices being strictly increasing and
///   below `surfaceCount` — confirm against the producer of the map.
static func indices(inputCount: Int, surfaceCount: Int, inputIndexToSurfaceIndexMap: [Int: Int]) -> [(inputIndex: Int?, surfaceIndex: Int?)] {
    typealias Position = (inputIndex: Int?, surfaceIndex: Int?)
    var ordered: [Position] = []
    // First surface index that has not been emitted yet.
    var nextSurface = 0
    for inputIndex in 0 ..< inputCount {
        guard let mappedSurface = inputIndexToSurfaceIndexMap[inputIndex] else {
            // No surface counterpart: the position exists on the input side only.
            ordered.append((inputIndex: inputIndex, surfaceIndex: nil))
            continue
        }
        // Flush surface-only positions that lie before the mapped surface index.
        ordered += (nextSurface ..< mappedSurface).map { surface -> Position in
            (inputIndex: nil, surfaceIndex: surface)
        }
        ordered.append((inputIndex: inputIndex, surfaceIndex: mappedSurface))
        nextSurface = mappedSurface + 1
    }
    // Remaining surface-only positions after the last mapped one.
    ordered += (nextSurface ..< surfaceCount).map { surface -> Position in
        (inputIndex: nil, surfaceIndex: surface)
    }
    return ordered
}
func prefix(inputCount: Int, surfaceCount: Int) -> Lattice { func prefix(inputCount: Int, surfaceCount: Int) -> Lattice {
let filterClosure: (LatticeNode) -> Bool = { (node: LatticeNode) -> Bool in let filterClosure: (LatticeNode) -> Bool = { (node: LatticeNode) -> Bool in
switch node.range.endIndex { switch node.range.endIndex {
@@ -104,6 +124,20 @@ struct Lattice: Sequence {
} }
} }
/// Returns the nodes stored at the given combined lattice position.
///
/// - Parameter index: A pair of optional indices. A non-nil `inputIndex` selects the entry of
///   `inputIndexedNodes` at that offset; a non-nil `surfaceIndex` selects the entry of
///   `surfaceIndexedNodes` at that offset. A nil component contributes an empty array.
/// - Returns: A `LatticeNodeArray` built from the two selected node arrays.
/// - Note: The indices are used for direct array subscripting, so an out-of-range value traps.
subscript(index index: (inputIndex: Int?, surfaceIndex: Int?)) -> LatticeNodeArray {
    let inputSide: [LatticeNode] = index.inputIndex.map { self.inputIndexedNodes[$0] } ?? []
    let surfaceSide: [LatticeNode] = index.surfaceIndex.map { self.surfaceIndexedNodes[$0] } ?? []
    return LatticeNodeArray(inputIndexedNodes: inputSide, surfaceIndexedNodes: surfaceSide)
}
/// Lazily pairs each given lattice position with the nodes stored there.
///
/// - Parameter indices: Positions to visit, in the order they should be yielded.
/// - Returns: A lazy sequence of `(isHead, nodes)`. `isHead` is true only when both the input index
///   and the surface index of the position equal 0; `nodes` is `self[index:]` for that position.
func indexedNodes(indices: [(inputIndex: Int?, surfaceIndex: Int?)]) -> some Sequence<(isHead: Bool, nodes: LatticeNodeArray)> {
    indices.lazy.map { position -> (isHead: Bool, nodes: LatticeNodeArray) in
        // Comparing an optional with 0 is false for nil, so a one-sided position is never the head.
        let startsAtOrigin = position.inputIndex == 0 && position.surfaceIndex == 0
        return (isHead: startsAtOrigin, nodes: self[index: position])
    }
}
func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> { func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> {
self.inputIndexedNodes.enumerated().lazy.map { (.input($0.offset), $0.element) } self.inputIndexedNodes.enumerated().lazy.map { (.input($0.offset), $0.element) }
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) }) .chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })