fix: seems like everything is now perfectly working, right?

This commit is contained in:
ensan-hcl
2025-07-15 04:40:34 +09:00
parent 16363be738
commit 814a6b080b
8 changed files with 90 additions and 74 deletions

View File

@ -29,11 +29,11 @@ extension Kana2Kanji {
/// (4)
func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) {
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)")
let result: LatticeNode = LatticeNode.EOSNode
let inputCount: Int = inputData.input.count
let surfaceCount = inputData.convertTarget.count
let result: LatticeNode = LatticeNode.EOSNode
let i2sMap = LatticeDualIndexMap(inputData)
let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, map: i2sMap)
let indexMap = LatticeDualIndexMap(inputData)
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
let rawNodes = latticeIndices.map { index in
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
(sIndex, nil)
@ -72,7 +72,7 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue}
}
// index
let nextIndex = i2sMap.dualIndex(for: node.range.endIndex)
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
// count
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
self.updateResultNode(with: node, resultNode: result)

View File

@ -1,3 +1,4 @@
import Algorithms
import Foundation
import SwiftUtils
@ -20,16 +21,31 @@ extension Kana2Kanji {
/// (4)
func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) {
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)")
let inputCount: Int = inputData.input.count
let surfaceCount: Int = inputData.convertTarget.count
let result: LatticeNode = LatticeNode.EOSNode
let inputCount: Int = inputData.input.count
let surfaceCount = inputData.convertTarget.count
let indexMap = LatticeDualIndexMap(inputData)
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
let rawNodes = latticeIndices.map { index in
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
(sIndex, nil)
} else {
nil
}
return dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: index.inputIndex,
surfaceRange: surfaceRange,
needTypoCorrection: false
)
}
let lattice: Lattice = Lattice(
inputCount: inputCount,
surfaceCount: surfaceCount,
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}
rawNodes: rawNodes
)
// inodes
for (i, nodeArray) in lattice.indexedNodes() {
for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
// node
for node in nodeArray {
if node.prevs.isEmpty {
@ -37,7 +53,7 @@ extension Kana2Kanji {
}
//
let wValue: PValue = node.data.value()
if i.isZero {
if isHead {
// values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else {
@ -45,9 +61,9 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue}
}
//
let nextIndex = node.range.endIndex
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
// count
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
for index in node.prevs.indices {
let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
//

View File

@ -6,6 +6,7 @@
// Copyright © 2020 ensan. All rights reserved.
//
import Algorithms
import Foundation
import SwiftUtils
@ -24,9 +25,11 @@ extension Kana2Kanji {
let convertedSurfaceCount = previousResult.inputData.convertTarget.count - surfaceCount
// (1)
let start = RegisteredNode.fromLastCandidate(completedData)
let indexMap = LatticeDualIndexMap(inputData)
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
let lattice = previousResult.lattice.suffix(inputCount: inputCount, surfaceCount: surfaceCount)
for (i, nodeArray) in lattice.indexedNodes() {
let prevs: [RegisteredNode] = if i.isZero {
for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
let prevs: [RegisteredNode] = if isHead {
[start]
} else {
[]
@ -40,7 +43,7 @@ extension Kana2Kanji {
// (2)
let result = LatticeNode.EOSNode
for (i, nodeArray) in lattice.indexedNodes() {
for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
for node in nodeArray {
if node.prevs.isEmpty {
continue
@ -50,7 +53,7 @@ extension Kana2Kanji {
}
//
let wValue = node.data.value()
if i.isZero {
if isHead {
// values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else {
@ -58,8 +61,8 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue}
}
//
let nextIndex = node.range.endIndex
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
if nextIndex.inputIndex == inputCount || nextIndex.surfaceIndex == surfaceCount {
self.updateResultNode(with: node, resultNode: result)
} else {
self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)

View File

@ -40,10 +40,12 @@ extension Kana2Kanji {
debug("kana2lattice_changed", inputData, counts, previousResult.inputData, inputCount, commonInputCount)
// (1)
let indexMap = LatticeDualIndexMap(inputData)
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
var lattice = previousResult.lattice.prefix(inputCount: commonInputCount, surfaceCount: commonSurfaceCount)
let terminalNodes: Lattice
if counts.addedInput == 0 {
if counts.addedInput == 0 && counts.addedSurface == 0 {
terminalNodes = Lattice(
inputCount: inputCount,
surfaceCount: surfaceCount,
@ -55,12 +57,29 @@ extension Kana2Kanji {
)
} else {
// (2)
let rawNodes = latticeIndices.map { index in
let inputRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let iIndex = index.inputIndex {
(iIndex, max(commonInputCount, iIndex) ..< inputCount)
} else {
nil
}
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
(sIndex, max(commonSurfaceCount, sIndex) ..< surfaceCount)
} else {
nil
}
return self.dicdataStore.getLOUDSDataInRange(
inputData: inputData,
from: inputRange?.startIndex,
toIndexRange: inputRange?.endIndexRange,
surfaceRange: surfaceRange,
needTypoCorrection: needTypoCorrection
)
}
let addedNodes: Lattice = Lattice(
inputCount: inputCount,
surfaceCount: surfaceCount,
rawNodes: (0..<inputCount).map {(i: Int) in
self.dicdataStore.getLOUDSDataInRange(inputData: inputData, from: i, toIndexRange: max(commonInputCount, i) ..< inputCount, needTypoCorrection: needTypoCorrection)
}
rawNodes: rawNodes
)
// (3)
@ -73,7 +92,7 @@ extension Kana2Kanji {
continue
}
//
let nextIndex = node.range.endIndex
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
self.updateNextNodes(with: node, nextNodes: addedNodes[index: nextIndex], nBest: N_best)
}
}
@ -103,8 +122,8 @@ extension Kana2Kanji {
// values
node.values = node.prevs.map {$0.totalValue + wValue}
}
let nextIndex = node.range.endIndex
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
self.updateResultNode(with: node, resultNode: result)
} else {
self.updateNextNodes(with: node, nextNodes: terminalNodes[index: nextIndex], nBest: N_best)

View File

@ -12,8 +12,8 @@ struct LatticeNodeArray: Sequence {
}
}
struct LatticeDualIndexMap {
private(set) var inputIndexToSurfaceIndexMap: [Int: Int]
struct LatticeDualIndexMap: Sendable {
private var inputIndexToSurfaceIndexMap: [Int: Int]
/// Creates the index map for `composingText`.
///
/// Stores the dictionary returned by `composingText.inputIndexToSurfaceIndexMap()`;
/// the other members of this type look positions up in that dictionary.
/// - Parameter composingText: The composing text whose input-to-surface index map is captured.
init(_ composingText: ComposingText) {
self.inputIndexToSurfaceIndexMap = composingText.inputIndexToSurfaceIndexMap()
}
@ -58,6 +58,26 @@ struct LatticeDualIndexMap {
}
}
}
/// Lists every lattice position for the given counts, in order.
///
/// Input indices `0 ..< inputCount` are visited in ascending order:
/// - an input index with an entry in `inputIndexToSurfaceIndexMap` first emits a
///   `.surfaceIndex` for each not-yet-emitted surface index below the mapped one,
///   then a `.bothIndex` pairing the input index with the mapped surface index;
/// - an input index without an entry emits `.inputIndex`.
/// Any surface indices still not emitted below `surfaceCount` are appended last.
///
/// NOTE(review): the half-open ranges built here trap at runtime if a mapped surface
/// index is smaller than the next unemitted one, or if that position ends up greater
/// than `surfaceCount` — presumably the map is strictly increasing; confirm.
///
/// - Parameters:
///   - inputCount: Number of input positions to enumerate.
///   - surfaceCount: Number of surface positions to enumerate.
/// - Returns: The positions in the order described above.
func indices(inputCount: Int, surfaceCount: Int) -> [DualIndex] {
    var result: [DualIndex] = []
    // Smallest surface index that has not been emitted yet.
    var nextSurface = 0
    for inputPosition in 0 ..< inputCount {
        guard let mappedSurface = self.inputIndexToSurfaceIndexMap[inputPosition] else {
            // No surface counterpart: input-only position.
            result.append(.inputIndex(inputPosition))
            continue
        }
        // Surface positions skipped since the previous aligned point are surface-only.
        result.append(contentsOf: (nextSurface ..< mappedSurface).map { DualIndex.surfaceIndex($0) })
        result.append(.bothIndex(inputIndex: inputPosition, surfaceIndex: mappedSurface))
        nextSurface = mappedSurface + 1
    }
    // Trailing surface positions after the last aligned point.
    result.append(contentsOf: (nextSurface ..< surfaceCount).map { DualIndex.surfaceIndex($0) })
    return result
}
}
struct Lattice: Sequence {
@ -74,7 +94,6 @@ struct Lattice: Sequence {
for nodes in rawNodes {
guard let first = nodes.first else { continue }
print(nodes.mapSet { $0.range.startIndex }, nodes.count)
switch first.range.startIndex {
case .surface(let i):
self.surfaceIndexedNodes[i].append(contentsOf: nodes)
@ -92,26 +111,6 @@ struct Lattice: Sequence {
private var inputIndexedNodes: [[LatticeNode]]
private var surfaceIndexedNodes: [[LatticeNode]]
/// Lists every lattice position for the given counts, in order, using `map` for alignment.
///
/// Walks input indices `0 ..< inputCount`. When `map.inputIndexToSurfaceIndexMap` has an
/// entry for the input index, the surface indices not yet emitted below the mapped value
/// are appended as `.surfaceIndex`, followed by a `.bothIndex` for the aligned pair.
/// Otherwise the input index is appended as `.inputIndex`. Remaining surface indices
/// below `surfaceCount` are appended at the end.
///
/// NOTE(review): the ranges below trap if a mapped surface index is lower than the next
/// unemitted surface index, or if that index exceeds `surfaceCount` — confirm the map
/// guarantees increasing values.
///
/// - Parameters:
///   - inputCount: Number of input positions to enumerate.
///   - surfaceCount: Number of surface positions to enumerate.
///   - map: Source of the input-index to surface-index correspondence.
/// - Returns: The positions in the order described above.
static func indices(inputCount: Int, surfaceCount: Int, map: LatticeDualIndexMap) -> [LatticeDualIndexMap.DualIndex] {
    var ordered: [LatticeDualIndexMap.DualIndex] = []
    // First surface index that still has to be emitted.
    var pendingSurface = 0
    for inputPosition in 0 ..< inputCount {
        guard let alignedSurface = map.inputIndexToSurfaceIndexMap[inputPosition] else {
            ordered.append(.inputIndex(inputPosition))
            continue
        }
        for surfacePosition in pendingSurface ..< alignedSurface {
            ordered.append(.surfaceIndex(surfacePosition))
        }
        ordered.append(.bothIndex(inputIndex: inputPosition, surfaceIndex: alignedSurface))
        pendingSurface = alignedSurface + 1
    }
    for surfacePosition in pendingSurface ..< surfaceCount {
        ordered.append(.surfaceIndex(surfacePosition))
    }
    return ordered
}
func prefix(inputCount: Int, surfaceCount: Int) -> Lattice {
let filterClosure: (LatticeNode) -> Bool = { (node: LatticeNode) -> Bool in
switch node.range.endIndex {
@ -121,12 +120,12 @@ struct Lattice: Sequence {
value <= surfaceCount
}
}
let newInputIndexedNodes = Array(self.inputIndexedNodes.prefix(inputCount).map {(nodes: [LatticeNode]) in
let newInputIndexedNodes = self.inputIndexedNodes.prefix(inputCount).map {(nodes: [LatticeNode]) in
nodes.filter(filterClosure)
}.drop(while: \.isEmpty))
let newSurfaceIndexedNodes = Array(self.surfaceIndexedNodes.prefix(surfaceCount).map {(nodes: [LatticeNode]) in
}
let newSurfaceIndexedNodes = self.surfaceIndexedNodes.prefix(surfaceCount).map {(nodes: [LatticeNode]) in
nodes.filter(filterClosure)
}.drop(while: \.isEmpty))
}
return Lattice(inputIndexedNodes: newInputIndexedNodes, surfaceIndexedNodes: newSurfaceIndexedNodes)
}
@ -157,21 +156,6 @@ struct Lattice: Sequence {
}
}
/// Returns the node array stored at position `i` of `inputIndexedNodes`.
///
/// Read-only. Array subscripting traps when `i` is out of bounds.
subscript(inputIndex i: Int) -> [LatticeNode] {
    self.inputIndexedNodes[i]
}
/// Returns the node array addressed by `index`.
///
/// `.input(i)` reads `inputIndexedNodes[i]`; `.surface(i)` reads `surfaceIndexedNodes[i]`.
/// Read-only. Array subscripting traps when the position is out of bounds.
subscript(index index: LatticeIndex) -> [LatticeNode] {
    switch index {
    case .input(let position):
        return self.inputIndexedNodes[position]
    case .surface(let position):
        return self.surfaceIndexedNodes[position]
    }
}
subscript(index index: LatticeDualIndexMap.DualIndex) -> LatticeNodeArray {
get {
let iNodes: [LatticeNode] = if let iIndex = index.inputIndex { self.inputIndexedNodes[iIndex] } else { [] }
@ -186,11 +170,6 @@ struct Lattice: Sequence {
}
}
/// Returns a sequence of `(index, nodes)` pairs covering both node tables.
///
/// Each element of `inputIndexedNodes` is paired with `.input(offset)` and each element
/// of `surfaceIndexedNodes` with `.surface(offset)`, where `offset` is the element's
/// position from `enumerated()`. Both halves are mapped lazily.
///
/// NOTE(review): `chained` is defined outside this view; presumably it yields the
/// input pairs first and then the surface pairs — confirm against its definition.
func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> {
self.inputIndexedNodes.enumerated().lazy.map { (.input($0.offset), $0.element) }
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })
}
struct Iterator: IteratorProtocol {
init(lattice: Lattice) {
self.lattice = lattice

View File

@ -538,7 +538,7 @@ import EfficientNGram
seenCandidate.formUnion(clause_candidates.map {$0.text})
//
let dicCandidates: [Candidate] = result.lattice[inputIndex: 0]
let dicCandidates: [Candidate] = result.lattice[index: .bothIndex(inputIndex: 0, surfaceIndex: 0)]
.map {
Candidate(
text: $0.data.word,

View File

@ -523,7 +523,6 @@ public final class DicdataStore {
useMemory: self.learningManager.enabled,
needTypoCorrection: needTypoCorrection
)
print(stringToInfo)
// MARK: indices
for (identifier, value) in indices {
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in

View File

@ -645,7 +645,7 @@ extension ComposingText {
let commonSurface = self.convertTarget.commonPrefix(with: previousData.convertTarget)
let deletedSurface = previousData.convertTarget.count - commonSurface.count
let addedSurface = self.convertTarget.suffix(from: commonSurface.startIndex).count
let addedSurface = self.convertTarget.count - commonSurface.count
return (deleted, added, deletedSurface, addedSurface)
}