mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
fix: seems like everything is now perfectly working, right?
This commit is contained in:
@ -29,11 +29,11 @@ extension Kana2Kanji {
|
||||
/// (4)ノードをアップデートした上で返却する。
|
||||
func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) {
|
||||
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)")
|
||||
let result: LatticeNode = LatticeNode.EOSNode
|
||||
let inputCount: Int = inputData.input.count
|
||||
let surfaceCount = inputData.convertTarget.count
|
||||
let result: LatticeNode = LatticeNode.EOSNode
|
||||
let i2sMap = LatticeDualIndexMap(inputData)
|
||||
let latticeIndices = Lattice.indices(inputCount: inputCount, surfaceCount: surfaceCount, map: i2sMap)
|
||||
let indexMap = LatticeDualIndexMap(inputData)
|
||||
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
|
||||
let rawNodes = latticeIndices.map { index in
|
||||
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
|
||||
(sIndex, nil)
|
||||
@ -72,7 +72,7 @@ extension Kana2Kanji {
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 後続ノードのindex(正規化する)
|
||||
let nextIndex = i2sMap.dualIndex(for: node.range.endIndex)
|
||||
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
|
||||
// 文字数がcountと等しい場合登録する
|
||||
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
|
@ -1,3 +1,4 @@
|
||||
import Algorithms
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
@ -20,16 +21,31 @@ extension Kana2Kanji {
|
||||
/// (4)ノードをアップデートした上で返却する。
|
||||
func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) {
|
||||
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)")
|
||||
let inputCount: Int = inputData.input.count
|
||||
let surfaceCount: Int = inputData.convertTarget.count
|
||||
let result: LatticeNode = LatticeNode.EOSNode
|
||||
let inputCount: Int = inputData.input.count
|
||||
let surfaceCount = inputData.convertTarget.count
|
||||
let indexMap = LatticeDualIndexMap(inputData)
|
||||
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
|
||||
let rawNodes = latticeIndices.map { index in
|
||||
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
|
||||
(sIndex, nil)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
return dicdataStore.getLOUDSDataInRange(
|
||||
inputData: inputData,
|
||||
from: index.inputIndex,
|
||||
surfaceRange: surfaceRange,
|
||||
needTypoCorrection: false
|
||||
)
|
||||
}
|
||||
let lattice: Lattice = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}
|
||||
rawNodes: rawNodes
|
||||
)
|
||||
// 「i文字目から始まるnodes」に対して
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
|
||||
// それぞれのnodeに対して
|
||||
for node in nodeArray {
|
||||
if node.prevs.isEmpty {
|
||||
@ -37,7 +53,7 @@ extension Kana2Kanji {
|
||||
}
|
||||
// 生起確率を取得する。
|
||||
let wValue: PValue = node.data.value()
|
||||
if i.isZero {
|
||||
if isHead {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
|
||||
} else {
|
||||
@ -45,9 +61,9 @@ extension Kana2Kanji {
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex = node.range.endIndex
|
||||
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
|
||||
// 文字数がcountと等しい場合登録する
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
|
||||
for index in node.prevs.indices {
|
||||
let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
|
||||
// 学習データやユーザ辞書由来の場合は素通しする
|
||||
|
@ -6,6 +6,7 @@
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Algorithms
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
@ -24,9 +25,11 @@ extension Kana2Kanji {
|
||||
let convertedSurfaceCount = previousResult.inputData.convertTarget.count - surfaceCount
|
||||
// (1)
|
||||
let start = RegisteredNode.fromLastCandidate(completedData)
|
||||
let indexMap = LatticeDualIndexMap(inputData)
|
||||
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
|
||||
let lattice = previousResult.lattice.suffix(inputCount: inputCount, surfaceCount: surfaceCount)
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
let prevs: [RegisteredNode] = if i.isZero {
|
||||
for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
|
||||
let prevs: [RegisteredNode] = if isHead {
|
||||
[start]
|
||||
} else {
|
||||
[]
|
||||
@ -40,7 +43,7 @@ extension Kana2Kanji {
|
||||
// (2)
|
||||
let result = LatticeNode.EOSNode
|
||||
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
for (isHead, nodeArray) in lattice.indexedNodes(indices: latticeIndices) {
|
||||
for node in nodeArray {
|
||||
if node.prevs.isEmpty {
|
||||
continue
|
||||
@ -50,7 +53,7 @@ extension Kana2Kanji {
|
||||
}
|
||||
// 生起確率を取得する。
|
||||
let wValue = node.data.value()
|
||||
if i.isZero {
|
||||
if isHead {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
|
||||
} else {
|
||||
@ -58,8 +61,8 @@ extension Kana2Kanji {
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex = node.range.endIndex
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
|
||||
if nextIndex.inputIndex == inputCount || nextIndex.surfaceIndex == surfaceCount {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
} else {
|
||||
self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)
|
||||
|
@ -40,10 +40,12 @@ extension Kana2Kanji {
|
||||
debug("kana2lattice_changed", inputData, counts, previousResult.inputData, inputCount, commonInputCount)
|
||||
|
||||
// (1)
|
||||
let indexMap = LatticeDualIndexMap(inputData)
|
||||
let latticeIndices = indexMap.indices(inputCount: inputCount, surfaceCount: surfaceCount)
|
||||
var lattice = previousResult.lattice.prefix(inputCount: commonInputCount, surfaceCount: commonSurfaceCount)
|
||||
|
||||
let terminalNodes: Lattice
|
||||
if counts.addedInput == 0 {
|
||||
if counts.addedInput == 0 && counts.addedSurface == 0 {
|
||||
terminalNodes = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
@ -55,12 +57,29 @@ extension Kana2Kanji {
|
||||
)
|
||||
} else {
|
||||
// (2)
|
||||
let rawNodes = latticeIndices.map { index in
|
||||
let inputRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let iIndex = index.inputIndex {
|
||||
(iIndex, max(commonInputCount, iIndex) ..< inputCount)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
let surfaceRange: (startIndex: Int, endIndexRange: Range<Int>?)? = if let sIndex = index.surfaceIndex {
|
||||
(sIndex, max(commonSurfaceCount, sIndex) ..< surfaceCount)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
return self.dicdataStore.getLOUDSDataInRange(
|
||||
inputData: inputData,
|
||||
from: inputRange?.startIndex,
|
||||
toIndexRange: inputRange?.endIndexRange,
|
||||
surfaceRange: surfaceRange,
|
||||
needTypoCorrection: needTypoCorrection
|
||||
)
|
||||
}
|
||||
let addedNodes: Lattice = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
rawNodes: (0..<inputCount).map {(i: Int) in
|
||||
self.dicdataStore.getLOUDSDataInRange(inputData: inputData, from: i, toIndexRange: max(commonInputCount, i) ..< inputCount, needTypoCorrection: needTypoCorrection)
|
||||
}
|
||||
rawNodes: rawNodes
|
||||
)
|
||||
|
||||
// (3)
|
||||
@ -73,7 +92,7 @@ extension Kana2Kanji {
|
||||
continue
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex = node.range.endIndex
|
||||
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
|
||||
self.updateNextNodes(with: node, nextNodes: addedNodes[index: nextIndex], nBest: N_best)
|
||||
}
|
||||
}
|
||||
@ -103,8 +122,8 @@ extension Kana2Kanji {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
let nextIndex = node.range.endIndex
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
let nextIndex = indexMap.dualIndex(for: node.range.endIndex)
|
||||
if nextIndex.inputIndex == inputCount && nextIndex.surfaceIndex == surfaceCount {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
} else {
|
||||
self.updateNextNodes(with: node, nextNodes: terminalNodes[index: nextIndex], nBest: N_best)
|
||||
|
@ -12,8 +12,8 @@ struct LatticeNodeArray: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
struct LatticeDualIndexMap {
|
||||
private(set) var inputIndexToSurfaceIndexMap: [Int: Int]
|
||||
struct LatticeDualIndexMap: Sendable {
|
||||
private var inputIndexToSurfaceIndexMap: [Int: Int]
|
||||
/// Creates the index map for the given composing text.
///
/// The stored dictionary is taken directly from
/// `composingText.inputIndexToSurfaceIndexMap()`; no further processing is done here.
/// - Parameter composingText: The composing text whose input-to-surface index mapping is captured.
init(_ composingText: ComposingText) {
|
||||
self.inputIndexToSurfaceIndexMap = composingText.inputIndexToSurfaceIndexMap()
|
||||
}
|
||||
@ -58,6 +58,26 @@ struct LatticeDualIndexMap {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the ordered list of dual indices covering input positions `0 ..< inputCount`
/// and surface positions `0 ..< surfaceCount`.
///
/// Input positions are walked in ascending order. For each input position `i`:
/// - if `inputIndexToSurfaceIndexMap` has a surface index for `i`, every surface position
///   not yet emitted and smaller than that surface index is appended as `.surfaceIndex`,
///   followed by a `.bothIndex` pairing `i` with the mapped surface index;
/// - otherwise `i` is appended as `.inputIndex`.
/// After the walk, the remaining surface positions up to `surfaceCount` are appended
/// as `.surfaceIndex`.
///
/// - Parameters:
///   - inputCount: Exclusive upper bound of the input positions to enumerate.
///   - surfaceCount: Exclusive upper bound of the surface positions to enumerate.
/// - Returns: The dual indices in the order described above.
func indices(inputCount: Int, surfaceCount: Int) -> [DualIndex] {
|
||||
var indices: [DualIndex] = []
|
||||
// Next surface position that has not been emitted yet.
var sIndexPointer = 0
|
||||
for i in 0 ..< inputCount {
|
||||
if let sIndex = self.inputIndexToSurfaceIndexMap[i] {
|
||||
// Emit surface-only positions skipped over before the mapped surface index.
// NOTE(review): forming `sIndexPointer ..< sIndex` traps when sIndex < sIndexPointer;
// this assumes mapped surface indices never go backwards as `i` grows — TODO confirm.
for j in sIndexPointer ..< sIndex {
|
||||
indices.append(.surfaceIndex(j))
|
||||
}
|
||||
indices.append(.bothIndex(inputIndex: i, surfaceIndex: sIndex))
|
||||
// The mapped surface position is consumed by the `.bothIndex` entry above.
sIndexPointer = sIndex + 1
|
||||
} else {
|
||||
// No surface position is mapped to this input position.
indices.append(.inputIndex(i))
|
||||
}
|
||||
}
|
||||
// Emit whatever surface positions remain after the last mapped one.
// NOTE(review): this range also traps if sIndexPointer > surfaceCount — confirm the
// map never yields a surface index >= surfaceCount for i < inputCount.
for j in sIndexPointer ..< surfaceCount {
|
||||
indices.append(.surfaceIndex(j))
|
||||
}
|
||||
return indices
|
||||
}
|
||||
}
|
||||
|
||||
struct Lattice: Sequence {
|
||||
@ -74,7 +94,6 @@ struct Lattice: Sequence {
|
||||
|
||||
for nodes in rawNodes {
|
||||
guard let first = nodes.first else { continue }
|
||||
print(nodes.mapSet { $0.range.startIndex }, nodes.count)
|
||||
switch first.range.startIndex {
|
||||
case .surface(let i):
|
||||
self.surfaceIndexedNodes[i].append(contentsOf: nodes)
|
||||
@ -92,26 +111,6 @@ struct Lattice: Sequence {
|
||||
private var inputIndexedNodes: [[LatticeNode]]
|
||||
private var surfaceIndexedNodes: [[LatticeNode]]
|
||||
|
||||
/// Builds the ordered list of dual indices covering input positions `0 ..< inputCount`
/// and surface positions `0 ..< surfaceCount`, using the mapping held by `map`.
///
/// For each input position `i` in ascending order: when `map.inputIndexToSurfaceIndexMap`
/// has a surface index for `i`, the not-yet-emitted surface positions below it are appended
/// as `.surfaceIndex`, then a `.bothIndex` for the pair; otherwise `i` is appended as
/// `.inputIndex`. Remaining surface positions up to `surfaceCount` are appended last.
/// The statements match the instance method `indices(inputCount:surfaceCount:)` shown for
/// `LatticeDualIndexMap`, except that the dictionary is read through `map`.
///
/// - Parameters:
///   - inputCount: Exclusive upper bound of the input positions to enumerate.
///   - surfaceCount: Exclusive upper bound of the surface positions to enumerate.
///   - map: The index map whose `inputIndexToSurfaceIndexMap` is consulted.
/// - Returns: The dual indices in the order described above.
static func indices(inputCount: Int, surfaceCount: Int, map: LatticeDualIndexMap) -> [LatticeDualIndexMap.DualIndex] {
|
||||
var indices: [LatticeDualIndexMap.DualIndex] = []
|
||||
// Next surface position that has not been emitted yet.
var sIndexPointer = 0
|
||||
for i in 0 ..< inputCount {
|
||||
if let sIndex = map.inputIndexToSurfaceIndexMap[i] {
|
||||
// Emit surface-only positions skipped over before the mapped surface index.
// NOTE(review): traps if sIndex < sIndexPointer — assumes a non-decreasing mapping; TODO confirm.
for j in sIndexPointer ..< sIndex {
|
||||
indices.append(.surfaceIndex(j))
|
||||
}
|
||||
indices.append(.bothIndex(inputIndex: i, surfaceIndex: sIndex))
|
||||
// The mapped surface position is consumed by the `.bothIndex` entry above.
sIndexPointer = sIndex + 1
|
||||
} else {
|
||||
// No surface position is mapped to this input position.
indices.append(.inputIndex(i))
|
||||
}
|
||||
}
|
||||
// Emit whatever surface positions remain after the last mapped one.
for j in sIndexPointer ..< surfaceCount {
|
||||
indices.append(.surfaceIndex(j))
|
||||
}
|
||||
return indices
|
||||
}
|
||||
|
||||
func prefix(inputCount: Int, surfaceCount: Int) -> Lattice {
|
||||
let filterClosure: (LatticeNode) -> Bool = { (node: LatticeNode) -> Bool in
|
||||
switch node.range.endIndex {
|
||||
@ -121,12 +120,12 @@ struct Lattice: Sequence {
|
||||
value <= surfaceCount
|
||||
}
|
||||
}
|
||||
let newInputIndexedNodes = Array(self.inputIndexedNodes.prefix(inputCount).map {(nodes: [LatticeNode]) in
|
||||
let newInputIndexedNodes = self.inputIndexedNodes.prefix(inputCount).map {(nodes: [LatticeNode]) in
|
||||
nodes.filter(filterClosure)
|
||||
}.drop(while: \.isEmpty))
|
||||
let newSurfaceIndexedNodes = Array(self.surfaceIndexedNodes.prefix(surfaceCount).map {(nodes: [LatticeNode]) in
|
||||
}
|
||||
let newSurfaceIndexedNodes = self.surfaceIndexedNodes.prefix(surfaceCount).map {(nodes: [LatticeNode]) in
|
||||
nodes.filter(filterClosure)
|
||||
}.drop(while: \.isEmpty))
|
||||
}
|
||||
|
||||
return Lattice(inputIndexedNodes: newInputIndexedNodes, surfaceIndexedNodes: newSurfaceIndexedNodes)
|
||||
}
|
||||
@ -157,21 +156,6 @@ struct Lattice: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the nodes stored at input index `i`.
///
/// This is a direct array lookup into `inputIndexedNodes`, so an out-of-range `i` traps.
/// - Parameter i: Position in `inputIndexedNodes`.
subscript(inputIndex i: Int) -> [LatticeNode] {
|
||||
get {
|
||||
self.inputIndexedNodes[i]
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the nodes stored at the given lattice index.
///
/// `.input(i)` reads `inputIndexedNodes[i]` and `.surface(i)` reads `surfaceIndexedNodes[i]`.
/// Both are direct array lookups, so an out-of-range position traps.
/// - Parameter index: Selects which node table to read and the position within it.
subscript(index index: LatticeIndex) -> [LatticeNode] {
|
||||
get {
|
||||
switch index {
|
||||
case .input(let i): self.inputIndexedNodes[i]
|
||||
case .surface(let i): self.surfaceIndexedNodes[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
subscript(index index: LatticeDualIndexMap.DualIndex) -> LatticeNodeArray {
|
||||
get {
|
||||
let iNodes: [LatticeNode] = if let iIndex = index.inputIndex { self.inputIndexedNodes[iIndex] } else { [] }
|
||||
@ -186,11 +170,6 @@ struct Lattice: Sequence {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns every node array paired with the lattice index it is stored under.
///
/// Entries of `inputIndexedNodes` come first, each tagged `.input(offset)`, followed by
/// entries of `surfaceIndexedNodes`, each tagged `.surface(offset)`. Both halves are
/// produced through `lazy.map`.
/// NOTE(review): `chained` is not defined in this view — presumably it concatenates the two
/// sequences in order; confirm against its declaration.
func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> {
|
||||
self.inputIndexedNodes.enumerated().lazy.map { (.input($0.offset), $0.element) }
|
||||
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })
|
||||
}
|
||||
|
||||
struct Iterator: IteratorProtocol {
|
||||
init(lattice: Lattice) {
|
||||
self.lattice = lattice
|
||||
|
@ -538,7 +538,7 @@ import EfficientNGram
|
||||
seenCandidate.formUnion(clause_candidates.map {$0.text})
|
||||
|
||||
// 最初の辞書データ
|
||||
let dicCandidates: [Candidate] = result.lattice[inputIndex: 0]
|
||||
let dicCandidates: [Candidate] = result.lattice[index: .bothIndex(inputIndex: 0, surfaceIndex: 0)]
|
||||
.map {
|
||||
Candidate(
|
||||
text: $0.data.word,
|
||||
|
@ -523,7 +523,6 @@ public final class DicdataStore {
|
||||
useMemory: self.learningManager.enabled,
|
||||
needTypoCorrection: needTypoCorrection
|
||||
)
|
||||
print(stringToInfo)
|
||||
// MARK: 検索によって得たindicesから辞書データを実際に取り出していく
|
||||
for (identifier, value) in indices {
|
||||
let result: [DicdataElement] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: value).compactMap { (data) -> DicdataElement? in
|
||||
|
@ -645,7 +645,7 @@ extension ComposingText {
|
||||
|
||||
let commonSurface = self.convertTarget.commonPrefix(with: previousData.convertTarget)
|
||||
let deletedSurface = previousData.convertTarget.count - commonSurface.count
|
||||
let addedSurface = self.convertTarget.suffix(from: commonSurface.startIndex).count
|
||||
let addedSurface = self.convertTarget.count - commonSurface.count
|
||||
return (deleted, added, deletedSurface, addedSurface)
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user