Files
Miwa / Ensan 0795b8cf84 [Experimental] ConvertGraphを実装し、その上での完全一致変換を実装 (#47)
* ConvertGraphを実装し、その上での完全一致変換を実装

* 名前空間を汚染していたので修正

* Implementation completed (without test)

* move directory to use default dictionary

* fix implementations to enable conversion

* add test cases

* Backward searchで発見された候補を明示的に削除

* fix tests

* simplify
2024-02-24 23:21:44 +09:00

212 lines
10 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// ConvertGraph.swift
//
//
// Created by miwa on 2024/02/23.
//
import XCTest
import Foundation
@testable import KanaKanjiConverterModule
/// A graph whose nodes mirror the nodes of a `LookupGraph`, each one carrying the
/// lattice nodes that were looked up for it.
struct ConvertGraph: InputGraphProtocol {
    /// A single graph node: the attached lattice nodes plus the ranges and the
    /// correction kind copied from the corresponding lookup-graph node.
    struct Node: InputGraphNodeProtocol {
        var latticeNodes: [LatticeNode]
        var displayedTextRange: InputGraphStructure.Range
        var inputElementsRange: InputGraphStructure.Range
        var correction: InputGraph.Correction = .none
    }

    /// Nodes of the graph. A default-constructed graph contains only the root node.
    var nodes: [Node] = [
        // root node
        Node(latticeNodes: [], displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
    ]

    /// Connectivity information, shared with the graph this one was built from.
    var structure: InputGraphStructure = .init()

    /// Builds a `ConvertGraph` from a lookup graph.
    ///
    /// - Parameters:
    ///   - input: The lookup graph whose nodes and structure are copied.
    ///   - nodeIndex2LatticeNode: Lattice nodes keyed by the index of the node in `input.nodes`.
    ///     Indices without an entry get an empty list.
    /// - Returns: A graph with one node per node of `input`, in the same order.
    static func build(input: LookupGraph, nodeIndex2LatticeNode: [Int: [LatticeNode]]) -> Self {
        var converted: [Node] = []
        for (offset, lookupNode) in input.nodes.enumerated() {
            let attached = nodeIndex2LatticeNode[offset] ?? []
            converted.append(
                Node(
                    latticeNodes: attached,
                    displayedTextRange: lookupNode.displayedTextRange,
                    inputElementsRange: lookupNode.inputElementsRange,
                    correction: lookupNode.correction
                )
            )
        }
        return Self(nodes: converted, structure: input.structure)
    }
}
extension ConvertGraph {
    /// A lattice node: one dictionary entry together with the predecessor records
    /// and scores accumulated for it during conversion.
    final class LatticeNode: CustomStringConvertible {
        /// The dictionary entry held by this node.
        public let data: DicdataElement
        /// Registered predecessors of this node (conversion keeps this list bounded by `N_best`).
        var prevs: [RegisteredNode] = []
        /// Scores belonging to the entries of `prevs`; starts out as the entry's own value.
        var values: [PValue] = []
        /// Ranges associated with this node, expressed as `InputGraphStructure.Range`.
        var displayedTextRange: InputGraphStructure.Range
        var inputElementsRange: InputGraphStructure.Range

        /// A fresh node holding the `EOS` entry, with both ranges set to `.unknown`.
        static var EOSNode: LatticeNode {
            let eos = LatticeNode(
                data: DicdataElement.EOSData,
                displayedTextRange: .unknown,
                inputElementsRange: .unknown
            )
            return eos
        }

        init(data: DicdataElement, displayedTextRange: InputGraphStructure.Range, inputElementsRange: InputGraphStructure.Range, prevs: [RegisteredNode] = []) {
            self.data = data
            self.displayedTextRange = displayedTextRange
            self.inputElementsRange = inputElementsRange
            self.prevs = prevs
            self.values = [data.value()]
        }

        /// Converts this node into a `RegisteredNode` whose predecessor is `prevs[index]`.
        ///
        /// A `LatticeNode` may hold several predecessors, whereas a `RegisteredNode` holds a single one.
        ///
        /// - Parameters:
        ///   - index: Position in `prevs` of the predecessor to link to.
        ///   - value: Total score to store in the resulting node.
        func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode {
            // FIXME: (the text of the original note is missing) — `prevs[index]` traps when `index` is out of range.
            let previous = prevs[index]
            return RegisteredNode(
                data: data,
                registered: previous,
                totalValue: value,
                displayedTextRange: displayedTextRange,
                inputElementsRange: inputElementsRange
            )
        }

        var description: String {
            return "LatticeNode(data: \(data), ...)"
        }
    }

    /// An immutable record of one step on a path: a dictionary entry, the single
    /// predecessor record it follows, and the total score up to this point.
    struct RegisteredNode: RegisteredNodeProtocol {
        /// The dictionary entry held by this node.
        let data: DicdataElement
        /// The one predecessor of this node, or `nil` when there is none.
        let prev: (any RegisteredNodeProtocol)?
        /// Total score accumulated up to this node.
        let totalValue: PValue
        /// Ranges associated with this node, expressed as `InputGraphStructure.Range`.
        var displayedTextRange: InputGraphStructure.Range
        var inputElementsRange: InputGraphStructure.Range

        init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, displayedTextRange: InputGraphStructure.Range, inputElementsRange: InputGraphStructure.Range) {
            self.data = data
            self.totalValue = totalValue
            self.prev = registered
            self.displayedTextRange = displayedTextRange
            self.inputElementsRange = inputElementsRange
        }

        /// Creates the start-of-sentence record.
        /// - Returns: A node holding the `BOS` entry, with no predecessor, a total value of 0
        ///   and both ranges set to `.endIndex(0)`.
        static func BOSNode() -> RegisteredNode {
            let bos = RegisteredNode(
                data: DicdataElement.BOSData,
                registered: nil,
                totalValue: 0,
                displayedTextRange: .endIndex(0),
                inputElementsRange: .endIndex(0)
            )
            return bos
        }
    }
}
/// Common interface of registered nodes. It lets the `struct`-based `RegisteredNode` refer to its
/// predecessor through an existential type (`any RegisteredNodeProtocol`).
/// - Note: The original note mentions `indirect enum`, presumably as an alternative representation;
///   its full text is not available here.
protocol RegisteredNodeProtocol {
/// The dictionary entry held by the node.
var data: DicdataElement {get}
/// The predecessor of the node, or `nil` when there is none.
var prev: (any RegisteredNodeProtocol)? {get}
/// Total score accumulated up to the node.
var totalValue: PValue {get}
/// Ranges associated with the node, expressed as `InputGraphStructure.Range`.
var displayedTextRange: InputGraphStructure.Range {get}
var inputElementsRange: InputGraphStructure.Range {get}
}
extension ConvertGraph {
    /// Runs the forward pass over the whole graph and returns an `EOS` lattice node whose
    /// `prevs` hold the records of every path that reached the end.
    ///
    /// Graph nodes are visited so that a node is handled only after every predecessor reported by
    /// `structure.prevIndices` has been handled (or has been marked invalid). For each lattice node
    /// the path scores are updated, and the node is then offered as a predecessor to the lattice
    /// nodes of the following graph nodes, each of which keeps at most `option.N_best` predecessors.
    ///
    /// - Parameters:
    ///   - option: Conversion options; only `N_best` is read here.
    ///   - dicdataStore: Supplies connection costs (`getCCValue`) and the entry filter (`shouldBeRemoved`).
    /// - Returns: A fresh `EOS` node. Its `prevs` are appended in visiting order and are not
    ///   limited to `N_best` entries.
    func convertAll(option: borrowing ConvertRequestOptions, dicdataStore: DicdataStore) -> LatticeNode {
        let result: LatticeNode = LatticeNode.EOSNode
        result.displayedTextRange = .startIndex(self.structure.displayedTextEndIndexToNodeIndices.endIndex)
        result.inputElementsRange = .startIndex(self.structure.inputElementsEndIndexToNodeIndices.endIndex)
        // Work stack of (node index, node). It is reversed so that `popLast()` yields the nodes in ascending index order.
        var processStack = Array(self.nodes.enumerated().reversed())
        // The root node (index 0) is treated as already processed.
        var processedIndices: IndexSet = [0] // root
        // Nodes that have no predecessor at all; they are never processed.
        var invalidIndices: IndexSet = []
        while let (i, graphNode) = processStack.popLast() {
            // Skip nodes that were already handled or rejected.
            guard !processedIndices.contains(i), !invalidIndices.contains(i) else {
                continue
            }
            // A node without any predecessor cannot be reached; mark it invalid.
            let prevIndices = self.structure.prevIndices(displayedTextStartIndex: graphNode.displayedTextRange.startIndex, inputElementsStartIndex: graphNode.inputElementsRange.startIndex)
            guard !prevIndices.isEmpty else {
                invalidIndices.insert(i)
                continue
            }
            var unprocessedPrevs: [(Int, Node)] = []
            for prevIndex in prevIndices {
                if !processedIndices.contains(prevIndex) && !invalidIndices.contains(prevIndex) {
                    unprocessedPrevs.append((prevIndex, self.nodes[prevIndex]))
                }
            }
            // If some predecessor is still pending, push this node back and handle the predecessors first.
            guard unprocessedPrevs.isEmpty else {
                processStack.append((i, graphNode))
                processStack.append(contentsOf: unprocessedPrevs)
                continue
            }
            processedIndices.insert(i)
            // Process every lattice node attached to this graph node.
            for node in graphNode.latticeNodes {
                // A lattice node that no path reached has nothing to propagate.
                if node.prevs.isEmpty {
                    continue
                }
                if dicdataStore.shouldBeRemoved(data: node.data) {
                    continue
                }
                // Score of each path ending at `node`: the predecessor's total plus this entry's own value.
                // The connection cost to `node` was already added when the predecessor was registered below.
                // NOTE(review): a former `if i == 0` branch that additionally added the connection cost from
                // the predecessor was removed because it could never run: index 0 is in `processedIndices`
                // from the start, so the guard at the top of the loop always skips it. If a BOS connection
                // cost is intended for nodes at the beginning of the input, it needs a different condition —
                // confirm with the author.
                let wValue: PValue = node.data.value()
                node.values = node.prevs.map {$0.totalValue + wValue}
                // Graph nodes that can directly follow this lattice node.
                let nextIndices = self.structure.nextIndices(
                    displayedTextEndIndex: node.displayedTextRange.endIndex,
                    inputElementsEndIndex: node.inputElementsRange.endIndex
                )
                // Terminal case: nothing follows, or the node ends at the end of the input elements.
                if nextIndices.isEmpty || self.structure.inputElementsStartIndexToNodeIndices.endIndex == node.inputElementsRange.endIndex {
                    for index in node.prevs.indices {
                        let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
                        result.prevs.append(newnode)
                    }
                } else {
                    for nextIndex in nextIndices {
                        // Offer `node` as a predecessor to every lattice node of the following graph node.
                        for nextnode in self.nodes[nextIndex].latticeNodes {
                            if dicdataStore.shouldBeRemoved(data: nextnode.data) {
                                continue
                            }
                            // Connection cost between `node` and `nextnode`.
                            let ccValue: PValue = dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                            // One candidate per path that ends at `node`.
                            for (index, value) in node.values.enumerated() {
                                let newValue: PValue = ccValue + value
                                // Insert right after the last entry whose total is >= the new one, so a list
                                // that is in non-increasing order stays that way.
                                let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
                                // The candidate would land outside the N-best window; drop it.
                                if lastindex == option.N_best {
                                    continue
                                }
                                let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
                                // Make room first when the list is already full.
                                if nextnode.prevs.count >= option.N_best {
                                    nextnode.prevs.removeLast()
                                }
                                // Remove before inserting, because `insert` is O(N).
                                nextnode.prevs.insert(newnode, at: lastindex)
                            }
                        }
                    }
                }
            }
        }
        return result
    }
}