Files
2024-03-17 18:32:44 +09:00

254 lines
12 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// InputGraph.swift
//
//
// Created by miwa on 2024/02/21.
//
import Foundation
import DequeModule
@testable import KanaKanjiConverterModule
import XCTest
/// A graph of characters built incrementally from a `CorrectGraph`.
///
/// Each correct-graph node is appended as an input-graph node; in addition, whenever the
/// characters leading up to the new node match an entry in the input style's
/// `replaceSuffixTree`, nodes spelling the replacement value are appended and wired in.
struct InputGraph {
    /// A single character in the graph.
    struct Node: Equatable, CustomStringConvertible {
        var character: Character
        var inputElementsRange: InputGraphRange
        var correction: CorrectGraph.Correction = .none

        /// Debug representation: the character, the range bounds (`?` when a bound is `nil`),
        /// and whether the correction is a typo.
        var description: String {
            let `is` = inputElementsRange.startIndex?.description ?? "?"
            let ie = inputElementsRange.endIndex?.description ?? "?"
            return "Node(\"\(character)\", i(\(`is`)..<\(ie)), isTypo: \(correction.isTypo))"
        }
    }

    /// All nodes of the graph. Index 0 is the root node.
    var nodes: [Node] = [
        // root node
        Node(character: "\0", inputElementsRange: .endIndex(0), correction: .none)
    ]
    /// For each node index, the indices of the nodes that may follow it.
    var allowedNextIndex: [Int: IndexSet] = [:]
    /// For each node index, the indices of the nodes that may precede it.
    var allowedPrevIndex: [Int: IndexSet] = [:]
    /// For each node index of this graph, the indices of the *correct graph* nodes that may follow it.
    var nextCorrectNodeIndices: [Int: IndexSet] = [:]

    /// Appends the correct-graph node at `nodeIndex` to this graph and applies every
    /// replacement whose key ends at the newly appended node.
    ///
    /// - Parameters:
    ///   - correctGraph: The graph that `nodeIndex` refers to.
    ///   - nodeIndex: Index of the node in `correctGraph.nodes` to insert.
    mutating func update(_ correctGraph: CorrectGraph, nodeIndex: Int) {
        let cgNode = correctGraph.nodes[nodeIndex]
        // Procedure:
        // 1. Find the nodes of this graph that list `nodeIndex` in `nextCorrectNodeIndices`.
        // 2. Append a node built from `correctGraph.nodes[nodeIndex]` and link it to them.
        // 3. Search backward from the new node for replacement matches and insert the replacements.
        let prevNodeIndices: [Int] = self.nextCorrectNodeIndices.lazy.filter {
            $0.value.contains(nodeIndex)
        }.map {
            $0.key
        }
        let newIndex = self.nodes.endIndex
        self.nodes.append(Node(character: cgNode.value, inputElementsRange: cgNode.inputElementsRange, correction: cgNode.correction))
        // Link the new node with its predecessors in both directions.
        self.allowedPrevIndex[newIndex] = IndexSet(prevNodeIndices)
        for prevNodeIndex in prevNodeIndices {
            self.allowedNextIndex[prevNodeIndex, default: IndexSet()].insert(newIndex)
        }
        // Remember which correct-graph nodes may follow the new node.
        self.nextCorrectNodeIndices[newIndex] = correctGraph.allowedNextIndex[nodeIndex]
        // Root of the replacement suffix tree for this node's input style.
        let startNode = InputGraphInputStyle.init(from: cgNode.inputStyle).replaceSuffixTree

        // One pending step of the backward search.
        typealias SearchItem = (
            suffixTreeNode: ReplaceSuffixTree.Node,
            // Node indices visited so far; the earliest node is at index 0, `newIndex` is last.
            route: [Int],
            // The most recently found replacement along this route, if any.
            foundValue: Replacement?,
            correction: CorrectGraph.Correction
        )
        typealias Match = (
            // The replacement that was found.
            replacement: Replacement,
            // The full route that was walked, which ends with `replacement.route`.
            route: [Int]
        )
        struct Replacement: Hashable {
            var route: [Int]
            var value: String
        }

        var backSearchMatch: [Match] = []
        var stack: [SearchItem] = [(startNode, [newIndex], foundValue: nil, correction: cgNode.correction)]
        while let (cSuffixTreeNode, cRoute, cFoundValue, cCorrection) = stack.popLast() {
            // `cRoute` is never empty: it starts as `[newIndex]` and only grows.
            let cNodeIndex = cRoute[0]
            if let bNode = cSuffixTreeNode.find(key: self.nodes[cNodeIndex].character) {
                // If `bNode` carries a value it becomes the found replacement for the current
                // route; otherwise the previously found one is kept. This does not depend on the
                // predecessor, so it is computed once.
                let nextFoundValue = bNode.value.map {Replacement(route: cRoute, value: $0)} ?? cFoundValue
                for prevGraphNodeIndex in self.allowedPrevIndex[cNodeIndex, default: IndexSet()] {
                    // TODO (carried over; original note was lost apart from the names): InputGraph.NodeInputStyle.ID
                    stack.append(
                        (
                            bNode,
                            // FIXME (carried over; original note was lost): builds a new array on every step.
                            [prevGraphNodeIndex] + cRoute,
                            foundValue: nextFoundValue,
                            // Stays `.typo` once a typo was seen; otherwise takes the predecessor's correction.
                            cCorrection.isTypo ? .typo : self.nodes[prevGraphNodeIndex].correction
                        )
                    )
                }
            } else {
                // The tree has no continuation for this character: record the replacement found so far, if any.
                if let cFoundValue {
                    backSearchMatch.append((cFoundValue, cRoute))
                }
            }
        }

        // Group the matches so that each distinct replacement is inserted only once.
        let replacementToTarget = Dictionary(grouping: backSearchMatch, by: \.replacement)
        for (replacement, matches) in replacementToTarget {
            // MARK: Insert the replacement
            // 1. Append one node per character of the replacement value.
            // 2. The replaced route itself is invalidated after this loop.
            // The replacement spans from the start of the route's first node to the end of its last node.
            let startIndex = self.nodes[replacement.route[0]].inputElementsRange.startIndex
            let endIndex = self.nodes[replacement.route[replacement.route.endIndex - 1]].inputElementsRange.endIndex
            let characters = Array(replacement.value)
            // The replacement is a typo if any node on the replaced route is.
            let correction: CorrectGraph.Correction = if replacement.route.allSatisfy({!self.nodes[$0].correction.isTypo}) {
                .none
            } else {
                .typo
            }
            let newNodes = characters.indices.map { index in
                // Only the first character knows the start bound and only the last knows the end bound.
                let range: InputGraphRange = if index == characters.startIndex && index == characters.endIndex - 1 {
                    .init(startIndex: startIndex, endIndex: endIndex)
                } else if index == characters.startIndex {
                    .init(startIndex: startIndex, endIndex: nil)
                } else if index == characters.endIndex - 1 {
                    .init(startIndex: nil, endIndex: endIndex)
                } else {
                    .unknown
                }
                return Node(character: characters[index], inputElementsRange: range, correction: correction)
            }
            let firstIndex = self.nodes.endIndex
            let lastIndex = self.nodes.endIndex + newNodes.count - 1
            self.nodes.append(contentsOf: newNodes)

            // MARK: Adjust next/prev links
            // firstIndex: its predecessors are the nodes that sit immediately before the
            // replaced route in each matching route.
            let prevIndices = matches.compactMap { match in
                assert(match.route.hasSuffix(replacement.route))
                return match.route.dropLast(replacement.route.count).last
            }
            self.allowedPrevIndex[firstIndex] = IndexSet(prevIndices)
            for i in prevIndices {
                // Let each predecessor continue into the first replacement node.
                self.allowedNextIndex[i, default: IndexSet()].insert(firstIndex)
            }
            // Chain the replacement nodes one after another.
            for i in firstIndex ..< lastIndex {
                self.allowedNextIndex[i, default: IndexSet()].insert(i + 1)
                self.allowedPrevIndex[i + 1, default: IndexSet()].insert(i)
            }
            // lastIndex: continues into the same correct-graph nodes as the inserted node.
            self.nextCorrectNodeIndices[lastIndex] = correctGraph.allowedNextIndex[nodeIndex]
        }

        // Done after the loop above so that every replacement is wired before anything is cleared.
        for replacement in replacementToTarget.keys {
            // Invalidate the last node of the replaced route by clearing its links.
            guard let invalidatedIndex = replacement.route.last else {
                continue
            }
            self.nextCorrectNodeIndices[invalidatedIndex] = IndexSet()
            self.allowedPrevIndex[invalidatedIndex] = IndexSet()
        }
    }

    /// Returns a copy of the graph that contains only the nodes reachable from the root
    /// (index 0) through `allowedNextIndex`, renumbered in order of first visit.
    ///
    /// Successors are expanded only the first time a node is visited. Edges are still recorded
    /// on every visit, so the resulting graph is the same, but shared sub-graphs are no longer
    /// re-walked once per incoming path.
    consuming func clean() -> Self {
        var newGraph = Self(nodes: [])
        // Pending visits: the node to copy and the (new) index of the node we arrived from.
        var indices: [(nodeIndex: Int, fromIndex: Int?)] = [(0, nil)]
        // Old node index -> index in `newGraph`.
        var processedNodeIndices: [Int: Int] = [:]
        while let (nodeIndex, fromIndex) = indices.popLast() {
            let newIndex: Int
            let isFirstVisit: Bool
            if let knownIndex = processedNodeIndices[nodeIndex] {
                newIndex = knownIndex
                isFirstVisit = false
            } else {
                newIndex = newGraph.nodes.endIndex
                newGraph.nodes.append(self.nodes[nodeIndex])
                newGraph.nextCorrectNodeIndices[newIndex] = self.nextCorrectNodeIndices[nodeIndex]
                processedNodeIndices[nodeIndex] = newIndex
                isFirstVisit = true
            }
            if let fromIndex {
                newGraph.allowedNextIndex[fromIndex, default: IndexSet()].insert(newIndex)
                newGraph.allowedPrevIndex[newIndex, default: IndexSet()].insert(fromIndex)
            }
            // The successors were already queued when this node was first copied.
            guard isFirstVisit else {
                continue
            }
            for nextNodeIndex in self.allowedNextIndex[nodeIndex, default: IndexSet()] {
                indices.append((nextNodeIndex, newIndex))
            }
        }
        return newGraph
    }

    /// Applies nodes that were added to a correct graph after this graph was built.
    ///
    /// Each node is passed to `update` only after all of its predecessors that are also in
    /// `addedNodeIndices` have been processed.
    /// - warning: The original warning text was lost; the leading underscore suggests this is
    ///   not meant for general use — confirm before relying on it.
    mutating func _applyAdditionalCorrectGraph(_ newCorrectGraph: CorrectGraph, addedNodeIndices: IndexSet) {
        var processedIndices = IndexSet()
        // Reversed so that `popLast()` starts from the smallest added index.
        var nodeIndices = Array(addedNodeIndices.reversed())
        while let nodeIndex = nodeIndices.popLast() {
            if processedIndices.contains(nodeIndex) {
                continue
            }
            // Only predecessors that are themselves newly added need to be waited for.
            let prevIndices = newCorrectGraph.allowedPrevIndex[nodeIndex, default: IndexSet()].intersection(addedNodeIndices)
            // If some of them are still unprocessed, handle them first and retry this node afterwards.
            let diff = prevIndices.subtracting(processedIndices)
            guard diff.isEmpty else {
                nodeIndices.append(nodeIndex)
                nodeIndices.append(contentsOf: diff)
                continue
            }
            processedIndices.insert(nodeIndex)
            // The root node must never be among the added nodes.
            assert(nodeIndex != 0)
            self.update(newCorrectGraph, nodeIndex: nodeIndex)
            nodeIndices.append(contentsOf: newCorrectGraph.allowedNextIndex[nodeIndex, default: IndexSet()])
        }
    }

    /// Builds an input graph from `input` by visiting its nodes from index 0 and calling
    /// `update` on each node once all of its predecessors have been processed.
    static func build(input: CorrectGraph) -> Self {
        var inputGraph = Self()
        var nodeIndices = [0]
        var processedIndices = IndexSet()
        while let nodeIndex = nodeIndices.popLast() {
            if processedIndices.contains(nodeIndex) {
                continue
            }
            let prevIndices = input.allowedPrevIndex[nodeIndex, default: IndexSet()]
            // If some predecessors are still unprocessed, handle them first and retry this node afterwards.
            let diff = prevIndices.subtracting(processedIndices)
            guard diff.isEmpty else {
                nodeIndices.append(nodeIndex)
                nodeIndices.append(contentsOf: diff)
                continue
            }
            processedIndices.insert(nodeIndex)
            if nodeIndex != 0 {
                inputGraph.update(input, nodeIndex: nodeIndex)
            } else {
                // The root already exists in a fresh graph; only its `nextCorrectNodeIndices` needs seeding.
                inputGraph.nextCorrectNodeIndices[0] = input.allowedNextIndex[0]
            }
            nodeIndices.append(contentsOf: input.allowedNextIndex[nodeIndex, default: IndexSet()])
        }
        return inputGraph
    }
}