Files

280 lines
15 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// LookupGraph.swift
//
//
// Created by miwa on 2024/03/31.
//
import Foundation
@testable import KanaKanjiConverterModule
/// A graph of input characters annotated with dictionary character ids, used to run
/// prefix searches over LOUDS tries.
///
/// Node `0` is the root node. Edges are stored as index sets keyed by node index.
struct LookupGraph {
    /// A single character of the graph together with the information needed for lookup.
    struct Node: Equatable {
        /// The character this node represents.
        var character: Character
        /// The id of `character` as produced by the `character2CharId` closure given to `build`.
        var charId: UInt8
        /// The range of input elements associated with this node.
        var inputElementsRange: InputGraphRange
        /// The correction attached to this node (`.none` by default).
        var correction: CorrectGraph.Correction = .none
    }

    /// All nodes of the graph. Index `0` holds the root node.
    var nodes: [Node] = [
        // root node
        Node(character: "\0", charId: 0x00, inputElementsRange: .endIndex(0))
    ]
    /// For each node index, the indices of the nodes that may follow it.
    var allowedNextIndex: [Int: IndexSet] = [:]
    /// For each node index, the indices of the nodes that may precede it.
    var allowedPrevIndex: [Int: IndexSet] = [:]
    /// For each start node index, the table `graph node index -> LOUDS node index` that was
    /// found by the prefix search started at that node.
    var loudsNodeIndex: [Int: [Int: Int]] = [:]

    /// Builds a lookup graph from an input graph.
    ///
    /// - Parameters:
    ///   - input: The input graph whose nodes and edges are copied.
    ///   - character2CharId: Maps each node's character to its character id.
    /// - Returns: A lookup graph with the same node order and edges as `input`.
    static func build(input: consuming InputGraph, character2CharId: (Character) -> UInt8) -> Self {
        let nodes = input.nodes.map {
            Node(character: $0.character, charId: character2CharId($0.character), inputElementsRange: $0.inputElementsRange, correction: $0.correction)
        }
        return Self(nodes: nodes, allowedNextIndex: input.allowedNextIndex, allowedPrevIndex: input.allowedPrevIndex)
    }

    /// Pairs every successor of `nodeIndex` with a matching successor of `cacheNodeIndex` in `cacheGraph`.
    ///
    /// Two nodes match when their `charId` values are equal; the first matching cache successor wins.
    ///
    /// - Parameters:
    ///   - nodeIndex: A node index in this graph.
    ///   - cacheNodeIndex: The corresponding node index in `cacheGraph`.
    ///   - cacheGraph: The graph to match against.
    /// - Returns: One `(successor index, matching cache index or nil)` pair per successor of `nodeIndex`.
    func nextIndexWithMatch(_ nodeIndex: Int, cacheNodeIndex: Int, cacheGraph: borrowing LookupGraph) -> [(Int, Int?)] {
        let seeds: [Int] = Array(self.allowedNextIndex[nodeIndex, default: []])
        let cached = cacheGraph.allowedNextIndex[cacheNodeIndex, default: []].map {($0, cacheGraph.nodes[$0])}
        return seeds.map { seed in
            if let first = cached.first(where: {$0.1.charId == self.nodes[seed].charId}) {
                (seed, first.0)
            } else {
                (seed, nil)
            }
        }
    }

    /// Returns the successors of `nodeIndex` that the prefix search is allowed to continue to.
    ///
    /// When both the node and a successor have an input end index, the successor is kept only
    /// if its end index is strictly greater than the node's; otherwise it is always kept.
    private func advancingNextIndices(after nodeIndex: Int) -> [Int] {
        let currentEndIndex = self.nodes[nodeIndex].inputElementsRange.endIndex
        return self.allowedNextIndex[nodeIndex, default: IndexSet()].filter { index in
            if let currentEndIndex, let nextEndIndex = self.nodes[index].inputElementsRange.endIndex {
                return currentEndIndex < nextEndIndex
            }
            return true
        }
    }

    /// Runs a depth-first prefix search over `louds`, following the graph from `startGraphNodeIndex`.
    ///
    /// The table `graph node index -> LOUDS node index` found by the search is stored in
    /// `loudsNodeIndex[startGraphNodeIndex]`.
    ///
    /// - Parameters:
    ///   - louds: The trie to search.
    ///   - startGraphNodeIndex: The graph node at which every searched path starts.
    /// - Returns: The set of reached LOUDS node indices (LOUDS node `1`, where the search starts,
    ///   is always included) and, for each reached LOUDS node, the graph nodes at which a path
    ///   reaching it ends.
    mutating func byfixNodeIndices(in louds: LOUDS, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [Int]]) {
        var indexSet = IndexSet(integer: 1)
        // LOUDS node index -> graph nodes at which a path reaching that LOUDS node ends
        var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
        // graph node index -> LOUDS node index reached at that graph node
        var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:]
        typealias SearchItem = (
            nodeIndex: Int,
            lastLoudsNodeIndex: Int
        )
        var stack: [SearchItem] = [(startGraphNodeIndex, 1)]
        while let (cNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
            // A path that cannot be extended in the trie is abandoned together with its successors.
            guard let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: self.nodes[cNodeIndex].charId) else {
                continue
            }
            graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
            loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
            indexSet.insert(loudsNodeIndex)
            stack.append(contentsOf: self.advancingNextIndices(after: cNodeIndex).map {
                ($0, loudsNodeIndex)
            })
        }
        self.loudsNodeIndex[startGraphNodeIndex] = graphNodeEndIndexToLoudsNodeIndex
        return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
    }

    /// Same search as `byfixNodeIndices`, but reuses the LOUDS node indices already recorded in
    /// `cacheLookupGraph` for nodes that match the cache, and only queries `louds` for the rest.
    ///
    /// Falls back to `byfixNodeIndices` when the cache has no table for `graphNodeIndex.cache`.
    ///
    /// - Parameters:
    ///   - louds: The trie to search.
    ///   - cacheLookupGraph: A previously searched graph.
    ///   - graphNodeIndex: The start node in this graph (`start`) and its counterpart in the cache graph (`cache`).
    ///   - lookupGraphMatch: Receives `node index in this graph -> node index in the cache graph`
    ///     for every node resolved from the cache.
    /// - Returns: The same pair as `byfixNodeIndices`.
    mutating func differentialByfixSearch(
        in louds: LOUDS,
        cacheLookupGraph: LookupGraph,
        graphNodeIndex: (start: Int, cache: Int),
        lookupGraphMatch: inout [Int: Int]
    ) -> (IndexSet, [Int: [Int]]) {
        // Read-only table keyed by node indices of the *cache* graph.
        guard let cachedLoudsNodeIndex = cacheLookupGraph.loudsNodeIndex[graphNodeIndex.cache] else {
            return self.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex.start)
        }
        var indexSet = IndexSet(integer: 1)
        // LOUDS node index -> graph nodes at which a path reaching that LOUDS node ends
        var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
        // Table keyed by node indices of *this* graph. It is kept separate from the cache table:
        // the two graphs have independent index spaces, so sharing one dictionary would let an
        // entry written for a node of this graph answer a lookup made with a cache index, and
        // would store cache-indexed keys into `self.loudsNodeIndex`.
        var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:]
        typealias SearchItem = (
            nodeIndex: Int,
            /// The matching node index in the cache graph, or `nil` when there is none.
            cacheNodeIndex: Int?,
            lastLoudsNodeIndex: Int
        )
        var stack: [SearchItem] = [(graphNodeIndex.start, graphNodeIndex.cache, 1)]
        while let (cNodeIndex, cCacheNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
            if let cCacheNodeIndex, let loudsNodeIndex = cachedLoudsNodeIndex[cCacheNodeIndex] {
                // Cache hit: reuse the LOUDS node index without touching the trie.
                graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
                loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
                indexSet.insert(loudsNodeIndex)
                // Continue with every successor, carrying along its cache counterpart if it has one.
                stack.append(contentsOf: self.nextIndexWithMatch(cNodeIndex, cacheNodeIndex: cCacheNodeIndex, cacheGraph: cacheLookupGraph).map {
                    ($0.0, $0.1, loudsNodeIndex)
                })
                lookupGraphMatch[cNodeIndex] = cCacheNodeIndex
            } else if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: self.nodes[cNodeIndex].charId) {
                // Cache miss: search the trie; everything below this node is searched without the cache.
                graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
                loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
                indexSet.insert(loudsNodeIndex)
                stack.append(contentsOf: self.advancingNextIndices(after: cNodeIndex).map {
                    ($0, nil, loudsNodeIndex)
                })
            }
        }
        self.loudsNodeIndex[graphNodeIndex.start] = graphNodeEndIndexToLoudsNodeIndex
        return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
    }
}
extension DicdataStore {
    /// Builds the lookup graph and the convert graph for `inputGraph` from scratch.
    ///
    /// Every graph node reachable from the root is used once as the start of a prefix search;
    /// the dictionary entries found are stored as lattice nodes under that start node's index.
    ///
    /// - Parameters:
    ///   - inputGraph: The input graph to convert.
    ///   - option: Options forwarded to the dictionary data lookup.
    /// - Returns: The searched lookup graph and the convert graph built on top of it.
    func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> (LookupGraph, ConvertGraph) {
        var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
        var stack = Array(lookupGraph.allowedNextIndex[0, default: []])
        var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:]
        var processedIndexSet = IndexSet()
        while let graphNodeIndex = stack.popLast() {
            // A node can be pushed by several predecessors; handle it only once.
            guard !processedIndexSet.contains(graphNodeIndex) else {
                continue
            }
            let graphNode = lookupGraph.nodes[graphNodeIndex]
            let identifier = String(graphNode.character.toKatakana())
            // NOTE(review): when no LOUDS can be loaded the node is neither marked as processed
            // nor are its successors pushed — confirm that skipping them is intended.
            guard let louds = self.loadLOUDS(query: identifier) else {
                continue
            }
            // indexSet: LOUDS node indices reached from this start node
            // loudsNodeIndex2GraphNodeEndIndices: LOUDS node index -> graph nodes where the match ends
            let (indexSet, loudsNodeIndex2GraphNodeEndIndices) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex)
            let dicdataWithIndex = self.getDicdataFromLoudstxt3(identifier: identifier, indices: indexSet, option: option)
            graphNodeIndex2LatticeNodes[graphNodeIndex] = self.makeLatticeNodes(
                from: graphNode,
                lookupGraph: lookupGraph,
                dicdataWithIndex: dicdataWithIndex,
                loudsNodeIndex2GraphNodeEndIndices: loudsNodeIndex2GraphNodeEndIndices
            )
            processedIndexSet.insert(graphNodeIndex)
            stack.append(contentsOf: lookupGraph.allowedNextIndex[graphNodeIndex, default: []])
        }
        return (lookupGraph, ConvertGraph(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes))
    }

    /// Builds the lookup graph and the convert graph for `inputGraph`, reusing the search
    /// results stored in `cacheLookupGraph` for the nodes that match it.
    ///
    /// - Parameters:
    ///   - inputGraph: The input graph to convert.
    ///   - cacheLookupGraph: A lookup graph returned by an earlier build.
    ///   - option: Options forwarded to the dictionary data lookup.
    /// - Returns: The searched lookup graph, the convert graph, and the table
    ///   `node index in the new lookup graph -> node index in cacheLookupGraph` of matched nodes.
    func buildConvertGraphDifferential(
        inputGraph: consuming InputGraph,
        cacheLookupGraph: LookupGraph,
        option: ConvertRequestOptions
    ) -> (
        lookupGraph: LookupGraph,
        convertGraph: ConvertGraph,
        lookupGraphMatch: [Int: Int]
    ) {
        var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
        typealias StackItem = (
            currentLookupGraphNodeIndex: Int,
            cacheLookupGraphNodeIndex: Int?
        )
        // Both graphs have their root at index 0, so the roots are treated as matching and the
        // search starts from the root's successors.
        var stack: [StackItem] = lookupGraph.nextIndexWithMatch(0, cacheNodeIndex: 0, cacheGraph: cacheLookupGraph)
        var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:]
        var processedIndexSet = IndexSet()
        var lookupGraphMatch: [Int: Int] = [:]
        while let (graphNodeIndex, cacheGraphNodeIndex) = stack.popLast() {
            // A node can be pushed by several predecessors; handle it only once.
            guard !processedIndexSet.contains(graphNodeIndex) else {
                continue
            }
            let graphNode = lookupGraph.nodes[graphNodeIndex]
            let identifier = String(graphNode.character.toKatakana())
            // NOTE(review): when no LOUDS can be loaded the node is neither marked as processed
            // nor are its successors pushed — confirm that skipping them is intended.
            guard let louds = self.loadLOUDS(query: identifier) else {
                continue
            }
            // indexSet: LOUDS node indices reached from this start node
            // loudsNodeIndex2GraphNodeEndIndices: LOUDS node index -> graph nodes where the match ends
            let (indexSet, loudsNodeIndex2GraphNodeEndIndices) = if let cacheGraphNodeIndex {
                lookupGraph.differentialByfixSearch(in: louds, cacheLookupGraph: cacheLookupGraph, graphNodeIndex: (graphNodeIndex, cacheGraphNodeIndex), lookupGraphMatch: &lookupGraphMatch)
            } else {
                lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex)
            }
            let dicdataWithIndex = self.getDicdataFromLoudstxt3(identifier: identifier, indices: indexSet, option: option)
            graphNodeIndex2LatticeNodes[graphNodeIndex] = self.makeLatticeNodes(
                from: graphNode,
                lookupGraph: lookupGraph,
                dicdataWithIndex: dicdataWithIndex,
                loudsNodeIndex2GraphNodeEndIndices: loudsNodeIndex2GraphNodeEndIndices
            )
            processedIndexSet.insert(graphNodeIndex)
            // Successors keep a cache counterpart only while the current node has one.
            if let cacheGraphNodeIndex {
                stack.append(contentsOf: lookupGraph.nextIndexWithMatch(graphNodeIndex, cacheNodeIndex: cacheGraphNodeIndex, cacheGraph: cacheLookupGraph))
            } else {
                stack.append(contentsOf: lookupGraph.allowedNextIndex[graphNodeIndex, default: []].map {($0, nil)})
            }
        }
        return (lookupGraph, ConvertGraph(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes), lookupGraphMatch)
    }

    /// Creates the lattice nodes for all dictionary entries found by a prefix search that
    /// started at `graphNode`.
    ///
    /// One lattice node is created per dictionary entry and per graph node at which the entry's
    /// match ends. Nodes whose input range starts at index `0` get a BOS node as predecessor.
    ///
    /// - Parameters:
    ///   - graphNode: The graph node at which the prefix search started.
    ///   - lookupGraph: The graph that the end node indices refer to.
    ///   - dicdataWithIndex: The dictionary entries grouped by LOUDS node index.
    ///   - loudsNodeIndex2GraphNodeEndIndices: For each LOUDS node index, the graph nodes at which the match ends.
    /// - Returns: The lattice nodes, in the order of `dicdataWithIndex`.
    private func makeLatticeNodes(
        from graphNode: LookupGraph.Node,
        lookupGraph: LookupGraph,
        dicdataWithIndex: [(loudsNodeIndex: Int, dicdata: [DicdataElement])],
        loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]]
    ) -> [ConvertGraph.LatticeNode] {
        let startsAtInputHead = graphNode.inputElementsRange.startIndex == 0
        var latticeNodes: [ConvertGraph.LatticeNode] = []
        for (loudsNodeIndex, dicdata) in dicdataWithIndex {
            for endNodeIndex in loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []] {
                let inputElementsRange = InputGraphRange(
                    startIndex: graphNode.inputElementsRange.startIndex,
                    endIndex: lookupGraph.nodes[endNodeIndex].inputElementsRange.endIndex
                )
                if startsAtInputHead {
                    latticeNodes.append(contentsOf: dicdata.map {
                        .init(data: $0, endNodeIndex: endNodeIndex, inputElementsRange: inputElementsRange, prevs: [.BOSNode()])
                    })
                } else {
                    latticeNodes.append(contentsOf: dicdata.map {
                        .init(data: $0, endNodeIndex: endNodeIndex, inputElementsRange: inputElementsRange)
                    })
                }
            }
        }
        return latticeNodes
    }

    /// Loads the dictionary entries stored for the given LOUDS node indices.
    ///
    /// The indices are grouped into shards of 2048 (`index >> 11`); each shard is loaded through
    /// `LOUDS.getDataForLoudstxt3` under the name `identifier` followed by the shard number.
    ///
    /// - Parameters:
    ///   - identifier: The dictionary identifier that the shard number is appended to.
    ///   - indices: The LOUDS node indices to load.
    ///   - option: Options forwarded to the loader.
    /// - Returns: The loaded entries per LOUDS node index. Shards are visited in dictionary
    ///   iteration order, so the order of the result across shards is unspecified.
    func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
        // split = 2048: the shard number is the index divided by 2048
        let indicesByShard = Dictionary(grouping: indices, by: { $0 >> 11 })
        var data: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
        for (shard, shardIndices) in indicesByShard {
            // FIXME: use local option
            // trueIndex is the full index; keyIndex is the position inside the shard (index & (split - 1) = index & 2047)
            data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(shard)", indices: shardIndices.map {(trueIndex: $0, keyIndex: $0 & 2047)}, option: option))
        }
        return data
    }
}