split files

This commit is contained in:
Miwa / Ensan
2024-03-31 00:10:38 +09:00
parent 5714b320da
commit 35675207a3
2 changed files with 197 additions and 187 deletions

View File

@ -0,0 +1,197 @@
//
// LookupGraph.swift
//
//
// Created by miwa on 2024/03/31.
//
import Foundation
@testable import KanaKanjiConverterModule
/// A graph of input characters annotated with the character IDs used to walk a LOUDS trie.
///
/// Nodes are copied from an `InputGraph` by `build(input:character2CharId:)`; the default
/// value of `nodes` holds only the root node.
struct LookupGraph {
    /// One input character together with the ID produced for it by `character2CharId`.
    struct Node: Equatable {
        var character: Character
        var charId: UInt8
        var inputElementsRange: InputGraphRange
        var correction: CorrectGraph.Correction = .none
    }

    var nodes: [Node] = [
        // root node
        Node(character: "\0", charId: 0x00, inputElementsRange: .endIndex(0))
    ]
    /// Node index -> indices of the nodes allowed to follow it.
    var allowedNextIndex: [Int: IndexSet] = [:]
    /// Node index -> indices of the nodes allowed to precede it.
    var allowedPrevIndex: [Int: IndexSet] = [:]
    /// Start node index -> (node index of this graph -> LOUDS node index), as recorded by the
    /// most recent search that started from that node.
    var loudsNodeIndex: [Int: [Int: Int]] = [:]

    /// Creates a lookup graph from `input`, converting every character to its ID.
    ///
    /// - Parameters:
    ///   - input: The input graph; its nodes and adjacency tables are copied.
    ///   - character2CharId: Maps a node's character to the ID stored in `Node.charId`.
    static func build(input: consuming InputGraph, character2CharId: (Character) -> UInt8) -> Self {
        let nodes = input.nodes.map {
            Node(character: $0.character, charId: character2CharId($0.character), inputElementsRange: $0.inputElementsRange, correction: $0.correction)
        }
        return Self(nodes: nodes, allowedNextIndex: input.allowedNextIndex, allowedPrevIndex: input.allowedPrevIndex)
    }

    /// Returns the successors of `nodeIndex` that a LOUDS search should continue with.
    ///
    /// When both the current node and a successor have a known end index, the successor is
    /// kept only if its end index is strictly greater, so the search always moves forward.
    private func searchableNextIndices(after nodeIndex: Int) -> [Int] {
        let currentEnd = self.nodes[nodeIndex].inputElementsRange.endIndex
        return self.allowedNextIndex[nodeIndex, default: IndexSet()].filter { nextIndex in
            guard let currentEnd, let nextEnd = self.nodes[nextIndex].inputElementsRange.endIndex else {
                // At least one end index is unknown: nothing to compare, keep the successor.
                return true
            }
            return currentEnd < nextEnd
        }
    }

    /// Walks `louds` along every path of this graph that starts at `startGraphNodeIndex`.
    ///
    /// The node-to-LOUDS mapping found by the walk is stored in
    /// `loudsNodeIndex[startGraphNodeIndex]`.
    ///
    /// - Parameters:
    ///   - louds: The trie to search.
    ///   - startGraphNodeIndex: Index of the node the walk starts from.
    /// - Returns: The LOUDS node indices that were reached (index `1` is always included), and
    ///   a map from each of those LOUDS node indices to the graph nodes at which it was reached.
    mutating func byfixNodeIndices(in louds: LOUDS, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [Int]]) {
        var indexSet = IndexSet(integer: 1)
        // LOUDS node index -> graph node indices that end there
        var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
        // graph node index -> LOUDS node index
        var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:]
        typealias SearchItem = (
            nodeIndex: Int,
            lastLoudsNodeIndex: Int
        )
        // The walk starts from LOUDS node 1 (presumably the trie root; confirm against LOUDS).
        var stack: [SearchItem] = [(startGraphNodeIndex, 1)]
        while let (cNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
            // A failed lookup ends this path; the remaining stack items are still processed.
            guard let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: self.nodes[cNodeIndex].charId) else {
                continue
            }
            graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
            loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
            indexSet.insert(loudsNodeIndex)
            for nextIndex in self.searchableNextIndices(after: cNodeIndex) {
                stack.append((nextIndex, loudsNodeIndex))
            }
        }
        self.loudsNodeIndex[startGraphNodeIndex] = graphNodeEndIndexToLoudsNodeIndex
        return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
    }

    /// Same result shape as `byfixNodeIndices(in:startGraphNodeIndex:)`, but reuses the LOUDS
    /// node indices that `cacheLookupGraph` already recorded wherever the two graphs agree.
    ///
    /// Falls back to a full `byfixNodeIndices` search when the cache holds no result for
    /// `graphNodeIndex.cache`. The mapping stored in `loudsNodeIndex[graphNodeIndex.start]`
    /// is keyed by node indices of *this* graph only.
    ///
    /// - Parameters:
    ///   - louds: The trie to search for nodes the cache cannot answer.
    ///   - cacheLookupGraph: A previously searched graph.
    ///   - graphNodeIndex: `start` is the start node in this graph, `cache` the corresponding
    ///     node in `cacheLookupGraph`. The two are assumed to denote the same character;
    ///     this is not checked here.
    /// - Returns: See `byfixNodeIndices(in:startGraphNodeIndex:)`.
    mutating func differentialByfixSearch(in louds: LOUDS, cacheLookupGraph: LookupGraph, graphNodeIndex: (start: Int, cache: Int)) -> (IndexSet, [Int: [Int]]) {
        // Read-only: keyed by node indices of the CACHE graph.
        guard let cachedLoudsNodeIndex = cacheLookupGraph.loudsNodeIndex[graphNodeIndex.cache] else {
            return self.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex.start)
        }
        var indexSet = IndexSet(integer: 1)
        // LOUDS node index -> graph node indices (of this graph) that end there
        var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
        // Keyed by node indices of THIS graph. Kept separate from the cache's map because the
        // two graphs number their nodes independently; sharing one dictionary would let an
        // entry written for a current node be mistaken for a cached one.
        var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:]
        typealias SearchItem = (
            nodeIndex: Int,
            /// Matching node in the cache graph, or nil when there is none.
            cacheNodeIndex: Int?,
            lastLoudsNodeIndex: Int
        )
        var stack: [SearchItem] = [(graphNodeIndex.start, graphNodeIndex.cache, 1)]
        while let (cNodeIndex, cCacheNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
            if let cCacheNodeIndex, let loudsNodeIndex = cachedLoudsNodeIndex[cCacheNodeIndex] {
                // Cache hit: reuse the LOUDS node without searching the trie.
                graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
                loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
                indexSet.insert(loudsNodeIndex)
                // Pair each successor of the CURRENT node (looked up by the current index, not
                // the cache index) with the first cache successor that has the same charId.
                // NOTE(review): unlike the search branch below, successors are not pruned by
                // end index here — confirm whether that is intended.
                let cachedNextNodes = cacheLookupGraph.allowedNextIndex[cCacheNodeIndex, default: []].map { ($0, cacheLookupGraph.nodes[$0].charId) }
                for nextIndex in self.allowedNextIndex[cNodeIndex, default: []] {
                    let nextCharId = self.nodes[nextIndex].charId
                    let matchedCacheIndex = cachedNextNodes.first(where: { $0.1 == nextCharId })?.0
                    stack.append((nextIndex, matchedCacheIndex, loudsNodeIndex))
                }
            } else if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: self.nodes[cNodeIndex].charId) {
                // No usable cache entry: search the trie. Everything below this node is
                // searched as well, because its successors carry no cache node.
                graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
                loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
                indexSet.insert(loudsNodeIndex)
                for nextIndex in self.searchableNextIndices(after: cNodeIndex) {
                    stack.append((nextIndex, nil, loudsNodeIndex))
                }
            }
        }
        self.loudsNodeIndex[graphNodeIndex.start] = graphNodeEndIndexToLoudsNodeIndex
        return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
    }
}
extension DicdataStore {
    /// Builds a `ConvertGraph` by looking up dictionary entries for every start node that is
    /// reachable from the root of the lookup graph made from `inputGraph`.
    ///
    /// - Parameters:
    ///   - inputGraph: The input graph; consumed by the call.
    ///   - option: Passed through to the dictionary lookup.
    /// - Returns: The graph produced by `ConvertGraph.build` from the lookup graph and the
    ///   lattice nodes collected per start node.
    func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> ConvertGraph {
        var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
        var pendingStartIndices = Array(lookupGraph.allowedNextIndex[0, default: []])
        var latticeNodesByStartIndex: [Int: [ConvertGraph.LatticeNode]] = [:]
        var finishedStartIndices = IndexSet()
        while let startIndex = pendingStartIndices.popLast() {
            // Each start node is handled at most once.
            if finishedStartIndices.contains(startIndex) {
                continue
            }
            let startNode = lookupGraph.nodes[startIndex]
            // The katakana form of the start character selects both the LOUDS and its data files.
            let identifier = String(startNode.character.toKatakana())
            guard let louds = self.loadLOUDS(identifier: identifier) else {
                // No LOUDS for this character: the node is skipped and its successors are not queued.
                continue
            }
            // loudsIndices: LOUDS nodes whose entries must be loaded.
            // endNodesByLoudsIndex: LOUDS node index -> graph nodes at which that LOUDS node was reached.
            let (loudsIndices, endNodesByLoudsIndex) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startIndex)
            let entries: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: loudsIndices, option: option)
            let rangeStart = startNode.inputElementsRange.startIndex
            let startsAtInputHead = rangeStart == 0
            var latticeNodes: [ConvertGraph.LatticeNode] = []
            for entry in entries {
                for endNodeIndex in endNodesByLoudsIndex[entry.loudsNodeIndex, default: []] {
                    let inputElementsRange = InputGraphRange(
                        startIndex: rangeStart,
                        endIndex: lookupGraph.nodes[endNodeIndex].inputElementsRange.endIndex
                    )
                    let nextIndices = lookupGraph.allowedNextIndex[endNodeIndex, default: []]
                    for element in entry.dicdata {
                        if startsAtInputHead {
                            // Nodes starting at input position 0 are linked to the BOS node.
                            latticeNodes.append(.init(data: element, nextConvertNodeIndices: nextIndices, inputElementsRange: inputElementsRange, prevs: [.BOSNode()]))
                        } else {
                            latticeNodes.append(.init(data: element, nextConvertNodeIndices: nextIndices, inputElementsRange: inputElementsRange))
                        }
                    }
                }
            }
            latticeNodesByStartIndex[startIndex] = latticeNodes
            // Mark as done, then queue the successors of the start node.
            finishedStartIndices.insert(startIndex)
            pendingStartIndices.append(contentsOf: lookupGraph.allowedNextIndex[startIndex, default: []])
        }
        return ConvertGraph.build(input: lookupGraph, nodeIndex2LatticeNode: latticeNodesByStartIndex)
    }

    /// Loads dictionary entries for the given LOUDS node indices.
    ///
    /// Indices are grouped by `index >> 11` (2048 per group); each group is read through
    /// `LOUDS.getDataForLoudstxt3` under the name `identifier` followed by the group number.
    ///
    /// - Returns: The concatenated results of all groups. Groups come from a dictionary, so
    ///   their relative order is unspecified.
    func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
        // split = 2048 entries per group
        let groupShift = 11
        let inGroupMask = (1 << groupShift) - 1
        let indicesByGroup = Dictionary(grouping: indices, by: { $0 >> groupShift })
        var collected: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
        for (groupNumber, groupIndices) in indicesByGroup {
            // FIXME: use local option
            // keyIndex is the position inside the group: trueIndex masked with split - 1 = 2047.
            let keyedIndices = groupIndices.map { (trueIndex: $0, keyIndex: $0 & inGroupMask) }
            collected.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + String(groupNumber), indices: keyedIndices, option: option))
        }
        return collected
    }
}

View File

@ -9,193 +9,6 @@ import XCTest
import Foundation
@testable import KanaKanjiConverterModule
/// A graph of input characters annotated with the character IDs used to walk a LOUDS trie.
///
/// Nodes are copied from an `InputGraph` by `build(input:character2CharId:)`; the default
/// value of `nodes` holds only the root node.
struct LookupGraph {
    /// One input character together with the ID produced for it by `character2CharId`.
    struct Node: Equatable {
        var character: Character
        var charId: UInt8
        var inputElementsRange: InputGraphRange
        var correction: CorrectGraph.Correction = .none
    }

    var nodes: [Node] = [
        // root node
        Node(character: "\0", charId: 0x00, inputElementsRange: .endIndex(0))
    ]
    /// Node index -> indices of the nodes allowed to follow it.
    var allowedNextIndex: [Int: IndexSet] = [:]
    /// Node index -> indices of the nodes allowed to precede it.
    var allowedPrevIndex: [Int: IndexSet] = [:]
    /// Start node index -> (node index of this graph -> LOUDS node index), as recorded by the
    /// most recent search that started from that node.
    var loudsNodeIndex: [Int: [Int: Int]] = [:]

    /// Creates a lookup graph from `input`, converting every character to its ID.
    ///
    /// - Parameters:
    ///   - input: The input graph; its nodes and adjacency tables are copied.
    ///   - character2CharId: Maps a node's character to the ID stored in `Node.charId`.
    static func build(input: consuming InputGraph, character2CharId: (Character) -> UInt8) -> Self {
        let nodes = input.nodes.map {
            Node(character: $0.character, charId: character2CharId($0.character), inputElementsRange: $0.inputElementsRange, correction: $0.correction)
        }
        return Self(nodes: nodes, allowedNextIndex: input.allowedNextIndex, allowedPrevIndex: input.allowedPrevIndex)
    }

    /// Returns the successors of `nodeIndex` that a LOUDS search should continue with.
    ///
    /// When both the current node and a successor have a known end index, the successor is
    /// kept only if its end index is strictly greater, so the search always moves forward.
    private func searchableNextIndices(after nodeIndex: Int) -> [Int] {
        let currentEnd = self.nodes[nodeIndex].inputElementsRange.endIndex
        return self.allowedNextIndex[nodeIndex, default: IndexSet()].filter { nextIndex in
            guard let currentEnd, let nextEnd = self.nodes[nextIndex].inputElementsRange.endIndex else {
                // At least one end index is unknown: nothing to compare, keep the successor.
                return true
            }
            return currentEnd < nextEnd
        }
    }

    /// Walks `louds` along every path of this graph that starts at `startGraphNodeIndex`.
    ///
    /// The node-to-LOUDS mapping found by the walk is stored in
    /// `loudsNodeIndex[startGraphNodeIndex]`.
    ///
    /// - Parameters:
    ///   - louds: The trie to search.
    ///   - startGraphNodeIndex: Index of the node the walk starts from.
    /// - Returns: The LOUDS node indices that were reached (index `1` is always included), and
    ///   a map from each of those LOUDS node indices to the graph nodes at which it was reached.
    mutating func byfixNodeIndices(in louds: LOUDS, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [Int]]) {
        var indexSet = IndexSet(integer: 1)
        // LOUDS node index -> graph node indices that end there
        var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
        // graph node index -> LOUDS node index
        var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:]
        typealias SearchItem = (
            nodeIndex: Int,
            lastLoudsNodeIndex: Int
        )
        // The walk starts from LOUDS node 1 (presumably the trie root; confirm against LOUDS).
        var stack: [SearchItem] = [(startGraphNodeIndex, 1)]
        while let (cNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
            // A failed lookup ends this path; the remaining stack items are still processed.
            guard let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: self.nodes[cNodeIndex].charId) else {
                continue
            }
            graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
            loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
            indexSet.insert(loudsNodeIndex)
            for nextIndex in self.searchableNextIndices(after: cNodeIndex) {
                stack.append((nextIndex, loudsNodeIndex))
            }
        }
        self.loudsNodeIndex[startGraphNodeIndex] = graphNodeEndIndexToLoudsNodeIndex
        return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
    }

    /// Same result shape as `byfixNodeIndices(in:startGraphNodeIndex:)`, but reuses the LOUDS
    /// node indices that `cacheLookupGraph` already recorded wherever the two graphs agree.
    ///
    /// Falls back to a full `byfixNodeIndices` search when the cache holds no result for
    /// `graphNodeIndex.cache`. The mapping stored in `loudsNodeIndex[graphNodeIndex.start]`
    /// is keyed by node indices of *this* graph only.
    ///
    /// - Parameters:
    ///   - louds: The trie to search for nodes the cache cannot answer.
    ///   - cacheLookupGraph: A previously searched graph.
    ///   - graphNodeIndex: `start` is the start node in this graph, `cache` the corresponding
    ///     node in `cacheLookupGraph`. The two are assumed to denote the same character;
    ///     this is not checked here.
    /// - Returns: See `byfixNodeIndices(in:startGraphNodeIndex:)`.
    mutating func differentialByfixSearch(in louds: LOUDS, cacheLookupGraph: LookupGraph, graphNodeIndex: (start: Int, cache: Int)) -> (IndexSet, [Int: [Int]]) {
        // Read-only: keyed by node indices of the CACHE graph.
        guard let cachedLoudsNodeIndex = cacheLookupGraph.loudsNodeIndex[graphNodeIndex.cache] else {
            return self.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex.start)
        }
        var indexSet = IndexSet(integer: 1)
        // LOUDS node index -> graph node indices (of this graph) that end there
        var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
        // Keyed by node indices of THIS graph. Kept separate from the cache's map because the
        // two graphs number their nodes independently; sharing one dictionary would let an
        // entry written for a current node be mistaken for a cached one.
        var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:]
        typealias SearchItem = (
            nodeIndex: Int,
            /// Matching node in the cache graph, or nil when there is none.
            cacheNodeIndex: Int?,
            lastLoudsNodeIndex: Int
        )
        var stack: [SearchItem] = [(graphNodeIndex.start, graphNodeIndex.cache, 1)]
        while let (cNodeIndex, cCacheNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
            if let cCacheNodeIndex, let loudsNodeIndex = cachedLoudsNodeIndex[cCacheNodeIndex] {
                // Cache hit: reuse the LOUDS node without searching the trie.
                graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
                loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
                indexSet.insert(loudsNodeIndex)
                // Pair each successor of the CURRENT node (looked up by the current index, not
                // the cache index) with the first cache successor that has the same charId.
                // NOTE(review): unlike the search branch below, successors are not pruned by
                // end index here — confirm whether that is intended.
                let cachedNextNodes = cacheLookupGraph.allowedNextIndex[cCacheNodeIndex, default: []].map { ($0, cacheLookupGraph.nodes[$0].charId) }
                for nextIndex in self.allowedNextIndex[cNodeIndex, default: []] {
                    let nextCharId = self.nodes[nextIndex].charId
                    let matchedCacheIndex = cachedNextNodes.first(where: { $0.1 == nextCharId })?.0
                    stack.append((nextIndex, matchedCacheIndex, loudsNodeIndex))
                }
            } else if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: self.nodes[cNodeIndex].charId) {
                // No usable cache entry: search the trie. Everything below this node is
                // searched as well, because its successors carry no cache node.
                graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
                loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
                indexSet.insert(loudsNodeIndex)
                for nextIndex in self.searchableNextIndices(after: cNodeIndex) {
                    stack.append((nextIndex, nil, loudsNodeIndex))
                }
            }
        }
        self.loudsNodeIndex[graphNodeIndex.start] = graphNodeEndIndexToLoudsNodeIndex
        return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
    }
}
extension DicdataStore {
    /// Builds a `ConvertGraph` by looking up dictionary entries for every start node that is
    /// reachable from the root of the lookup graph made from `inputGraph`.
    ///
    /// - Parameters:
    ///   - inputGraph: The input graph; consumed by the call.
    ///   - option: Passed through to the dictionary lookup.
    /// - Returns: The graph produced by `ConvertGraph.build` from the lookup graph and the
    ///   lattice nodes collected per start node.
    func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> ConvertGraph {
        var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
        var pendingStartIndices = Array(lookupGraph.allowedNextIndex[0, default: []])
        var latticeNodesByStartIndex: [Int: [ConvertGraph.LatticeNode]] = [:]
        var finishedStartIndices = IndexSet()
        while let startIndex = pendingStartIndices.popLast() {
            // Each start node is handled at most once.
            if finishedStartIndices.contains(startIndex) {
                continue
            }
            let startNode = lookupGraph.nodes[startIndex]
            // The katakana form of the start character selects both the LOUDS and its data files.
            let identifier = String(startNode.character.toKatakana())
            guard let louds = self.loadLOUDS(identifier: identifier) else {
                // No LOUDS for this character: the node is skipped and its successors are not queued.
                continue
            }
            // loudsIndices: LOUDS nodes whose entries must be loaded.
            // endNodesByLoudsIndex: LOUDS node index -> graph nodes at which that LOUDS node was reached.
            let (loudsIndices, endNodesByLoudsIndex) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startIndex)
            let entries: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: loudsIndices, option: option)
            let rangeStart = startNode.inputElementsRange.startIndex
            let startsAtInputHead = rangeStart == 0
            var latticeNodes: [ConvertGraph.LatticeNode] = []
            for entry in entries {
                for endNodeIndex in endNodesByLoudsIndex[entry.loudsNodeIndex, default: []] {
                    let inputElementsRange = InputGraphRange(
                        startIndex: rangeStart,
                        endIndex: lookupGraph.nodes[endNodeIndex].inputElementsRange.endIndex
                    )
                    let nextIndices = lookupGraph.allowedNextIndex[endNodeIndex, default: []]
                    for element in entry.dicdata {
                        if startsAtInputHead {
                            // Nodes starting at input position 0 are linked to the BOS node.
                            latticeNodes.append(.init(data: element, nextConvertNodeIndices: nextIndices, inputElementsRange: inputElementsRange, prevs: [.BOSNode()]))
                        } else {
                            latticeNodes.append(.init(data: element, nextConvertNodeIndices: nextIndices, inputElementsRange: inputElementsRange))
                        }
                    }
                }
            }
            latticeNodesByStartIndex[startIndex] = latticeNodes
            // Mark as done, then queue the successors of the start node.
            finishedStartIndices.insert(startIndex)
            pendingStartIndices.append(contentsOf: lookupGraph.allowedNextIndex[startIndex, default: []])
        }
        return ConvertGraph.build(input: lookupGraph, nodeIndex2LatticeNode: latticeNodesByStartIndex)
    }

    /// Loads dictionary entries for the given LOUDS node indices.
    ///
    /// Indices are grouped by `index >> 11` (2048 per group); each group is read through
    /// `LOUDS.getDataForLoudstxt3` under the name `identifier` followed by the group number.
    ///
    /// - Returns: The concatenated results of all groups. Groups come from a dictionary, so
    ///   their relative order is unspecified.
    func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
        // split = 2048 entries per group
        let groupShift = 11
        let inGroupMask = (1 << groupShift) - 1
        let indicesByGroup = Dictionary(grouping: indices, by: { $0 >> groupShift })
        var collected: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
        for (groupNumber, groupIndices) in indicesByGroup {
            // FIXME: use local option
            // keyIndex is the position inside the group: trueIndex masked with split - 1 = 2047.
            let keyedIndices = groupIndices.map { (trueIndex: $0, keyIndex: $0 & inGroupMask) }
            collected.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + String(groupNumber), indices: keyedIndices, option: option))
        }
        return collected
    }
}
final class LookupGraphTests: XCTestCase {
func requestOptions() -> ConvertRequestOptions {
.withDefaultDictionary(requireJapanesePrediction: false, requireEnglishPrediction: false, keyboardLanguage: .ja_JP, learningType: .nothing, memoryDirectoryURL: URL(fileURLWithPath: ""), sharedContainerURL: URL(fileURLWithPath: ""), metadata: .init(appVersionString: "Test"))