LookupGraphの差分構築を実装

This commit is contained in:
Miwa / Ensan
2024-03-31 00:08:54 +09:00
parent fdbaabb83c
commit 5714b320da

View File

@ -25,6 +25,9 @@ struct LookupGraph {
var allowedNextIndex: [Int: IndexSet] = [:]
/// prevIndex
var allowedPrevIndex: [Int: IndexSet] = [:]
/// node indexloudsindex
var loudsNodeIndex: [Int: [Int: Int]] = [:]
static func build(input: consuming InputGraph, character2CharId: (Character) -> UInt8) -> Self {
let nodes = input.nodes.map {
@ -32,27 +35,28 @@ struct LookupGraph {
}
return Self(nodes: nodes, allowedNextIndex: input.allowedNextIndex, allowedPrevIndex: input.allowedPrevIndex)
}
}
extension LOUDS {
func byfixNodeIndices(_ lookupGraph: LookupGraph, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [Int]]) {
mutating func byfixNodeIndices(in louds: LOUDS, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [Int]]) {
var indexSet = IndexSet(integer: 1)
// loudsLookupGraph
var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
// loudsLookupGraph
var graphNodeEndIndexToLoudsNodeIndex: [Int: Int] = [:]
typealias SearchItem = (
nodeIndex: Int,
lastLoudsNodeIndex: Int
)
var stack: [SearchItem] = [(startGraphNodeIndex, 1)]
while let (cNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
let cNode = lookupGraph.nodes[cNodeIndex]
let cNode = self.nodes[cNodeIndex]
// nextNodes
if let loudsNodeIndex = self.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: cNode.charId) {
if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: cNode.charId) {
graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
indexSet.insert(loudsNodeIndex)
let nextIndices = lookupGraph.allowedNextIndex[cNodeIndex, default: IndexSet()]
let nextIndices = self.allowedNextIndex[cNodeIndex, default: IndexSet()]
stack.append(contentsOf: nextIndices.compactMap { index in
let node = lookupGraph.nodes[index]
let node = self.nodes[index]
// endIndex
// endIndex調
if let cInputElementsEndIndex = cNode.inputElementsRange.endIndex,
@ -67,6 +71,64 @@ extension LOUDS {
continue
}
}
self.loudsNodeIndex[startGraphNodeIndex] = graphNodeEndIndexToLoudsNodeIndex
return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
}
mutating func differentialByfixSearch(in louds: LOUDS, cacheLookupGraph: LookupGraph, graphNodeIndex: (start: Int, cache: Int)) -> (IndexSet, [Int: [Int]]) {
guard var graphNodeEndIndexToLoudsNodeIndex = cacheLookupGraph.loudsNodeIndex[graphNodeIndex.cache] else {
return self.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex.start)
}
// lookupGraph.current.nodes[graphNodeIndex.start]lookupGraph.cache.nodes[graphNodeIndex.cache]
var indexSet = IndexSet(integer: 1)
// loudsLookupGraph
var loudsNodeIndex2GraphNodeEndIndices: [Int: [Int]] = [:]
typealias SearchItem = (
nodeIndex: Int,
/// cachenodeIndexnilnil
cacheNodeIndex: Int?,
lastLoudsNodeIndex: Int
)
var stack: [SearchItem] = [(graphNodeIndex.start, graphNodeIndex.cache, 1)]
while let (cNodeIndex, cCacheNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
let cNode = self.nodes[cNodeIndex]
if let cCacheNodeIndex, let loudsNodeIndex = graphNodeEndIndexToLoudsNodeIndex[cCacheNodeIndex] {
loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
indexSet.insert(loudsNodeIndex)
// next nodes
let cachedNextNodes = cacheLookupGraph.allowedNextIndex[cCacheNodeIndex, default: []].map { ($0, cacheLookupGraph.nodes[$0].charId) }
let currentNextNodes = self.allowedNextIndex[cCacheNodeIndex, default: []].map { ($0, self.nodes[$0].charId) }
for currentNextNode in currentNextNodes {
if let item = cachedNextNodes.first(where: {$0.1 == currentNextNode.1}) {
stack.append((currentNextNode.0, item.0, loudsNodeIndex))
} else {
stack.append((currentNextNode.0, nil, loudsNodeIndex))
}
}
}
//
else if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: cNode.charId) {
graphNodeEndIndexToLoudsNodeIndex[cNodeIndex] = loudsNodeIndex
loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
indexSet.insert(loudsNodeIndex)
let nextIndices = self.allowedNextIndex[cNodeIndex, default: IndexSet()]
stack.append(contentsOf: nextIndices.compactMap { index in
let node = self.nodes[index]
// endIndex
// endIndex調
if let cInputElementsEndIndex = cNode.inputElementsRange.endIndex,
let nInputElementsEndIndex = node.inputElementsRange.endIndex {
guard cInputElementsEndIndex < nInputElementsEndIndex else {
return nil
}
}
return (index, nil, loudsNodeIndex)
})
}
}
self.loudsNodeIndex[graphNodeIndex.start] = graphNodeEndIndexToLoudsNodeIndex
return (indexSet, loudsNodeIndex2GraphNodeEndIndices)
}
@ -74,7 +136,7 @@ extension LOUDS {
extension DicdataStore {
func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> ConvertGraph {
let lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
var stack = Array(lookupGraph.allowedNextIndex[0, default: []])
var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:]
var processedIndexSet = IndexSet()
@ -90,8 +152,8 @@ extension DicdataStore {
/// graphNodeIndex
/// * loudsNodeIndices: loudsloudstxt
/// * loudsNodeIndex2GraphNodeEndIndices: loudsNodeIndexgraphNodeIndex
let (loudsNodeIndices, loudsNodeIndex2GraphNodeEndIndices) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: graphNodeIndex)
let dicdataWithIndex: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: String(graphNode.character.toKatakana()), indices: loudsNodeIndices, option: option)
let (indexSet, loudsNodeIndex2GraphNodeEndIndices) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex)
let dicdataWithIndex: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: String(graphNode.character.toKatakana()), indices: indexSet, option: option)
// latticeNodes
var latticeNodes: [ConvertGraph.LatticeNode] = []
@ -118,7 +180,7 @@ extension DicdataStore {
processedIndexSet.insert(graphNodeIndex)
stack.append(contentsOf: lookupGraph.allowedNextIndex[graphNodeIndex, default: []])
}
return ConvertGraph.build(input: consume lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes)
return ConvertGraph.build(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes)
}
func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
@ -157,10 +219,10 @@ final class LookupGraphTests: XCTestCase {
.init(character: "", inputStyle: .direct)
])
let inputGraph = InputGraph.build(input: correctGraph)
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
let startNodeIndex = inputGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex)
let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex ?? 0)
let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0)
let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
//
@ -198,10 +260,10 @@ final class LookupGraphTests: XCTestCase {
.init(character: "", inputStyle: .direct)
])
let inputGraph = InputGraph.build(input: correctGraph)
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex)
let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex ?? 0)
let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0)
let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
//
@ -231,10 +293,10 @@ final class LookupGraphTests: XCTestCase {
.init(character: "", inputStyle: .direct)
])
let inputGraph = InputGraph.build(input: correctGraph)
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex)
let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex ?? 0)
let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0)
let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
//
@ -273,10 +335,10 @@ final class LookupGraphTests: XCTestCase {
.init(character: "i", inputStyle: .roman2kana)
])
let inputGraph = InputGraph.build(input: correctGraph)
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex)
let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex ?? 0)
let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0)
let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
//
@ -310,10 +372,10 @@ final class LookupGraphTests: XCTestCase {
.init(character: "i", inputStyle: .roman2kana)
])
let inputGraph = InputGraph.build(input: correctGraph)
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
var lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: values.character2CharId)
let startNodeIndex = lookupGraph.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex)
let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex ?? 0)
let (loudsNodeIndices, _) = lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex ?? 0)
let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
//
@ -333,4 +395,98 @@ final class LookupGraphTests: XCTestCase {
[]
)
}
func testByfixNodeIndices_たいか_add_く() throws {
let values = setup()
guard let louds = LOUDS.load("", option: requestOptions()) else {
XCTFail()
return
}
let correctGraph1 = CorrectGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
])
let inputGraph1 = InputGraph.build(input: correctGraph1)
var lookupGraph1 = LookupGraph.build(input: inputGraph1, character2CharId: values.character2CharId)
let startNodeIndex1 = lookupGraph1.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph1.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex1)
_ = lookupGraph1.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex1 ?? 0)
let correctGraph2 = CorrectGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct) // added
])
let inputGraph2 = InputGraph.build(input: correctGraph2)
var lookupGraph2 = LookupGraph.build(input: inputGraph2, character2CharId: values.character2CharId)
let startNodeIndex2 = lookupGraph2.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph2.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex2)
let (loudsNodeIndices2, _) = lookupGraph2.differentialByfixSearch(in: louds, cacheLookupGraph: lookupGraph1, graphNodeIndex: (startNodeIndex2 ?? 0, startNodeIndex1 ?? 0))
let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices2, option: requestOptions())
let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
//
XCTAssertTrue(dicdata.contains {$0.word == ""})
//
XCTAssertTrue(dicdata.contains {$0.word == "タイ"})
XCTAssertTrue(dicdata.contains {$0.word == "他意"})
//
XCTAssertTrue(dicdata.contains {$0.word == "対価"})
//
XCTAssertTrue(dicdata.contains {$0.word == "大河"})
//
XCTAssertTrue(dicdata.contains {$0.word == "体格"})
//
XCTAssertTrue(dicdata.contains {$0.word == "退学"})
// all keys
XCTAssertEqual(
dicdata.mapSet {$0.ruby}.symmetricDifference(["", "タイ", "タイカ", "タイガ", "タイカク", "タイガク"]),
[]
)
}
func testByfixNodeIndices_たいか_remove_く() throws {
let values = setup()
guard let louds = LOUDS.load("", option: requestOptions()) else {
XCTFail()
return
}
let correctGraph1 = CorrectGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
])
let inputGraph1 = InputGraph.build(input: correctGraph1)
var lookupGraph1 = LookupGraph.build(input: inputGraph1, character2CharId: values.character2CharId)
let startNodeIndex1 = lookupGraph1.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph1.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex1)
_ = lookupGraph1.byfixNodeIndices(in: louds, startGraphNodeIndex: startNodeIndex1 ?? 0)
let correctGraph2 = CorrectGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
])
let inputGraph2 = InputGraph.build(input: correctGraph2)
var lookupGraph2 = LookupGraph.build(input: inputGraph2, character2CharId: values.character2CharId)
let startNodeIndex2 = lookupGraph2.allowedNextIndex[0, default: IndexSet()].first(where: { lookupGraph2.nodes[$0].character == "" })
XCTAssertNotNil(startNodeIndex2)
let (loudsNodeIndices2, _) = lookupGraph2.differentialByfixSearch(in: louds, cacheLookupGraph: lookupGraph1, graphNodeIndex: (startNodeIndex2 ?? 0, startNodeIndex1 ?? 0))
let dicdataWithIndex = values.dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices2, option: requestOptions())
let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
//
XCTAssertTrue(dicdata.contains {$0.word == ""})
//
XCTAssertTrue(dicdata.contains {$0.word == "タイ"})
XCTAssertTrue(dicdata.contains {$0.word == "他意"})
//
XCTAssertFalse(dicdata.contains {$0.ruby == "タイカ"})
//
XCTAssertFalse(dicdata.contains {$0.ruby == "タイガ"})
// all keys
XCTAssertEqual(
dicdata.mapSet {$0.ruby}.symmetricDifference(["", "タイ"]),
[]
)
}
}