_experimental_additionalの実装にも反映

This commit is contained in:
Miwa / Ensan
2024-03-31 01:35:32 +09:00
parent 199129fd68
commit ac3b8cd4bf
3 changed files with 100 additions and 21 deletions

View File

@ -26,11 +26,13 @@ struct ConvertGraph {
/// prevIndex
var allowedPrevIndex: [Int: IndexSet] = [:]
static func build(input: LookupGraph, nodeIndex2LatticeNode: [Int: [LatticeNode]]) -> Self {
init(input: LookupGraph, nodeIndex2LatticeNode: [Int: [LatticeNode]]) {
let nodes = input.nodes.enumerated().map { (index, node) in
Node(latticeNodes: nodeIndex2LatticeNode[index, default: []], inputElementsRange: node.inputElementsRange, correction: node.correction)
}
return Self(nodes: nodes, allowedNextIndex: input.allowedNextIndex, allowedPrevIndex: input.allowedPrevIndex)
self.nodes = nodes
self.allowedPrevIndex = input.allowedPrevIndex
self.allowedNextIndex = input.allowedNextIndex
}
}

View File

@ -34,6 +34,18 @@ struct LookupGraph {
return Self(nodes: nodes, allowedNextIndex: input.allowedNextIndex, allowedPrevIndex: input.allowedPrevIndex)
}
/// Pairs each successor of `nodeIndex` in this graph with a successor of `cacheNodeIndex` in `cacheGraph` that has the same `charId`.
///
/// - Parameters:
///   - nodeIndex: Index of a node in this graph whose `allowedNextIndex` entries are enumerated.
///   - cacheNodeIndex: Index of the node in `cacheGraph` whose `allowedNextIndex` entries are searched for matches.
///   - cacheGraph: The graph to match against.
/// - Returns: One tuple per successor of `nodeIndex`, in iteration order: the successor index and the
///   index of the first cached successor with an equal `charId`, or `nil` when there is none.
func nextIndexWithMatch(_ nodeIndex: Int, cacheNodeIndex: Int, cacheGraph: borrowing LookupGraph) -> [(Int, Int?)] {
    // Collect the cached successors up front, each paired with its charId.
    let cachedSuccessors = cacheGraph.allowedNextIndex[cacheNodeIndex, default: []].map { (index: $0, charId: cacheGraph.nodes[$0].charId) }
    var pairs: [(Int, Int?)] = []
    for successor in self.allowedNextIndex[nodeIndex, default: []] {
        // The first cached successor with the same charId wins; later duplicates are ignored.
        let matched = cachedSuccessors.first(where: { $0.charId == self.nodes[successor].charId })
        pairs.append((successor, matched?.index))
    }
    return pairs
}
mutating func byfixNodeIndices(in louds: LOUDS, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [Int]]) {
var indexSet = IndexSet(integer: 1)
// loudsLookupGraph
@ -95,16 +107,9 @@ struct LookupGraph {
loudsNodeIndex2GraphNodeEndIndices[loudsNodeIndex, default: []].append(cNodeIndex)
indexSet.insert(loudsNodeIndex)
// next nodes
let cachedNextNodes = cacheLookupGraph.allowedNextIndex[cCacheNodeIndex, default: []].map { ($0, cacheLookupGraph.nodes[$0].charId) }
let currentNextNodes = self.allowedNextIndex[cCacheNodeIndex, default: []].map { ($0, self.nodes[$0].charId) }
for currentNextNode in currentNextNodes {
if let item = cachedNextNodes.first(where: {$0.1 == currentNextNode.1}) {
stack.append((currentNextNode.0, item.0, loudsNodeIndex))
} else {
stack.append((currentNextNode.0, nil, loudsNodeIndex))
}
}
stack.append(contentsOf: self.nextIndexWithMatch(cNodeIndex, cacheNodeIndex: cCacheNodeIndex, cacheGraph: cacheLookupGraph).map {
($0.0, $0.1, loudsNodeIndex)
})
}
//
else if let loudsNodeIndex = louds.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: cNode.charId) {
@ -133,7 +138,7 @@ struct LookupGraph {
}
extension DicdataStore {
func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> ConvertGraph {
func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> (LookupGraph, ConvertGraph) {
var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
var stack = Array(lookupGraph.allowedNextIndex[0, default: []])
var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:]
@ -178,9 +183,72 @@ extension DicdataStore {
processedIndexSet.insert(graphNodeIndex)
stack.append(contentsOf: lookupGraph.allowedNextIndex[graphNodeIndex, default: []])
}
return ConvertGraph.build(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes)
return (lookupGraph, ConvertGraph(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes))
}
/// Builds a `LookupGraph` from `inputGraph` and derives a `ConvertGraph` from it, walking the new
/// graph side by side with a previously built `cacheLookupGraph`.
///
/// Nodes that have a matching node in the cache (same `charId`, see `nextIndexWithMatch`) are looked up
/// via `differentialByfixSearch`; all other nodes fall back to `byfixNodeIndices`.
/// NOTE(review): presumably the differential search reuses results stored in the cache graph — confirm
/// against `differentialByfixSearch`.
///
/// - Parameters:
///   - inputGraph: The input graph to convert; consumed by this call.
///   - cacheLookupGraph: The lookup graph returned by an earlier build, used for matching.
///   - option: Options forwarded to the dictionary data lookup.
/// - Returns: The freshly built lookup graph and the convert graph created from it.
func buildConvertGraphDifferential(inputGraph: consuming InputGraph, cacheLookupGraph: LookupGraph, option: ConvertRequestOptions) -> (LookupGraph, ConvertGraph) {
    var lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
    typealias StackItem = (
        currentLookupGraphNodeIndex: Int,
        cacheLookupGraphNodeIndex: Int?
    )
    // Seed the traversal with the successors of node 0 of both graphs
    // (node 0 is presumably the BOS/root node — confirm against LookupGraph.build).
    var stack: [StackItem] = lookupGraph.nextIndexWithMatch(0, cacheNodeIndex: 0, cacheGraph: cacheLookupGraph)
    var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:]
    var processedIndexSet = IndexSet()
    while let (graphNodeIndex, cacheGraphNodeIndex) = stack.popLast() {
        // A node can be pushed several times; handle it only once.
        if processedIndexSet.contains(graphNodeIndex) {
            continue
        }
        let graphNode = lookupGraph.nodes[graphNodeIndex]
        // The same identifier selects both the LOUDS and the dictionary data.
        let identifier = String(graphNode.character.toKatakana())
        guard let louds = self.loadLOUDS(identifier: identifier) else {
            continue
        }
        // Search starting from graphNodeIndex. The result is
        // * loudsNodeIndices: the LOUDS node indices to fetch dictionary data for
        // * endIndicesByLoudsNode: for each LOUDS node index, the graph node indices where the match ends
        let (loudsNodeIndices, endIndicesByLoudsNode) = if let cacheGraphNodeIndex {
            lookupGraph.differentialByfixSearch(in: louds, cacheLookupGraph: cacheLookupGraph, graphNodeIndex: (graphNodeIndex, cacheGraphNodeIndex))
        } else {
            lookupGraph.byfixNodeIndices(in: louds, startGraphNodeIndex: graphNodeIndex)
        }
        let dicdataWithIndex: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: loudsNodeIndices, option: option)
        // Nodes whose input range starts at 0 get a BOS node as their predecessor.
        let startsAtBeginning = graphNode.inputElementsRange.startIndex == 0
        // Build the lattice nodes that start at this graph node.
        var latticeNodes: [ConvertGraph.LatticeNode] = []
        for (loudsNodeIndex, dicdata) in dicdataWithIndex {
            for endNodeIndex in endIndicesByLoudsNode[loudsNodeIndex, default: []] {
                let inputElementsRange = InputGraphRange(
                    startIndex: graphNode.inputElementsRange.startIndex,
                    endIndex: lookupGraph.nodes[endNodeIndex].inputElementsRange.endIndex
                )
                let nextIndices = lookupGraph.allowedNextIndex[endNodeIndex, default: []]
                for element in dicdata {
                    if startsAtBeginning {
                        latticeNodes.append(.init(data: element, nextConvertNodeIndices: nextIndices, inputElementsRange: inputElementsRange, prevs: [.BOSNode()]))
                    } else {
                        latticeNodes.append(.init(data: element, nextConvertNodeIndices: nextIndices, inputElementsRange: inputElementsRange))
                    }
                }
            }
        }
        graphNodeIndex2LatticeNodes[graphNodeIndex] = latticeNodes
        // Mark this node as done and schedule its successors.
        processedIndexSet.insert(graphNodeIndex)
        if let cacheGraphNodeIndex {
            // Keep walking both graphs together while a cached counterpart exists.
            stack.append(contentsOf: lookupGraph.nextIndexWithMatch(graphNodeIndex, cacheNodeIndex: cacheGraphNodeIndex, cacheGraph: cacheLookupGraph))
        } else {
            // No cached counterpart: successors are scheduled without one as well.
            for nextIndex in lookupGraph.allowedNextIndex[graphNodeIndex, default: []] {
                stack.append((currentLookupGraphNodeIndex: nextIndex, cacheLookupGraphNodeIndex: nil))
            }
        }
    }
    return (lookupGraph, ConvertGraph(input: lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes))
}
func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
// split = 2048
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})

View File

@ -15,6 +15,7 @@ extension Kana2Kanji {
var endNode: ConvertGraph.LatticeNode
var correctGraph: CorrectGraph
var inputGraph: InputGraph
var lookupGraph: LookupGraph
var convertGraph: ConvertGraph
}
func _experimental_all(_ inputData: ComposingText, option: ConvertRequestOptions) -> Result {
@ -24,10 +25,10 @@ extension Kana2Kanji {
let inputGraph = InputGraph.build(input: correctGraph)
// convertGraph
print(#file, "lookup", inputGraph)
let convertGraph = self.dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: option)
let (lookupGraph, convertGraph) = self.dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: option)
print(#file, "convert")
let result = convertGraph.convertAll(option: option, dicdataStore: self.dicdataStore)
return Result(endNode: result, correctGraph: correctGraph, inputGraph: inputGraph, convertGraph: convertGraph)
return Result(endNode: result, correctGraph: correctGraph, inputGraph: inputGraph, lookupGraph: lookupGraph, convertGraph: convertGraph)
}
func _experimental_additional(
@ -44,13 +45,13 @@ extension Kana2Kanji {
}
// FIXME: inputGraph
let inputGraph = InputGraph.build(input: previousResult.correctGraph)
// TODO:
// convertGraph
print(#file, "lookup", previousResult.inputGraph)
let convertGraph = self.dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: option)
let (lookupGraph, convertGraph) = self.dicdataStore.buildConvertGraphDifferential(inputGraph: inputGraph, cacheLookupGraph: previousResult.lookupGraph, option: option)
print(#file, "convert")
// TODO:
let result = convertGraph.convertAll(option: option, dicdataStore: self.dicdataStore)
return Result(endNode: result, correctGraph: previousResult.correctGraph, inputGraph: inputGraph, convertGraph: convertGraph)
return Result(endNode: result, correctGraph: previousResult.correctGraph, inputGraph: inputGraph, lookupGraph: lookupGraph, convertGraph: convertGraph)
}
}
@ -81,7 +82,7 @@ final class ExperimentalConversionTests: XCTestCase {
c.insertAtCursorPosition("たいかく", inputStyle: .direct)
let correctGraph = CorrectGraph.build(input: c.input)
let inputGraph = InputGraph.build(input: consume correctGraph)
let convertGraph = dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: requestOptions())
let (_, convertGraph) = dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: requestOptions())
XCTAssertEqual(
convertGraph.nodes.first {
$0.latticeNodes.contains(where: {$0.data.word == ""})
@ -158,7 +159,7 @@ final class ExperimentalConversionTests: XCTestCase {
c.insertAtCursorPosition("youshouki", inputStyle: .roman2kana)
let correctGraph = CorrectGraph.build(input: c.input)
let inputGraph = InputGraph.build(input: consume correctGraph)
let convertGraph = dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: requestOptions())
let (_, convertGraph) = dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: requestOptions())
XCTAssertEqual(
convertGraph.nodes.first {
$0.latticeNodes.contains(where: {$0.data.word == ""})
@ -246,5 +247,13 @@ final class ExperimentalConversionTests: XCTestCase {
)
XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太鼓")) //
XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太古")) //
c.insertAtCursorPosition("", inputStyle: .direct)
let thirdResult = kana2kanji._experimental_additional(
composingText: c,
additionalInputsStartIndex: 3,
previousResult: secondResult,
option: requestOptions()
)
XCTAssertTrue(thirdResult.endNode.joinedPrevs().contains("大国")) //
}
}