giving up differential input graph construction

This commit is contained in:
Miwa / Ensan
2024-03-17 18:32:44 +09:00
parent e9142810cf
commit a084e86033
5 changed files with 175 additions and 61 deletions

View File

@ -56,10 +56,11 @@ struct CorrectGraph {
return index
}
mutating func insertConnectedTypoNodes(values: [Character], startIndex: Int, endIndex: Int, inputStyle: InputGraphInputStyle.ID, lastIndexSet: IndexSet) -> Int {
private mutating func insertConnectedTypoNodes(values: [Character], startIndex: Int, endIndex: Int, inputStyle: InputGraphInputStyle.ID, lastIndexSet: IndexSet) -> (lastIndex: Int, insertedIndexSet: IndexSet) {
guard !values.isEmpty else {
fatalError("values must not be empty")
}
var insertedIndexSet = IndexSet()
var lastIndexSet = lastIndexSet
for (i, c) in zip(values.indices, values) {
let inputElementRange: InputGraphRange = if i == values.startIndex && i+1 == values.endIndex {
@ -77,12 +78,16 @@ struct CorrectGraph {
correction: .typo,
value: c
)
lastIndexSet = IndexSet(integer: self.insert(node, nextTo: lastIndexSet))
let nodeIndex = self.insert(node, nextTo: lastIndexSet)
lastIndexSet = IndexSet(integer: nodeIndex)
insertedIndexSet.insert(nodeIndex)
}
return lastIndexSet.first!
return (lastIndexSet.first!, insertedIndexSet)
}
mutating func update(with item: ComposingText.InputElement, index: Int, input: [ComposingText.InputElement]) {
@discardableResult
mutating func update(with item: ComposingText.InputElement, index: Int, input: [ComposingText.InputElement]) -> IndexSet {
var insertedIndexSet = IndexSet()
//
do {
let nodeIndex = self.insert(
@ -95,6 +100,7 @@ struct CorrectGraph {
nextTo: self.inputIndexToEndNodeIndices[index, default: IndexSet()]
)
self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex)
insertedIndexSet.insert(nodeIndex)
}
//
@ -123,7 +129,7 @@ struct CorrectGraph {
if value.isEmpty {
continue
} else if value.count > 1 {
let nodeIndex = self.insertConnectedTypoNodes(
let (nodeIndex, indexSet) = self.insertConnectedTypoNodes(
values: Array(value),
startIndex: index - cRouteCount + 1,
endIndex: index + 1,
@ -131,6 +137,7 @@ struct CorrectGraph {
lastIndexSet: self.inputIndexToEndNodeIndices[index - cRouteCount + 1, default: IndexSet()]
)
self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex)
insertedIndexSet.formUnion(indexSet)
} else {
let nodeIndex = self.insert(
Node(
@ -142,10 +149,12 @@ struct CorrectGraph {
nextTo: self.inputIndexToEndNodeIndices[index - cRouteCount + 1, default: IndexSet()]
)
self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex)
insertedIndexSet.insert(nodeIndex)
}
}
}
}
return insertedIndexSet
}
static func build(input: [ComposingText.InputElement]) -> Self {

View File

@ -38,47 +38,56 @@ enum CorrectSuffixTree {
"s": Node([
"g": .terminal("ga"),
"m": .terminal("ma"),
"t": .terminal("ta")
"t": .terminal("ta"),
"y": .terminal("ya")
]),
"q": Node([
"g": .terminal("ga"),
"m": .terminal("ma"),
"t": .terminal("ta")
"t": .terminal("ta"),
"y": .terminal("ya")
]),
"d": Node([
"g": .terminal("ge"),
"m": .terminal("me"),
"t": .terminal("te")
"t": .terminal("te"),
"y": .terminal("ya")
]),
"r": Node([
"g": .terminal("ge"),
"m": .terminal("me"),
"t": .terminal("te")
"t": .terminal("te"),
"y": .terminal("ya")
]),
"w": Node([
"g": .terminal("ge"),
"m": .terminal("me"),
"t": .terminal("te")
"t": .terminal("te"),
"y": .terminal("ya")
]),
"k": Node([
"g": .terminal("gi"),
"m": .terminal("mi"),
"t": .terminal("ti")
"t": .terminal("ti"),
"y": .terminal("ya")
]),
"l": Node([
"g": .terminal("go"),
"m": .terminal("mo"),
"t": .terminal("to")
"t": .terminal("to"),
"y": .terminal("ya")
]),
"p": Node([
"g": .terminal("go"),
"m": .terminal("mo"),
"t": .terminal("to")
"t": .terminal("to"),
"y": .terminal("ya")
]),
"j": Node([
"g": .terminal("gu"),
"m": .terminal("mu"),
"t": .terminal("tu")
"t": .terminal("tu"),
"y": .terminal("ya")
])
])
}()

View File

@ -149,7 +149,6 @@ struct InputGraph {
for i in prevIndices {
// firstIndexreplacement
self.allowedNextIndex[i, default: IndexSet()].insert(firstIndex)
self.allowedNextIndex[i, default: IndexSet()].remove(replacement.route[0])
}
//
for i in firstIndex ..< lastIndex {
@ -194,13 +193,40 @@ struct InputGraph {
return newGraph
}
/// Applies nodes that were newly added to a `CorrectGraph` onto this input graph.
///
/// Every index reachable from `addedNodeIndices` is handed to `update(_:nodeIndex:)`
/// exactly once. A node is only processed after all of its predecessors that are
/// themselves members of `addedNodeIndices` have been processed.
/// - Parameters:
///   - newCorrectGraph: The correct graph that already contains the added nodes.
///   - addedNodeIndices: Indices of the nodes added to `newCorrectGraph`.
/// - Warning: NOTE(review): the original warning text is truncated in the reviewed
///   source; it presumably marks this method as not ready for use. Confirm.
mutating func _applyAdditionalCorrectGraph(_ newCorrectGraph: CorrectGraph, addedNodeIndices: IndexSet) {
    var visited = IndexSet()
    // The array is used as a stack (popLast); reversing the ascending index set
    // makes the smallest added index come off the stack first.
    var stack = Array(addedNodeIndices.reversed())
    while let current = stack.popLast() {
        guard !visited.contains(current) else {
            continue
        }
        // Only predecessors that are part of the added set can block this node.
        let addedPredecessors = newCorrectGraph.allowedPrevIndex[current, default: IndexSet()].intersection(addedNodeIndices)
        let pending = addedPredecessors.subtracting(visited)
        if !pending.isEmpty {
            // Revisit this node later, after its still-unprocessed predecessors.
            stack.append(current)
            stack.append(contentsOf: pending)
            continue
        }
        visited.insert(current)
        // Index 0 is the root and must never be passed to update here.
        assert(current != 0)
        self.update(newCorrectGraph, nodeIndex: current)
        // Successors are queued regardless of whether they are in addedNodeIndices.
        stack.append(contentsOf: newCorrectGraph.allowedNextIndex[current, default: IndexSet()])
    }
}
static func build(input: CorrectGraph) -> Self {
var inputGraph = Self()
// update
var nodeIndices = Array([0])
var processedIndices = IndexSet()
while let nodeIndex = nodeIndices.popLast() {
print("build", input.nodes[nodeIndex].value)
if processedIndices.contains(nodeIndex) {
continue
}

View File

@ -128,9 +128,9 @@ final class InputGraphTests: XCTestCase {
inputGraph.nodes.first(where: {$0.character == "s"}),
.init(character: "s", inputElementsRange: .range(2, 3), correction: .none)
)
//
XCTAssertNil(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .startIndex(1)})
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .startIndex(1)}),
.init(character: "t", inputElementsRange: .startIndex(1), correction: .typo)
)
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == ""}),
@ -149,14 +149,17 @@ final class InputGraphTests: XCTestCase {
inputGraph.nodes.first(where: {$0.character == ""}),
.init(character: "", inputElementsRange: .range(0, 1), correction: .none)
)
XCTAssertNil(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(1, 2)})
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(1, 2)}),
.init(character: "t", inputElementsRange: .range(1, 2), correction: .none)
)
XCTAssertNil(
inputGraph.nodes.first(where: {$0.character == "s"})
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == "s"}),
.init(character: "s", inputElementsRange: .range(2, 3), correction: .none)
)
XCTAssertNil(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .startIndex(1)})
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .startIndex(1)}),
.init(character: "t", inputElementsRange: .startIndex(1), correction: .typo)
)
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == ""}),
@ -222,8 +225,13 @@ final class InputGraphTests: XCTestCase {
inputGraph.nodes.first(where: {$0.character == ""}),
.init(character: "", inputElementsRange: .startIndex(0), correction: .none)
)
XCTAssertNil(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(0, 1)})
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(0, 1)}),
.init(character: "t", inputElementsRange: .range(0, 1), correction: .none)
)
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .range(1, 2)}),
.init(character: "t", inputElementsRange: .range(1, 2), correction: .none)
)
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .endIndex(2)}),
@ -245,6 +253,8 @@ final class InputGraphTests: XCTestCase {
inputGraph.nodes.first(where: {$0.character == ""}),
.init(character: "", inputElementsRange: .endIndex(3), correction: .none)
)
// [t(1)t(2) t(3)]t(2)a
XCTAssertEqual(inputGraph.nodes.filter({$0.character == ""}).count, 1)
}
func testBuildSimpleRoman2KanaInput_3文字_nta() throws {
let correctGraph = CorrectGraph.build(input: [
@ -333,6 +343,42 @@ final class InputGraphTests: XCTestCase {
)
}
/// Builds the input graph for the roman2kana input "tysa" and checks the nodes
/// that remain after `clean()`, for both the literal and the typo-corrected reading.
///
/// NOTE(review): the kana literals in this test were empty (`""`) in the reviewed
/// text, which made several predicates identical. They are reconstructed here from
/// the asserted input ranges and the `"s" -> "y": "ya"` typo entry; confirm against
/// the repository before relying on them.
func testBuildSimpleRoman2KanaInput_4文字_tysa() throws {
    // "tysa" reads literally as t / y / さ (sa). Treating "s" as a typo yields
    // "tya" + "a", i.e. ち + ゃ followed by あ.
    let correctGraph = CorrectGraph.build(input: [
        .init(character: "t", inputStyle: .roman2kana),
        .init(character: "y", inputStyle: .roman2kana),
        .init(character: "s", inputStyle: .roman2kana),
        .init(character: "a", inputStyle: .roman2kana)
    ])
    // All assertions run against the cleaned graph.
    let inputGraph = InputGraph.build(input: correctGraph).clean()

    // Literal reading: "t" and "y" stay as-is, "sa" becomes さ.
    XCTAssertEqual(
        inputGraph.nodes.first(where: {$0.character == "t"}),
        .init(character: "t", inputElementsRange: .range(0, 1), correction: .none)
    )
    XCTAssertEqual(
        inputGraph.nodes.first(where: {$0.character == "y" && !$0.correction.isTypo}),
        .init(character: "y", inputElementsRange: .range(1, 2), correction: .none)
    )
    XCTAssertEqual(
        inputGraph.nodes.first(where: {$0.character == "さ"}),
        .init(character: "さ", inputElementsRange: .range(2, 4), correction: .none)
    )

    // Typo-corrected reading: "tys" -> "tya" produces ち (starting at 0) and ゃ (ending at 3).
    XCTAssertEqual(
        inputGraph.nodes.first(where: {$0.character == "ち"}),
        .init(character: "ち", inputElementsRange: .startIndex(0), correction: .typo)
    )
    XCTAssertEqual(
        inputGraph.nodes.first(where: {$0.character == "ゃ" && $0.correction == .typo}),
        .init(character: "ゃ", inputElementsRange: .endIndex(3), correction: .typo)
    )
    // The trailing "a" is consumed on its own as あ.
    XCTAssertEqual(
        inputGraph.nodes.first(where: {$0.character == "あ"}),
        .init(character: "あ", inputElementsRange: .range(3, 4), correction: .none)
    )
}
func testBuildMixedInput_2文字_ts() throws {
let correctGraph = CorrectGraph.build(input: [
.init(character: "t", inputStyle: .roman2kana),

View File

@ -11,41 +11,46 @@ import Foundation
import XCTest
extension Kana2Kanji {
func _experimental_all(_ inputData: ComposingText, option: ConvertRequestOptions) -> ConvertGraph.LatticeNode {
/// Bundles the outcome of an experimental conversion together with the graphs
/// that were built to produce it, so a later call can take them as `previousResult`.
struct Result {
/// Lattice node returned by `convertGraph.convertAll(option:dicdataStore:)`.
var endNode: ConvertGraph.LatticeNode
/// Correct graph built from the composing text's input elements.
var correctGraph: CorrectGraph
/// Input graph built from `correctGraph`.
var inputGraph: InputGraph
/// Convert graph built by `dicdataStore.buildConvertGraph(inputGraph:option:)`.
var convertGraph: ConvertGraph
}
func _experimental_all(_ inputData: ComposingText, option: ConvertRequestOptions) -> Result {
//
print(#file, "start")
let correctGraph = CorrectGraph.build(input: inputData.input)
let inputGraph = InputGraph.build(input: consume correctGraph)
let inputGraph = InputGraph.build(input: correctGraph)
// convertGraph
print(#file, "lookup", inputGraph)
let convertGraph = self.dicdataStore.buildConvertGraph(inputGraph: consume inputGraph, option: option)
let convertGraph = self.dicdataStore.buildConvertGraph(inputGraph: inputGraph, option: option)
print(#file, "convert")
let result = convertGraph.convertAll(option: option, dicdataStore: self.dicdataStore)
return result
return Result(endNode: result, correctGraph: correctGraph, inputGraph: inputGraph, convertGraph: convertGraph)
}
func _experimental_additional(
composingText: ComposingText,
additionalInputsStartIndex: Int,
previousCorrectGraph: consuming CorrectGraph,
previousInputGraph: consuming InputGraph,
previousLookupGraph: consuming LookupGraph,
previousConvertGraph: consuming ConvertGraph,
previousResult: consuming Result,
option: ConvertRequestOptions
) -> ConvertGraph.LatticeNode {
) -> Result {
//
print(#file, "start")
var insertedIndexSet = IndexSet()
for i in additionalInputsStartIndex ..< composingText.input.endIndex {
previousCorrectGraph.update(with: composingText.input[i], index: i, input: composingText.input)
insertedIndexSet.formUnion(previousResult.correctGraph.update(with: composingText.input[i], index: i, input: composingText.input))
}
// FIXME: inputGraph
let inputGraph = InputGraph.build(input: previousResult.correctGraph)
// TODO:
let inputGraph = InputGraph.build(input: consume previousCorrectGraph)
// convertGraph
print(#file, "lookup", inputGraph)
let convertGraph = self.dicdataStore.buildConvertGraph(inputGraph: consume inputGraph, option: option)
print(#file, "lookup", previousResult.inputGraph)
let convertGraph = self.dicdataStore.buildConvertGraph(inputGraph: previousResult.inputGraph, option: option)
print(#file, "convert")
let result = convertGraph.convertAll(option: option, dicdataStore: self.dicdataStore)
return result
return Result(endNode: result, correctGraph: previousResult.correctGraph, inputGraph: previousResult.inputGraph, convertGraph: convertGraph)
}
}
@ -92,8 +97,8 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("たい", inputStyle: .direct)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("タイ")) //
XCTAssertTrue(result.joinedPrevs().contains("")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("タイ")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("")) //
}
func testConversion_いか() throws {
@ -102,9 +107,9 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("いか", inputStyle: .direct)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("以下")) //
XCTAssertTrue(result.joinedPrevs().contains("伊賀")) //
print(result.joinedPrevs())
XCTAssertTrue(result.endNode.joinedPrevs().contains("以下")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("伊賀")) //
print(result.endNode.joinedPrevs())
}
func testConversion_たいか() throws {
@ -113,10 +118,10 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("たいか", inputStyle: .direct)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("対価")) //
XCTAssertTrue(result.joinedPrevs().contains("大河")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("対価")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("大河")) //
// FIXME:
print(result.joinedPrevs())
print(result.endNode.joinedPrevs())
}
func testConversion_たいかく() throws {
@ -125,8 +130,8 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("たいかく", inputStyle: .direct)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("体格")) //
XCTAssertTrue(result.joinedPrevs().contains("退学")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("体格")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("退学")) //
}
func testConversion_むらさき() throws {
@ -135,7 +140,7 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("むらさき", inputStyle: .direct)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("")) //
}
func testBuildConvertGraph_youshouki() throws {
@ -160,7 +165,7 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("youshouki", inputStyle: .roman2kana)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("幼少期")) //
XCTAssertTrue(result.endNode.joinedPrevs().contains("幼少期")) //
}
func testConversion_みらいえいが() throws {
@ -170,13 +175,13 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("みらいえいが", inputStyle: .direct)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("未来映画"))
XCTAssertTrue(result.endNode.joinedPrevs().contains("未来映画"))
}
do {
var c = ComposingText()
c.insertAtCursorPosition("miraieiga", inputStyle: .roman2kana)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("未来映画"))
XCTAssertTrue(result.endNode.joinedPrevs().contains("未来映画"))
}
}
@ -187,32 +192,51 @@ final class ExperimentalConversionTests: XCTestCase {
var c = ComposingText()
c.insertAtCursorPosition("sitta", inputStyle: .roman2kana)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("知った"))
XCTAssertTrue(result.endNode.joinedPrevs().contains("知った"))
}
do {
var c = ComposingText()
c.insertAtCursorPosition("unda", inputStyle: .roman2kana)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("産んだ"))
XCTAssertTrue(result.endNode.joinedPrevs().contains("産んだ"))
}
do {
var c = ComposingText()
c.insertAtCursorPosition("ixtsuta", inputStyle: .roman2kana)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("言った"))
XCTAssertTrue(result.endNode.joinedPrevs().contains("言った"))
}
do {
var c = ComposingText()
c.insertAtCursorPosition("its", inputStyle: .roman2kana)
let result = kana2kanji._experimental_all(c, option: requestOptions())
XCTAssertTrue(result.joinedPrevs().contains("いた"))
XCTAssertTrue(result.endNode.joinedPrevs().contains("いた"))
}
do {
var c = ComposingText()
c.insertAtCursorPosition("itsi", inputStyle: .roman2kana)
let result = kana2kanji._experimental_all(c, option: requestOptions())
print(result.joinedPrevs())
XCTAssertTrue(result.joinedPrevs().contains("痛い"))
print(result.endNode.joinedPrevs())
XCTAssertTrue(result.endNode.joinedPrevs().contains("痛い"))
}
}
/// Converts "たい" from scratch, then appends one more character and converts
/// again through `_experimental_additional`, passing the first result along.
func testConversion_incremental_たい() throws {
    let dicdataStore = DicdataStore(requestOptions: requestOptions())
    let kana2kanji = Kana2Kanji(dicdataStore: dicdataStore)
    var c = ComposingText()
    c.insertAtCursorPosition("たい", inputStyle: .direct)
    let firstResult = kana2kanji._experimental_all(c, option: requestOptions())
    XCTAssertTrue(firstResult.endNode.joinedPrevs().contains("タイ"))
    // NOTE(review): the expected candidate below is empty in the reviewed text
    // (a single character appears to have been lost); restore it from the repository.
    XCTAssertTrue(firstResult.endNode.joinedPrevs().contains(""))

    // Append "こ" so the composing text becomes "たいこ". The reviewed text inserted
    // an empty string here, which adds no input element, leaves the range
    // `2 ..< input.endIndex` empty, and cannot yield the candidates asserted below.
    c.insertAtCursorPosition("こ", inputStyle: .direct)
    let secondResult = kana2kanji._experimental_additional(
        composingText: c,
        additionalInputsStartIndex: 2,
        previousResult: firstResult,
        option: requestOptions()
    )
    // Both candidates read "たいこ".
    XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太鼓"))
    XCTAssertTrue(secondResult.endNode.joinedPrevs().contains("太古"))
}
}