差分ビルド用の実装

This commit is contained in:
Miwa / Ensan
2024-03-17 12:07:41 +09:00
parent d936559296
commit 44cecf2242
7 changed files with 297 additions and 259 deletions

View File

@ -6,8 +6,7 @@
//
import Foundation
@testable import KanaKanjiConverterModule
import XCTest
import KanaKanjiConverterModule
struct CorrectGraph {
enum Correction: CustomStringConvertible {
@ -36,6 +35,8 @@ struct CorrectGraph {
var allowedNextIndex: [Int: IndexSet] = [:]
/// For each node index, the set of node indices allowed as its predecessor (prevIndex)
var allowedPrevIndex: [Int: IndexSet] = [:]
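/// For each index into the `inputs` of `ComposingText`, the set of indices of the nodes whose `inputElementsRange` has that index as its endIndex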
var inputIndexToEndNodeIndices: [Int: IndexSet] = [0: IndexSet(integer: 0)]
struct Node: Equatable, Sendable {
var inputElementsRange: InputGraphRange
@ -81,172 +82,77 @@ struct CorrectGraph {
return lastIndexSet.first!
}
static func build(input: [ComposingText.InputElement]) -> Self {
var correctGraph = Self()
var inputIndexToEndNodeIndices: [Int: IndexSet] = [0: IndexSet(integer: 0)]
for (index, item) in zip(input.indices, input) {
//
do {
let nodeIndex = correctGraph.insert(
Node(
inputElementsRange: .range(index, index + 1),
inputStyle: InputGraphInputStyle(from: input[index].inputStyle).id,
correction: .none,
value: item.character
),
nextTo: inputIndexToEndNodeIndices[index, default: IndexSet()]
)
inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex)
}
//
let correctPrefixTree = switch item.inputStyle {
case .roman2kana: CorrectPrefixTree.roman2kana
case .direct: CorrectPrefixTree.direct
}
typealias Match = (replace: String, inputCount: Int)
typealias SearchItem = (
node: CorrectPrefixTree.Node,
nextIndex: Int,
route: [Character],
inputStyleId: InputGraphInputStyle.ID
mutating func update(with item: ComposingText.InputElement, index: Int, input: [ComposingText.InputElement]) {
//
do {
let nodeIndex = self.insert(
Node(
inputElementsRange: .range(index, index + 1),
inputStyle: InputGraphInputStyle(from: input[index].inputStyle).id,
correction: .none,
value: item.character
),
nextTo: self.inputIndexToEndNodeIndices[index, default: IndexSet()]
)
var stack: [SearchItem] = [
(correctPrefixTree, index, [], .all)
]
while let (cNode, cIndex, cRoute, cInputStyleId) = stack.popLast() {
guard cIndex < input.endIndex else {
continue
}
let inputStyleId = InputGraphInputStyle(from: input[cIndex].inputStyle).id
guard cInputStyleId.isCompatible(with: inputStyleId) else {
continue
}
if let nNode = cNode.find(key: input[cIndex].character) {
stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], inputStyleId))
for value in nNode.value {
if value.isEmpty {
continue
} else if value.count > 1 {
let nodeIndex = correctGraph.insertConnectedTypoNodes(
values: Array(value),
startIndex: index,
endIndex: index + cRoute.count + 1,
self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex)
}
//
let correctSuffixTree = InputGraphInputStyle(from: item.inputStyle).correctSuffixTree
typealias SearchItem = (
node: CorrectSuffixTree.Node,
nextIndex: Int,
routeCount: Int,
inputStyleId: InputGraphInputStyle.ID
)
var stack: [SearchItem] = [
(correctSuffixTree, index, 1, .all)
]
// backward search
while let (cNode, cIndex, cRouteCount, cInputStyleId) = stack.popLast() {
guard cIndex >= input.startIndex else {
continue
}
let inputStyleId = InputGraphInputStyle(from: input[cIndex].inputStyle).id
guard cInputStyleId.isCompatible(with: inputStyleId) else {
continue
}
if let nNode = cNode.find(key: input[cIndex].character) {
stack.append((nNode, cIndex - 1, cRouteCount + 1, inputStyleId))
for value in nNode.value {
if value.isEmpty {
continue
} else if value.count > 1 {
let nodeIndex = self.insertConnectedTypoNodes(
values: Array(value),
startIndex: index - cRouteCount + 1,
endIndex: index + 1,
inputStyle: inputStyleId,
lastIndexSet: self.inputIndexToEndNodeIndices[index - cRouteCount + 1, default: IndexSet()]
)
self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex)
} else {
let nodeIndex = self.insert(
Node(
inputElementsRange: .range(index - cRouteCount + 1, index + 1),
inputStyle: inputStyleId,
lastIndexSet: inputIndexToEndNodeIndices[index, default: IndexSet()]
)
inputIndexToEndNodeIndices[index + cRoute.count + 1, default: IndexSet()].insert(nodeIndex)
} else {
let nodeIndex = correctGraph.insert(
Node(
inputElementsRange: .range(index, index + cRoute.count + 1),
inputStyle: inputStyleId,
correction: .typo,
value: value.first!
),
nextTo: inputIndexToEndNodeIndices[index, default: IndexSet()]
)
inputIndexToEndNodeIndices[index + cRoute.count + 1, default: IndexSet()].insert(nodeIndex)
}
correction: .typo,
value: value.first!
),
nextTo: self.inputIndexToEndNodeIndices[index - cRouteCount + 1, default: IndexSet()]
)
self.inputIndexToEndNodeIndices[index + 1, default: IndexSet()].insert(nodeIndex)
}
}
}
}
}
/// Builds a graph by passing every element of `input`, in order, to `update(with:index:input:)`.
/// - Parameter input: The input elements to consume from front to back.
/// - Returns: A graph that started as `Self()` and was updated once per element.
static func build(input: [ComposingText.InputElement]) -> Self {
    var graph = Self()
    for position in input.indices {
        graph.update(with: input[position], index: position, input: input)
    }
    return graph
}
}
/// Tests for the nodes and predecessor links produced by `CorrectGraph.build(input:)`.
///
/// NOTE(review): many character/string literals in these tests are empty in this view;
/// presumably they held non-ASCII text that was lost — confirm against the repository.
final class CorrectGraphTests: XCTestCase {
    /// One direct-style element should yield an uncorrected node with range `.range(0, 1)`.
    func testBuildSimpleDirectInput() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "", inputStyle: .direct)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
    }
    /// One direct-style element should yield both a `.none` node and a `.typo` node over `.range(0, 1)`.
    func testBuildSimpleDirectInputWithTypo() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "", inputStyle: .direct)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .typo, value: "")
        )
    }
    /// Three direct-style elements: checks the `.none`/`.typo` nodes over `.range(1, 2)`, the node over
    /// `.range(2, 3)`, and that the looked-up node has two allowed predecessors.
    func testBuildMultipleDirectInputWithTypo() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "", inputStyle: .direct),
            .init(character: "", inputStyle: .direct),
            .init(character: "", inputStyle: .direct)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemFlickDirect, correction: .typo, value: "")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(2, 3), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
        // A missing node is reported as an unwrap failure instead of relying on
        // `XCTAssertThrowsError` failing because a string expression never throws.
        let index = try XCTUnwrap(
            graph.nodes.firstIndex(where: {$0.value == ""}),
            "Should not be nil"
        )
        XCTAssertEqual(graph.allowedPrevIndex[index, default: .init()].count, 2)
    }
    /// Roman input "k", "a" should yield uncorrected nodes over `.range(0, 1)` and `.range(1, 2)`.
    func testBuildSimpleRomanInput() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "k", inputStyle: .roman2kana),
            .init(character: "a", inputStyle: .roman2kana)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "k"}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemRomanKana, correction: .none, value: "k")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "a"}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemRomanKana, correction: .none, value: "a")
        )
    }
    /// Roman input "t", "s" should yield the plain nodes plus typo nodes "t" (`.startIndex(0)`) and
    /// "a" (`.endIndex(2)`), with the typo "t" node as the only allowed predecessor of "a".
    func testBuildSimpleRomanInputWithTypo() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "t", inputStyle: .roman2kana),
            .init(character: "s", inputStyle: .roman2kana)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "t" && $0.inputElementsRange == .range(0, 1)}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemRomanKana, correction: .none, value: "t")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "s"}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemRomanKana, correction: .none, value: "s")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "t" && $0.inputElementsRange == .startIndex(0)}),
            .init(inputElementsRange: .startIndex(0), inputStyle: .systemRomanKana, correction: .typo, value: "t")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "a"}),
            .init(inputElementsRange: .endIndex(2), inputStyle: .systemRomanKana, correction: .typo, value: "a")
        )
        // Fail with an explicit unwrap error when the "a" node is missing.
        let index = try XCTUnwrap(
            graph.nodes.firstIndex(where: {$0.value == "a"}),
            "Should not be nil"
        )
        let indices = graph.allowedPrevIndex[index, default: .init()]
        XCTAssertEqual(indices.count, 1)
        XCTAssertEqual(
            indices.first,
            graph.nodes.firstIndex(where: {$0.value == "t" && $0.inputElementsRange == .startIndex(0)})
        )
    }
}

View File

@ -0,0 +1,106 @@
//
// CorrectGraphTests.swift
//
//
// Created by miwa on 2024/02/21.
//
import Foundation
@testable import KanaKanjiConverterModule
import XCTest
/// Tests for the nodes and predecessor links produced by `CorrectGraph.build(input:)`.
///
/// NOTE(review): many character/string literals in these tests are empty in this view;
/// presumably they held non-ASCII text that was lost — confirm against the repository.
final class CorrectGraphTests: XCTestCase {
    /// One direct-style element should yield an uncorrected node with range `.range(0, 1)`.
    func testBuildSimpleDirectInput() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "", inputStyle: .direct)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
    }
    /// One direct-style element should yield both a `.none` node and a `.typo` node over `.range(0, 1)`.
    func testBuildSimpleDirectInputWithTypo() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "", inputStyle: .direct)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemFlickDirect, correction: .typo, value: "")
        )
    }
    /// Three direct-style elements: checks the `.none`/`.typo` nodes over `.range(1, 2)`, the node over
    /// `.range(2, 3)`, and that the looked-up node has two allowed predecessors.
    func testBuildMultipleDirectInputWithTypo() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "", inputStyle: .direct),
            .init(character: "", inputStyle: .direct),
            .init(character: "", inputStyle: .direct)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemFlickDirect, correction: .typo, value: "")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == ""}),
            .init(inputElementsRange: .range(2, 3), inputStyle: .systemFlickDirect, correction: .none, value: "")
        )
        // A missing node is reported as an unwrap failure instead of relying on
        // `XCTAssertThrowsError` failing because a string expression never throws.
        let index = try XCTUnwrap(
            graph.nodes.firstIndex(where: {$0.value == ""}),
            "Should not be nil"
        )
        XCTAssertEqual(graph.allowedPrevIndex[index, default: .init()].count, 2)
    }
    /// Roman input "k", "a" should yield uncorrected nodes over `.range(0, 1)` and `.range(1, 2)`.
    func testBuildSimpleRomanInput() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "k", inputStyle: .roman2kana),
            .init(character: "a", inputStyle: .roman2kana)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "k"}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemRomanKana, correction: .none, value: "k")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "a"}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemRomanKana, correction: .none, value: "a")
        )
    }
    /// Roman input "t", "s" should yield the plain nodes plus typo nodes "t" (`.startIndex(0)`) and
    /// "a" (`.endIndex(2)`), with the typo "t" node as the only allowed predecessor of "a".
    func testBuildSimpleRomanInputWithTypo() throws {
        let graph = CorrectGraph.build(input: [
            .init(character: "t", inputStyle: .roman2kana),
            .init(character: "s", inputStyle: .roman2kana)
        ])
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "t" && $0.inputElementsRange == .range(0, 1)}),
            .init(inputElementsRange: .range(0, 1), inputStyle: .systemRomanKana, correction: .none, value: "t")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "s"}),
            .init(inputElementsRange: .range(1, 2), inputStyle: .systemRomanKana, correction: .none, value: "s")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "t" && $0.inputElementsRange == .startIndex(0)}),
            .init(inputElementsRange: .startIndex(0), inputStyle: .systemRomanKana, correction: .typo, value: "t")
        )
        XCTAssertEqual(
            graph.nodes.first(where: {$0.value == "a"}),
            .init(inputElementsRange: .endIndex(2), inputStyle: .systemRomanKana, correction: .typo, value: "a")
        )
        // Fail with an explicit unwrap error when the "a" node is missing.
        let index = try XCTUnwrap(
            graph.nodes.firstIndex(where: {$0.value == "a"}),
            "Should not be nil"
        )
        let indices = graph.allowedPrevIndex[index, default: .init()]
        XCTAssertEqual(indices.count, 1)
        XCTAssertEqual(
            indices.first,
            graph.nodes.firstIndex(where: {$0.value == "t" && $0.inputElementsRange == .startIndex(0)})
        )
    }
}

View File

@ -1,88 +0,0 @@
//
// CorrectPrefixTree.swift
//
//
// Created by miwa on 2024/02/23.
//
import Foundation
@testable import KanaKanjiConverterModule
import XCTest
// prefix tree
enum CorrectPrefixTree {
    /// A trie node: `children` is keyed by the next character, and `value` holds the
    /// replacement strings registered for the route that ends at this node.
    final class Node {
        var children: [Character: Node] = [:]
        var value: [String]

        init(_ children: [Character: Node] = [:], value: [String] = []) {
            self.children = children
            self.value = value
        }

        /// Creates a node with no children that carries `value`.
        static func terminal(_ value: [String]) -> Node {
            .init(value: value)
        }

        /// Returns the child stored under `key`, or `nil` when there is none.
        func find(key: Character) -> Node? {
            children[key]
        }

        /// Stores `value` on the node reached by following `route`, creating any missing
        /// nodes along the way. An empty `route` overwrites this node's own `value`.
        func insert(route: some Collection<Character>, value: consuming [String]) {
            guard let head = route.first else {
                self.value = consume value
                return
            }
            let child: Node
            if let existing = children[head] {
                child = existing
            } else {
                child = Node()
                children[head] = child
            }
            child.insert(route: route.dropFirst(), value: consume value)
        }
    }

    /// Corrections for roman input: first typed character, then second typed character,
    /// leading to the replacement string (e.g. "g" then "s" gives "ga").
    static let roman2kana: Node = {
        let corrections: [Character: [Character: String]] = [
            "g": [
                "s": "ga", "q": "ga",
                "d": "ge", "r": "ge", "w": "ge",
                "k": "gi",
                "l": "go", "p": "go",
                "j": "gu"
            ],
            "m": [
                "s": "ma", "q": "ma",
                "d": "me", "r": "me", "w": "me",
                "k": "mi",
                "l": "mo", "p": "mo",
                "j": "mu"
            ],
            "t": [
                "s": "ta", "q": "ta",
                "d": "te", "r": "te", "w": "te",
                "k": "ti",
                "l": "to", "p": "to",
                "j": "tu"
            ]
        ]
        let branches = corrections.mapValues { typoTable in
            Node(typoTable.mapValues { corrected in Node.terminal([corrected]) })
        }
        return Node(branches)
    }()

    /// Corrections for direct input.
    // NOTE(review): the keys and values below are empty in this view; presumably they held
    // non-ASCII text that was lost — confirm against the repository.
    static let direct: Node = {
        Node([
            "": .terminal([""]),
            "": .terminal(["", ""])
        ])
    }()
}

View File

@ -0,0 +1,93 @@
//
// CorrectSuffixTree.swift
//
//
// Created by miwa on 2024/02/23.
//
import Foundation
@testable import KanaKanjiConverterModule
import XCTest
/// suffix tree
enum CorrectSuffixTree {
    /// A trie node: `children` is keyed by the next character to match, and `value` holds
    /// the replacement strings registered for the route that ends at this node.
    final class Node {
        var children: [Character: Node] = [:]
        var value: [String]

        init(_ children: [Character: Node] = [:], value: [String] = []) {
            self.children = children
            self.value = value
        }

        /// Creates a node with no children that carries the given replacements.
        static func terminal(_ value: [String]) -> Node {
            .init(value: value)
        }

        /// Creates a node with no children that carries a single replacement.
        static func terminal(_ value: String) -> Node {
            .init(value: [value])
        }

        /// Returns the child stored under `key`, or `nil` when there is none.
        func find(key: Character) -> Node? {
            children[key]
        }
    }

    /// Corrections for roman input, keyed from the last typed character backwards:
    /// the mistyped key first, then the consonant before it (e.g. "s" then "g" gives "ga").
    static let roman2kana: Node = {
        let corrections: [Character: [Character: String]] = [
            "s": ["g": "ga", "m": "ma", "t": "ta"],
            "q": ["g": "ga", "m": "ma", "t": "ta"],
            "d": ["g": "ge", "m": "me", "t": "te"],
            "r": ["g": "ge", "m": "me", "t": "te"],
            "w": ["g": "ge", "m": "me", "t": "te"],
            "k": ["g": "gi", "m": "mi", "t": "ti"],
            "l": ["g": "go", "m": "mo", "t": "to"],
            "p": ["g": "go", "m": "mo", "t": "to"],
            "j": ["g": "gu", "m": "mu", "t": "tu"]
        ]
        let branches = corrections.mapValues { consonantTable in
            Node(consonantTable.mapValues { corrected in Node.terminal(corrected) })
        }
        return Node(branches)
    }()

    /// Corrections for direct input.
    // NOTE(review): the keys and values below are empty in this view; presumably they held
    // non-ASCII text that was lost — confirm against the repository.
    static let direct: Node = {
        Node([
            "": .terminal([""]),
            "": .terminal([""]),
            "": .terminal(["", ""])
        ])
    }()
}

View File

@ -60,10 +60,10 @@ struct InputGraphInputStyle: Identifiable {
}
}
private init(id: InputGraphInputStyle.ID, replaceSuffixTree: ReplaceSuffixTree.Node, correctPrefixTree: CorrectPrefixTree.Node) {
private init(id: InputGraphInputStyle.ID, replaceSuffixTree: ReplaceSuffixTree.Node, correctSuffixTree: CorrectSuffixTree.Node) {
self.id = id
self.replaceSuffixTree = replaceSuffixTree
self.correctPrefixTree = correctPrefixTree
self.correctSuffixTree = correctSuffixTree
}
struct ID: Equatable, Hashable, Sendable, CustomStringConvertible {
@ -97,22 +97,22 @@ struct InputGraphInputStyle: Identifiable {
static let all: Self = Self(
id: .all,
replaceSuffixTree: ReplaceSuffixTree.Node(),
correctPrefixTree: CorrectPrefixTree.Node()
correctSuffixTree: CorrectSuffixTree.Node()
)
static let systemFlickDirect: Self = Self(
id: .systemFlickDirect,
replaceSuffixTree: ReplaceSuffixTree.direct,
correctPrefixTree: CorrectPrefixTree.direct
correctSuffixTree: CorrectSuffixTree.direct
)
static let systemRomanKana: Self = Self(
id: .systemRomanKana,
replaceSuffixTree: ReplaceSuffixTree.roman2kana,
correctPrefixTree: CorrectPrefixTree.roman2kana
correctSuffixTree: CorrectSuffixTree.roman2kana
)
/// `id` for the input style.
/// - warning: value `0x00-0x7F` is reserved for system space.
var id: ID
var replaceSuffixTree: ReplaceSuffixTree.Node
var correctPrefixTree: CorrectPrefixTree.Node
var correctSuffixTree: CorrectSuffixTree.Node
}

View File

@ -37,7 +37,6 @@ final class InputGraphTests: XCTestCase {
.init(character: "", inputStyle: .direct)
])
let inputGraph = InputGraph.build(input: correctGraph)
XCTAssertEqual(inputGraph.nodes.count, 5) // Root nodes
}
func testBuildSimpleRoman2KanaInput_1文字だけ() throws {
@ -151,7 +150,6 @@ final class InputGraphTests: XCTestCase {
XCTAssertNil(
inputGraph.nodes.first(where: {$0.character == "t" && $0.inputElementsRange == .startIndex(1)})
)
// groupId
XCTAssertEqual(
inputGraph.nodes.first(where: {$0.character == ""}),
.init(character: "", inputElementsRange: .range(1, 3), correction: .typo)
@ -325,7 +323,6 @@ final class InputGraphTests: XCTestCase {
inputGraph.nodes.first(where: {$0.character == ""}),
.init(character: "", inputElementsRange: .endIndex(3), correction: .typo)
)
print(inputGraph)
}
func testBuildMixedInput_2文字_ts() throws {

View File

@ -23,6 +23,30 @@ extension Kana2Kanji {
let result = convertGraph.convertAll(option: option, dicdataStore: self.dicdataStore)
return result
}
/// Extends `previousCorrectGraph` with the newly added inputs, then runs the rest of the
/// conversion pipeline on the updated graph.
///
/// Only the correct graph is updated incrementally here; `previousInputGraph`,
/// `previousLookupGraph` and `previousConvertGraph` are accepted but not read by this body.
/// - Parameters:
///   - composingText: The text whose `input` supplies the elements to convert.
///   - additionalInputsStartIndex: Index into `composingText.input` of the first element not yet in `previousCorrectGraph`.
///   - previousCorrectGraph: The graph to update with the additional inputs.
///   - previousInputGraph: Not used by this body.
///   - previousLookupGraph: Not used by this body.
///   - previousConvertGraph: Not used by this body.
///   - option: Options forwarded to `buildConvertGraph` and `convertAll`.
/// - Returns: The result of `convertAll` on the newly built convert graph.
func _experimental_additional(
    composingText: ComposingText,
    additionalInputsStartIndex: Int,
    previousCorrectGraph: consuming CorrectGraph,
    previousInputGraph: consuming InputGraph,
    previousLookupGraph: consuming LookupGraph,
    previousConvertGraph: consuming ConvertGraph,
    option: ConvertRequestOptions
) -> ConvertGraph.LatticeNode {
    // Feed each additional input element into the existing correct graph.
    print(#file, "start")
    let elements = composingText.input
    for position in additionalInputsStartIndex ..< elements.endIndex {
        previousCorrectGraph.update(with: elements[position], index: position, input: elements)
    }
    // TODO: (note text not visible in this view) the input graph is built anew from the
    // updated correct graph; `previousInputGraph` is not consulted.
    let rebuiltInputGraph = InputGraph.build(input: consume previousCorrectGraph)
    // Build the convert graph from the input graph.
    print(#file, "lookup", rebuiltInputGraph)
    let lattice = dicdataStore.buildConvertGraph(inputGraph: consume rebuiltInputGraph, option: option)
    print(#file, "convert")
    return lattice.convertAll(option: option, dicdataStore: dicdataStore)
}
}
private extension ConvertGraph.LatticeNode {