[WIP] InputGraphベースの入力管理の実装 (#44)

* Test-level implementation of InputGraph

* move file

* implement typo-sensitiveness

* implement input style and input style boundary checking

* fix ci

* fix ci 2
This commit is contained in:
Miwa / Ensan
2024-02-22 16:56:22 +09:00
committed by GitHub
parent 52a1715e69
commit 5431bafb27
2 changed files with 538 additions and 0 deletions

View File

@ -0,0 +1,535 @@
//
// InputGraphTests.swift
//
//
// Created by miwa on 2024/02/21.
//
import Foundation
@testable import KanaKanjiConverterModule
import XCTest
// prefix tree
/// Prefix trees that map a sequence of input characters to its replacement string.
enum ReplacePrefixTree {
    /// One node of the tree; `value` is set when the path from the root to this node is a complete rule.
    final class Node {
        var children: [Character: Node] = [:]
        var value: String?

        init(_ children: [Character: Node] = [:], value: String? = nil) {
            self.children = children
            self.value = value
        }

        /// Creates a leaf node carrying `value`.
        static func terminal(_ value: String) -> Node {
            Node(value: value)
        }

        /// Returns the child reached by `key`, or `nil` when there is none.
        func find(key: Character) -> Node? {
            children[key]
        }

        /// Stores `value` at the end of `route`, creating intermediate nodes as needed.
        /// An empty `route` stores the value on this node itself.
        func insert(route: some Collection<Character>, value: consuming String) {
            var cursor = self
            for key in route {
                if let existing = cursor.children[key] {
                    cursor = existing
                } else {
                    let created = Node()
                    cursor.children[key] = created
                    cursor = created
                }
            }
            cursor.value = consume value
        }
    }

    /// Tree built from every entry of `Roman2Kana.hiraganaChanges`.
    static let roman2kana: Node = {
        let root = Node()
        for entry in KanaKanjiConverterModule.Roman2Kana.hiraganaChanges {
            root.insert(route: entry.key, value: String(entry.value))
        }
        return root
    }()

    /// Direct input has no replacement rules, so its tree is empty.
    static let direct: Node = Node()
}
// prefix tree
/// Prefix trees that map a sequence of input characters to the alternative
/// strings it may be corrected to.
enum CorrectPrefixTree {
    /// One node of the tree; `value` lists the alternatives for the path ending at this node.
    final class Node {
        var children: [Character: Node] = [:]
        var value: [String]

        init(_ children: [Character: Node] = [:], value: [String] = []) {
            self.children = children
            self.value = value
        }

        /// Creates a leaf node carrying `value`.
        static func terminal(_ value: [String]) -> Node {
            Node(value: value)
        }

        /// Returns the child reached by `key`, or `nil` when there is none.
        func find(key: Character) -> Node? {
            children[key]
        }

        /// Stores `value` at the end of `route`, creating intermediate nodes as needed.
        /// An empty `route` stores the value on this node itself.
        func insert(route: some Collection<Character>, value: consuming [String]) {
            var cursor = self
            for key in route {
                if let existing = cursor.children[key] {
                    cursor = existing
                } else {
                    let created = Node()
                    cursor.children[key] = created
                    cursor = created
                }
            }
            cursor.value = consume value
        }
    }

    /// Correction rules for roman-kana input: `ts`/`tz`/`tq` -> `ta`, `tp` -> `to`,
    /// and the same four rules with `g` in place of `t`.
    static let roman2kana: Node = {
        let afterT = Node([
            "s": .terminal(["ta"]),
            "z": .terminal(["ta"]),
            "q": .terminal(["ta"]),
            "p": .terminal(["to"]),
        ])
        let afterG = Node([
            "s": .terminal(["ga"]),
            "z": .terminal(["ga"]),
            "q": .terminal(["ga"]),
            "p": .terminal(["go"]),
        ])
        return Node([
            "t": afterT,
            "g": afterG
        ])
    }()

    /// Correction rules for direct input.
    // NOTE(review): the keys and values below are empty literals; the original
    // characters appear to have been lost in extraction — restore from the repository.
    static let direct: Node = {
        Node([
            "": .terminal([""]),
            "": .terminal(["", ""])
        ])
    }()
}
struct InputGraph {
/// An input style: an identifier plus the replace/correct prefix trees used for it.
struct InputStyle: Identifiable {
    /// Identifier of an input style, used to detect boundaries between styles.
    struct ID: Equatable, Hashable, Sendable {
        var id: UInt8

        static let all = Self(id: 0x00)
        static let systemFlickDirect = Self(id: 0x01)
        static let systemRomanKana = Self(id: 0x02)

        init(id: UInt8) {
            self.id = id
        }

        /// Maps the legacy `KanaKanjiConverterModule.InputStyle` to the matching system ID.
        init(from deprecatedInputStyle: KanaKanjiConverterModule.InputStyle) {
            self = switch deprecatedInputStyle {
            case .direct: .systemFlickDirect
            case .roman2kana: .systemRomanKana
            }
        }

        /// `.all` is compatible with every ID; any other ID is compatible only with itself.
        func isCompatible(with id: ID) -> Bool {
            self == .all || self == id
        }
    }

    /// `id` for the input style.
    /// - warning: value `0x00-0x7F` is reserved for system space.
    var id: ID
    var replacePrefixTree: ReplacePrefixTree.Node
    var correctPrefixTree: CorrectPrefixTree.Node

    /// Style with the `.all` ID and empty trees.
    static let all: Self = InputStyle(
        id: .all,
        replacePrefixTree: ReplacePrefixTree.Node(),
        correctPrefixTree: CorrectPrefixTree.Node()
    )
    /// System style for direct (flick) input.
    static let systemFlickDirect: Self = InputStyle(
        id: .systemFlickDirect,
        replacePrefixTree: ReplacePrefixTree.direct,
        correctPrefixTree: CorrectPrefixTree.direct
    )
    /// System style for roman-kana input.
    static let systemRomanKana: Self = InputStyle(
        id: .systemRomanKana,
        replacePrefixTree: ReplacePrefixTree.roman2kana,
        correctPrefixTree: CorrectPrefixTree.roman2kana
    )

    /// Maps the legacy `KanaKanjiConverterModule.InputStyle` to the matching system style.
    init(from deprecatedInputStyle: KanaKanjiConverterModule.InputStyle) {
        self = switch deprecatedInputStyle {
        case .direct: .systemFlickDirect
        case .roman2kana: .systemRomanKana
        }
    }

    init(id: InputGraph.InputStyle.ID, replacePrefixTree: ReplacePrefixTree.Node, correctPrefixTree: CorrectPrefixTree.Node) {
        self.id = id
        self.replacePrefixTree = replacePrefixTree
        self.correctPrefixTree = correctPrefixTree
    }
}
/// A possibly partial half-open range: either bound may be unknown.
enum Range: Equatable, Sendable {
    /// Neither bound is known.
    case unknown
    /// Only the lower bound is known.
    case startIndex(Int)
    /// Only the upper bound is known.
    case endIndex(Int)
    /// Both bounds are known (lower, upper).
    case range(Int, Int)

    /// The lower bound, or `nil` when it is not known.
    var startIndex: Int? {
        if case .startIndex(let lower) = self {
            return lower
        }
        if case .range(let lower, _) = self {
            return lower
        }
        return nil
    }

    /// The upper bound, or `nil` when it is not known.
    var endIndex: Int? {
        if case .endIndex(let upper) = self {
            return upper
        }
        if case .range(_, let upper) = self {
            return upper
        }
        return nil
    }
}
/// Whether a node comes from the input as typed or from a correction rule.
enum Correction {
    /// No correction was applied.
    case none
    /// The node was produced by applying a typo-correction rule.
    case typo

    /// `true` only for `.typo`.
    var isTypo: Bool {
        switch self {
        case .typo:
            return true
        case .none:
            return false
        }
    }
}
/// A single character of the graph together with the ranges it covers.
struct Node: Equatable, CustomStringConvertible {
    /// The character this node represents.
    var character: Character
    /// Range of this node in the displayed text (bounds may be partially unknown).
    var displayedTextRange: Range
    /// Range of this node in the input elements (bounds may be partially unknown).
    var inputElementsRange: Range
    /// Whether this node was produced by a typo-correction rule.
    var correction: Correction = .none

    /// Debug representation; unknown bounds are printed as `?`.
    var description: String {
        let ds = displayedTextRange.startIndex?.description ?? "?"
        let de = displayedTextRange.endIndex?.description ?? "?"
        let `is` = inputElementsRange.startIndex?.description ?? "?"
        let ie = inputElementsRange.endIndex?.description ?? "?"
        // The trailing ")" closes the "Node(" opened at the start of the string.
        return "Node(\"\(character)\", d(\(ds)..<\(de)), i(\(`is`)..<\(ie)), isTypo: \(correction.isTypo))"
    }
}
/// All nodes of the graph. Index 0 always holds the root node, which has only end indices (both 0).
var nodes: [Node] = [
// root node
Node(character: "\0", displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
]
/// `displayedTextStartIndexToNodeIndices[k]` holds the indices (into `nodes`) of the nodes whose
/// `displayedTextRange` is `.startIndex(k)` or `.range(k, _)`.
/// The three tables below follow the same scheme for input-element start indices and for the end indices.
var displayedTextStartIndexToNodeIndices: [IndexSet] = []
var inputElementsStartIndexToNodeIndices: [IndexSet] = []
var displayedTextEndIndexToNodeIndices: [IndexSet] = [IndexSet(integer: 0)] // index of the root node
var inputElementsEndIndexToNodeIndices: [IndexSet] = [IndexSet(integer: 0)] // index of the root node
/// Returns the nodes that directly follow `node`.
///
/// A node follows `node` when it starts where `node` ends, either in the displayed text
/// or in the input elements. Nodes whose end index is unknown in both ranges, or whose
/// end index has no registered follower, yield an empty array.
/// - Parameter node: The node whose successors are requested.
/// - Returns: The successors, in ascending order of their index in `nodes`.
func next(for node: Node) -> [Node] {
    var indexSet = IndexSet()
    // The start-index tables only grow as far as the largest start index inserted so far,
    // so an end index past their bounds simply means "no successor yet" rather than an error.
    if let endIndex = node.displayedTextRange.endIndex,
       self.displayedTextStartIndexToNodeIndices.indices.contains(endIndex) {
        indexSet.formUnion(self.displayedTextStartIndexToNodeIndices[endIndex])
    }
    if let endIndex = node.inputElementsRange.endIndex,
       self.inputElementsStartIndexToNodeIndices.indices.contains(endIndex) {
        indexSet.formUnion(self.inputElementsStartIndexToNodeIndices[endIndex])
    }
    return indexSet.map { self.nodes[$0] }
}
/// Appends `node` to `nodes` and registers its index in each lookup table
/// for which the corresponding start/end index is known.
mutating func insert(_ node: Node) {
    let newNodeIndex = self.nodes.count

    /// Records `newNodeIndex` under `position` in `table`, growing the table with
    /// empty sets when `position` is past its end. Does nothing for a `nil` position.
    func register(at position: Int?, in table: inout [IndexSet]) {
        guard let position else {
            return
        }
        let shortage = position - table.endIndex + 1
        if shortage > 0 {
            table.append(contentsOf: repeatElement(IndexSet(), count: shortage))
        }
        table[position].insert(newNodeIndex)
    }

    register(at: node.displayedTextRange.startIndex, in: &self.displayedTextStartIndexToNodeIndices)
    register(at: node.displayedTextRange.endIndex, in: &self.displayedTextEndIndexToNodeIndices)
    register(at: node.inputElementsRange.startIndex, in: &self.inputElementsStartIndexToNodeIndices)
    register(at: node.inputElementsRange.endIndex, in: &self.inputElementsEndIndexToNodeIndices)
    self.nodes.append(node)
}
// EOS
// NOTE(review): currently a no-op; presumably a placeholder for adding an
// end-of-sentence (EOS) node once the graph is complete — confirm intent.
mutating func finalize() {}
/// Builds an input graph from a sequence of input elements.
///
/// The construction runs in two passes over `input`:
///
/// 1. **Correction rules.** For every start index, the correction prefix tree of that
///    element's input style is walked along the input. Every alternative found is recorded
///    in `altItems[index]` together with the number of input elements it consumes.
///    With the rule `ts -> ta` and the input `i, t, s, a`, `altItems[1]` contains
///    `(replace: "ta", inputCount: 2)`.
/// 2. **Replace rules.** For every index at which an already inserted node ends, the replace
///    prefix tree is walked along the input characters and, in parallel, along the characters
///    of the alternatives from pass 1. Each path contributes its longest match; a node is then
///    inserted for every character of every match value.
///
/// Both walks stop at a boundary between incompatible input styles.
/// - Parameter input: The input elements to convert.
/// - Returns: The graph containing the root node and one node per matched character.
static func build(input: [ComposingText.InputElement]) -> Self {
    var inputGraph = Self()
    // Alternatives per start index: the replacement string and how many input elements it replaces.
    var altItems: [Int: [(replace: String, inputCount: Int)]] = [:]
    // Pass 1: collect the alternatives produced by the correction rules.
    for (index, item) in zip(input.indices, input) {
        let correctPrefixTree = switch item.inputStyle {
        case .roman2kana: CorrectPrefixTree.roman2kana
        case .direct: CorrectPrefixTree.direct
        }
        typealias Match = (replace: String, inputCount: Int)
        typealias SearchItem = (
            node: CorrectPrefixTree.Node,
            nextIndex: Int,
            route: [Character],
            inputStyleId: InputStyle.ID
        )
        // Depth-first walk; the initial `.all` style is compatible with whatever the first element uses.
        var stack: [SearchItem] = [
            (correctPrefixTree, index, [], .all),
        ]
        var matches: [Match] = []
        while let (cNode, cIndex, cRoute, cInputStyleId) = stack.popLast() {
            guard cIndex < input.endIndex else {
                continue
            }
            let inputStyleId = InputStyle(from: input[cIndex].inputStyle).id
            // Do not continue a rule across a change of input style.
            guard cInputStyleId.isCompatible(with: inputStyleId) else {
                continue
            }
            if let nNode = cNode.find(key: input[cIndex].character) {
                // Every value stored on the reached node is an alternative for input[index...cIndex].
                matches.append(contentsOf: nNode.value.map{($0, cIndex - index + 1)})
                stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], inputStyleId))
            }
        }
        altItems[index] = matches
    }
    // Pass 2: apply the replace rules and insert the resulting nodes.
    for (index, item) in zip(input.indices, input) {
        // Only start at positions where an inserted node ends; the table may be shorter than
        // `input`, so check the bounds before subscripting (defensive).
        guard inputGraph.inputElementsEndIndexToNodeIndices.indices.contains(index),
              let beforeNodeIndex = inputGraph.inputElementsEndIndexToNodeIndices[index].first,
              let displayedTextStartIndex = inputGraph.nodes[beforeNodeIndex].displayedTextRange.endIndex else { continue }
        let replacePrefixTree = InputStyle(from: item.inputStyle).replacePrefixTree
        typealias Match = (route: [Character], value: String, correction: Correction)
        typealias SearchItem = (
            node: ReplacePrefixTree.Node,
            nextIndex: Int,
            route: [Character],
            inputStyleId: InputStyle.ID,
            longestMatch: Match
        )
        var stack: [SearchItem] = [
            (replacePrefixTree, index, [], .all, (route: [], value: "", correction: .none))
        ]
        var matches: [Match] = []
        while let (cNode, cIndex, cRoute, cInputStyleId, cLongestMatch) = stack.popLast() {
            // The walk can continue only while input remains and the input style does not change.
            let continuous = cIndex < input.endIndex && cInputStyleId.isCompatible(with: .init(from: input[cIndex].inputStyle))
            if continuous, let nNode = cNode.find(key: input[cIndex].character) {
                if let value = nNode.value {
                    // The reached node carries a value: it becomes the new longest match.
                    stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], .init(from: input[cIndex].inputStyle), (cRoute + [input[cIndex].character], value, cLongestMatch.correction)))
                } else if cRoute.isEmpty {
                    // First step without a value: the single character itself is the longest match so far.
                    stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], .init(from: input[cIndex].inputStyle), ([input[cIndex].character], String(input[cIndex].character), .none)))
                } else {
                    // Otherwise keep walking with the longest match unchanged.
                    stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], .init(from: input[cIndex].inputStyle), cLongestMatch))
                }
            } else {
                if !cLongestMatch.route.isEmpty {
                    // The walk ended: emit the longest match found on this path.
                    matches.append(cLongestMatch)
                } else if cRoute.isEmpty {
                    // Nothing matched from the root: emit the character unchanged as a one-element match.
                    matches.append((route: [input[cIndex].character], value: String(input[cIndex].character), correction: .none))
                }
            }
            // Alternatives are only followed while the walk is still continuous.
            guard continuous else { continue }
            perItem: for item in altItems[cIndex, default: []] {
                // Follow the characters of the alternative through the replace tree, starting at
                // the current node, as if they had been typed instead of the actual input.
                var node: ReplacePrefixTree.Node? = cNode
                if item.inputCount != 1 {
                    var chars = Array(item.replace) // FIXME: use a queue instead of removeFirst
                    while !chars.isEmpty {
                        if let nNode = node?.find(key: chars.removeFirst()) {
                            node = nNode
                        } else {
                            // The alternative does not fit the replace tree from here; skip it.
                            continue perItem
                        }
                    }
                } else {
                    // One-element alternative: push it with a fresh empty node, so the walk ends
                    // on the next pop and the alternative itself is emitted as a typo match.
                    stack.append((.init(), cIndex + item.inputCount, cRoute + Array(item.replace), .init(from: input[cIndex].inputStyle), (cRoute + Array(item.replace), item.replace, .typo)))
                }
                if let node {
                    if let value = node.value {
                        // The node reached via the alternative has a value: it becomes the longest match, marked as typo.
                        stack.append((node, cIndex + item.inputCount, cRoute + Array(item.replace), .init(from: input[cIndex].inputStyle),(cRoute + Array(item.replace), value, .typo)))
                    } else {
                        // No value here: keep the current longest match, but mark it as typo.
                        stack.append((node, cIndex + item.inputCount, cRoute + Array(item.replace), .init(from: input[cIndex].inputStyle),(cLongestMatch.route, cLongestMatch.value, .typo)))
                    }
                }
            }
        }
        // Insert one node per character of every match.
        for match in matches {
            let characters = Array(match.value)
            for (i, c) in zip(characters.indices, characters) {
                // Only the first character knows where the match starts in the input and only the
                // last one knows where it ends; characters in between have an unknown input range.
                let inputElementRange: InputGraph.Range = if i == characters.startIndex && i+1 == characters.endIndex {
                    .range(index, index + match.route.count)
                } else if i == characters.startIndex {
                    .startIndex(index)
                } else if i+1 == characters.endIndex {
                    // The end is measured from the input position `index`, not from the
                    // character offset `i` within the match value.
                    .endIndex(index + match.route.count)
                } else {
                    .unknown
                }
                let node = Node(
                    character: c,
                    displayedTextRange: .range(displayedTextStartIndex + i, displayedTextStartIndex + i + 1),
                    inputElementsRange: inputElementRange,
                    correction: match.correction
                )
                inputGraph.insert(node)
            }
        }
    }
    return consume inputGraph
}
}
final class InputGraphTests: XCTestCase {
/// Two nodes inserted back to back: the second must be reported as the successor of the first.
func testInsert() throws {
    let first = InputGraph.Node(character: "a", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1))
    let second = InputGraph.Node(character: "b", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 2))
    var graph = InputGraph()
    for node in [first, second] {
        graph.insert(node)
    }
    let successors = graph.next(for: first)
    XCTAssertEqual(successors, [second])
}
// Exercises `InputGraph.build(input:)` for direct input, roman-kana input,
// typo correction (`ts` -> `ta`) and input-style boundaries.
// NOTE(review): several character literals below are empty (`""`); the original
// characters appear to have been lost in extraction — restore from the repository.
func testBuild() throws {
do {
let graph = InputGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct)
])
XCTAssertEqual(graph.nodes.count, 4) // count includes the root node
print(graph.nodes)
}
do {
let graph = InputGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct)
])
XCTAssertEqual(graph.nodes.count, 5) // count includes the root node
print(graph.nodes)
}
do {
let graph = InputGraph.build(input: [
.init(character: "i", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "a", inputStyle: .roman2kana),
])
XCTAssertEqual(graph.nodes.count, 3) // count includes the root node
print(graph.nodes)
}
do {
let graph = InputGraph.build(input: [
.init(character: "i", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "s", inputStyle: .roman2kana),
])
XCTAssertEqual(graph.nodes.count, 5) // count includes the root node
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
)
XCTAssertEqual(
graph.nodes.first(where: {$0.character == "t"}),
.init(character: "t", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 2), correction: .none)
)
XCTAssertEqual(
graph.nodes.first(where: {$0.character == "s"}),
.init(character: "s", displayedTextRange: .range(2, 3), inputElementsRange: .range(2, 3), correction: .none)
)
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 3), correction: .typo)
)
}
do {
// `ts` is also read as a typo of `ta` (correction rule ts -> ta), followed by `a`
let graph = InputGraph.build(input: [
.init(character: "i", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "s", inputStyle: .roman2kana),
.init(character: "a", inputStyle: .roman2kana),
])
XCTAssertEqual(graph.nodes.count, 6) // count includes the root node
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
)
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(1, 2), inputElementsRange: .range(1, 3), correction: .typo)
)
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(2, 3), inputElementsRange: .endIndex(4), correction: .none)
)
}
do {
// input-style boundary: `t` (roman2kana) followed by `s` (direct) must not trigger the ts -> ta correction
let graph = InputGraph.build(input: [
.init(character: "t", inputStyle: .roman2kana),
.init(character: "s", inputStyle: .direct),
])
XCTAssertEqual(
graph.nodes.first(where: {$0.character == "t"}),
.init(character: "t", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 1), correction: .none)
)
XCTAssertFalse(graph.nodes.contains(.init(character: "", displayedTextRange: .range(0, 1), inputElementsRange: .range(0, 2), correction: .typo)))
}
}
// Placeholder: no assertions yet.
func testLOUDSLookup() throws {
}
}

View File

@ -0,0 +1,3 @@
# Experimental Tests
実験的な実装をテスト駆動で開発するためのディレクトリ。