[Experimental] ConvertGraphを実装し、その上での完全一致変換を実装 (#47)

* ConvertGraphを実装し、その上での完全一致変換を実装

* 名前空間を汚染していたので修正

* Implementation completed (without test)

* move directory to use default dictionary

* fix implementations to enable conversion

* add test cases

* Backward searchで発見された候補を明示的に削除

* fix tests

* simplify
This commit is contained in:
Miwa / Ensan
2024-02-24 23:21:44 +09:00
committed by GitHub
parent 52fc9ae4c2
commit 0795b8cf84
15 changed files with 706 additions and 327 deletions

View File

@ -73,6 +73,6 @@ public struct DicdataElement: Equatable, Hashable, Sendable {
extension DicdataElement: CustomDebugStringConvertible {
public var debugDescription: String {
"(ruby: \(self.ruby), word: \(self.word), cid: (\(self.lcid), \(self.rcid)), mid: \(self.mid), value: \(self.baseValue)+\(self.adjust)=\(self.value())"
"(ruby: \(self.ruby), word: \(self.word), cid: (\(self.lcid), \(self.rcid)), mid: \(self.mid), value: \(self.baseValue)+\(self.adjust)=\(self.value()))"
}
}

View File

@ -102,6 +102,10 @@ public final class DicdataStore {
}
}
/// Returns the ID stored for `character` in `charsID`.
/// - Parameter character: The character to look up.
/// - Returns: The registered ID, or `UInt8.max` when `charsID` has no entry for `character`.
func character2charId(_ character: Character) -> UInt8 {
self.charsID[character, default: .max]
}
private func reloadMemory() {
self.loudses.removeValue(forKey: "memory")
self.importedLoudses.remove("memory")
@ -143,7 +147,7 @@ public final class DicdataStore {
return Self.getPenalty(data: data) < -d
}
private func loadLOUDS(identifier: String) -> LOUDS? {
func loadLOUDS(identifier: String) -> LOUDS? {
if importedLoudses.contains(identifier) {
return self.loudses[identifier]
}
@ -213,7 +217,7 @@ public final class DicdataStore {
var stringToInfo = inputData.getRangesWithTypos(fromIndex, rightIndexRange: toIndexLeft ..< toIndexRight)
// MARK:
let stringSet = stringToInfo.keys.map {($0, $0.map {self.charsID[$0, default: .max]})}
let stringSet = stringToInfo.keys.map {($0, $0.map(self.character2charId))}
let (minCharIDsCount, maxCharIDsCount) = stringSet.lazy.map {$0.1.count}.minAndMax() ?? (0, -1)
// :
let group = [Character: [([Character], [UInt8])]].init(grouping: stringSet, by: {$0.0.first!})
@ -318,7 +322,7 @@ public final class DicdataStore {
// MARK: indices
// :
let strings = string2penalty.keys.map {
(key: $0, charIDs: $0.map {self.charsID[$0, default: .max]})
(key: $0, charIDs: $0.map(self.character2charId))
}
let group = [Character: [(key: [Character], charIDs: [UInt8])]].init(grouping: strings, by: {$0.key.first!})
@ -433,7 +437,7 @@ public final class DicdataStore {
} else if count == 2 {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map {self.charsID[$0, default: .max]}
let charIDs = key.map(self.character2charId)
// 700
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: 5).prefix(700)
result.append(
@ -451,7 +455,7 @@ public final class DicdataStore {
} else {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map {self.charsID[$0, default: .max]}
let charIDs = key.map(self.character2charId)
// 700
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs).prefix(700)
result.append(

View File

@ -127,24 +127,24 @@ extension LOUDS {
}
/// index
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [Int: [DicdataElement]] {
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
let binary: Data
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
} catch {
debug("getDataForLoudstxt3: \(error)")
return [:]
return []
}
let lc = binary[0..<2].toArray(of: UInt16.self)[0]
let header_endIndex: UInt32 = 2 + UInt32(lc) * UInt32(MemoryLayout<UInt32>.size)
let ui32array = binary[2..<header_endIndex].toArray(of: UInt32.self)
var result: [Int: [DicdataElement]] = [:]
var result: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
for index in indices {
let startIndex = Int(ui32array[index])
let endIndex = index == (lc - 1) ? binary.endIndex : Int(ui32array[index + 1])
result[index] = parseBinary(binary: binary[startIndex ..< endIndex])
result.append((index, parseBinary(binary: binary[startIndex ..< endIndex])))
}
return result
}

View File

@ -1,213 +0,0 @@
//
// LookupGraphTests.swift
//
//
// Created by miwa on 2024/02/23.
//
import XCTest
import Foundation
@testable import KanaKanjiConverterModule
struct LookupGraph {
struct Node: Equatable {
var charId: UInt8
var loudsNodeIndices: Set<Int> = []
var displayedTextRange: InputGraphStructure.Range
var inputElementsRange: InputGraphStructure.Range
var correction: InputGraph.Correction = .none
}
var nodes: [Node] = [
// root node
Node(charId: 0x00, displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
]
var structure: InputGraphStructure = InputGraphStructure()
var root: Node {
nodes[0]
}
func nextIndices(for node: Node) -> IndexSet {
self.structure.nextIndices(
displayedTextEndIndex: node.displayedTextRange.endIndex,
inputElementsEndIndex: node.inputElementsRange.endIndex
)
}
func next(for node: Node) -> [Node] {
nextIndices(for: node).map{ self.nodes[$0] }
}
func prevIndices(for node: Node) -> IndexSet {
self.structure.prevIndices(
displayedTextStartIndex: node.displayedTextRange.startIndex,
inputElementsStartIndex: node.inputElementsRange.startIndex
)
}
func prev(for node: Node) -> [Node] {
prevIndices(for: node).map{ self.nodes[$0] }
}
mutating func remove(at index: Int) {
assert(index != 0, "Node at index 0 is root and must not be removed.")
self.structure.remove(at: index)
}
mutating func insert(_ node: Node) {
self.structure.insert(node, nodes: &self.nodes, displayedTextRange: node.displayedTextRange, inputElementsRange: node.inputElementsRange)
}
static func build(input: InputGraph, character2CharId: (Character) -> UInt8) -> Self {
let nodes = input.nodes.map {
Node(charId: character2CharId($0.character), displayedTextRange: $0.displayedTextRange, inputElementsRange: $0.inputElementsRange, correction: $0.correction)
}
return Self(nodes: nodes, structure: input.structure)
}
}
extension LOUDS {
func byfixNodeIndices(_ inputGraph: LookupGraph) -> (IndexSet, [Int: Set<Int>]) {
var indexSet = IndexSet(integer: 1)
// loudsLookupGraph
var loudsNodeIndex2GraphNodeIndices: [Int: Set<Int>] = [:]
typealias SearchItem = (
node: LookupGraph.Node,
nodeIndex: Int,
lastLoudsNodeIndex: Int
)
var stack: [SearchItem] = inputGraph.nextIndices(for: inputGraph.root).map { (inputGraph.nodes[$0], $0, 1) }
while let (cNode, cNodeIndex, cLastLoudsNodeIndex) = stack.popLast() {
// nextNodes
if let loudsNodeIndex = self.searchCharNodeIndex(from: cLastLoudsNodeIndex, char: cNode.charId) {
loudsNodeIndex2GraphNodeIndices[loudsNodeIndex, default: []].insert(cNodeIndex)
indexSet.insert(loudsNodeIndex)
stack.append(contentsOf: inputGraph.nextIndices(for: cNode).map { (inputGraph.nodes[$0], $0, loudsNodeIndex) })
} else {
continue
}
}
return (indexSet, loudsNodeIndex2GraphNodeIndices)
}
}
extension DicdataStore {
func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [Int: [DicdataElement]] {
// split = 2048
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
var data: [Int: [DicdataElement]] = [:]
for (key, value) in dict {
// FIXME: use local value
data.merge(LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {$0 & 2047}, option: option)) {
$0 + $1
}
}
return data
}
}
final class LookupGraphTests: XCTestCase {
static var resourceURL = Bundle.module.resourceURL!.standardizedFileURL.appendingPathComponent("DictionaryMock", isDirectory: true)
func requestOptions() -> ConvertRequestOptions {
var options: ConvertRequestOptions = .default
options.dictionaryResourceURL = Self.resourceURL
return options
}
func loadCharIDs() -> [Character: UInt8] {
do {
let string = try String(contentsOf: Self.resourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false), encoding: String.Encoding.utf8)
return [Character: UInt8](uniqueKeysWithValues: string.enumerated().map {($0.element, UInt8($0.offset))})
} catch {
print("ファイルが見つかりませんでした")
return [:]
}
}
func testByfixNodeIndices() throws {
let dicdataStore = DicdataStore(requestOptions: requestOptions())
let charIDs = loadCharIDs()
let louds = LOUDS.load("", option: requestOptions())
XCTAssertNotNil(louds)
guard let louds else { return }
do {
let inputGraph = InputGraph.build(input: [
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
.init(character: "", inputStyle: .direct),
])
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: {charIDs[$0.toKatakana()] ?? 0x00})
let (loudsNodeIndices, loudsNodeIndex2GraphNodeIndices) = louds.byfixNodeIndices(lookupGraph)
let dicdataWithIndex: [Int: [DicdataElement]] = dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.values.flatMapSet { $0 }
//
XCTAssertTrue(dicdata.contains {$0.word == ""})
//
XCTAssertTrue(dicdata.contains {$0.word == "鹿"})
XCTAssertTrue(dicdata.contains {$0.word == "歯科"})
//
XCTAssertTrue(dicdata.contains {$0.word == "滋賀"})
//
XCTAssertTrue(dicdata.contains {$0.word == "司会"})
XCTAssertTrue(dicdata.contains {$0.word == "視界"})
XCTAssertTrue(dicdata.contains {$0.word == "死界"})
//
XCTAssertTrue(dicdata.contains {$0.word == "市外"})
XCTAssertTrue(dicdata.contains {$0.word == "市街"})
XCTAssertTrue(dicdata.contains {$0.word == "死骸"})
}
do {
// ts -> ta
let inputGraph = InputGraph.build(input: [
.init(character: "s", inputStyle: .roman2kana),
.init(character: "i", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "s", inputStyle: .roman2kana),
.init(character: "i", inputStyle: .roman2kana),
])
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: {charIDs[$0.toKatakana()] ?? 0x00})
let (loudsNodeIndices, loudsNodeIndex2GraphNodeIndices) = louds.byfixNodeIndices(lookupGraph)
let dicdataWithIndex: [Int: [DicdataElement]] = dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.values.flatMapSet { $0 }
//
XCTAssertTrue(dicdata.contains {$0.word == ""})
// []
XCTAssertTrue(dicdata.contains {$0.word == ""})
XCTAssertTrue(dicdata.contains {$0.word == ""})
//
XCTAssertTrue(dicdata.contains {$0.word == ""})
XCTAssertTrue(dicdata.contains {$0.word == ""})
//
XCTAssertTrue(dicdata.contains {$0.word == "死体"})
XCTAssertTrue(dicdata.contains {$0.word == "肢体"})
}
do {
//
let inputGraph = InputGraph.build(input: [
.init(character: "s", inputStyle: .roman2kana),
.init(character: "i", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "a", inputStyle: .roman2kana),
.init(character: "i", inputStyle: .roman2kana),
])
let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: {charIDs[$0.toKatakana()] ?? 0x00})
let (loudsNodeIndices, loudsNodeIndex2GraphNodeIndices) = louds.byfixNodeIndices(lookupGraph)
let dicdataWithIndex: [Int: [DicdataElement]] = dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
let dicdata = dicdataWithIndex.values.flatMapSet { $0 }
//
XCTAssertTrue(dicdata.contains {$0.word == ""})
//
XCTAssertTrue(dicdata.contains {$0.word == "知っ"})
XCTAssertTrue(dicdata.contains {$0.word == "しっ"})
//
XCTAssertTrue(dicdata.contains {$0.word == "叱咤"})
//
XCTAssertTrue(dicdata.contains {$0.word == "失態"})
}
}
}

View File

@ -1,20 +0,0 @@
//
// extension Kana2Kanji+InputGraph.swift
//
//
// Created by miwa on 2024/02/23.
//
import Foundation
@testable import KanaKanjiConverterModule
extension Kana2Kanji {
func kana2lattice_all(_ inputData: InputGraph, N_best: Int) {
//
//
}
}

View File

@ -1,6 +0,0 @@
//
// extension LOUDS+InputGraph.swift
//
//
// Created by miwa on 2024/02/22.
//

View File

@ -0,0 +1,211 @@
//
// ConvertGraph.swift
//
//
// Created by miwa on 2024/02/23.
//
import XCTest
import Foundation
@testable import KanaKanjiConverterModule
/// A graph that mirrors the node layout of a `LookupGraph` and attaches
/// the candidate `LatticeNode`s to each graph node.
struct ConvertGraph: InputGraphProtocol {
/// One graph node: the lattice nodes attached to it plus the ranges it covers.
struct Node: InputGraphNodeProtocol {
/// Lattice nodes attached to this graph node (empty for the root).
var latticeNodes: [LatticeNode]
var displayedTextRange: InputGraphStructure.Range
var inputElementsRange: InputGraphStructure.Range
var correction: InputGraph.Correction = .none
}
/// Node storage; index 0 is the root node.
var nodes: [Node] = [
// root node
Node(latticeNodes: [], displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
]
var structure: InputGraphStructure = InputGraphStructure()
/// Builds a `ConvertGraph` with one node per node of `input`, at the same index.
/// - Parameters:
///   - input: The lookup graph whose ranges, corrections and `structure` are copied.
///   - nodeIndex2LatticeNode: Lattice nodes keyed by node index in `input.nodes`;
///     indices without an entry get an empty array.
/// - Returns: A graph that reuses `input.structure` unchanged.
static func build(input: LookupGraph, nodeIndex2LatticeNode: [Int: [LatticeNode]]) -> Self {
let nodes = input.nodes.enumerated().map { (index, node) in
Node(latticeNodes: nodeIndex2LatticeNode[index, default: []], displayedTextRange: node.displayedTextRange, inputElementsRange: node.inputElementsRange, correction: node.correction)
}
return Self(nodes: nodes, structure: input.structure)
}
}
extension ConvertGraph {
/// A lattice node: one dictionary entry placed at a specific range of the input,
/// together with the predecessor paths registered for it so far.
final class LatticeNode: CustomStringConvertible {
/// The dictionary entry this node represents.
public let data: DicdataElement
/// Registered predecessor paths. `convertAll` keeps at most `option.N_best` entries here,
/// inserting each new entry right after the last entry whose `totalValue` is `>=` the new value.
var prevs: [RegisteredNode] = []
/// Scores associated with `prevs`. Initialised to `[data.value()]`;
/// `convertAll` later replaces it with one value per entry of `prevs`.
var values: [PValue] = []
/// Ranges this node covers in the displayed text and in the input elements.
var displayedTextRange: InputGraphStructure.Range
var inputElementsRange: InputGraphStructure.Range
/// A node wrapping `DicdataElement.EOSData` with both ranges set to `.unknown`.
/// This is a computed property, so every access creates a new instance.
static var EOSNode: LatticeNode {
LatticeNode(data: DicdataElement.EOSData, displayedTextRange: .unknown, inputElementsRange: .unknown)
}
init(data: DicdataElement, displayedTextRange: InputGraphStructure.Range, inputElementsRange: InputGraphStructure.Range, prevs: [RegisteredNode] = []) {
self.data = data
self.values = [data.value()]
self.displayedTextRange = displayedTextRange
self.inputElementsRange = inputElementsRange
self.prevs = prevs
}
/// Creates a `RegisteredNode` from this `LatticeNode`.
/// The result carries this node's `data` and ranges and is linked to exactly one predecessor, `prevs[index]`.
/// - Parameters:
///   - index: Position in `prevs` of the predecessor to link to.
///   - value: Total value to store in the created node.
func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode {
// FIXME: (the text of the original note was lost)
// NOTE(review): `index` is used to subscript `prevs` without a bounds check.
RegisteredNode(
data: self.data,
registered: self.prevs[index],
totalValue: value,
displayedTextRange: self.displayedTextRange,
inputElementsRange: self.inputElementsRange
)
}
var description: String {
"LatticeNode(data: \(data), ...)"
}
}
/// One step of a registered path: a dictionary entry, a link to the preceding step,
/// and the total value recorded for the path up to this step.
struct RegisteredNode: RegisteredNodeProtocol {
/// The dictionary entry of this step.
let data: DicdataElement
/// The preceding step of the path; `nil` for the node returned by `BOSNode()`.
let prev: (any RegisteredNodeProtocol)?
/// The total value recorded for the path ending at this step.
let totalValue: PValue
/// Ranges this step covers in the displayed text and in the input elements.
var displayedTextRange: InputGraphStructure.Range
var inputElementsRange: InputGraphStructure.Range
init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, displayedTextRange: InputGraphStructure.Range, inputElementsRange: InputGraphStructure.Range) {
self.data = data
self.prev = registered
self.totalValue = totalValue
self.displayedTextRange = displayedTextRange
self.inputElementsRange = inputElementsRange
}
/// Creates the beginning-of-sentence node.
/// - Returns: A node wrapping `DicdataElement.BOSData` with no predecessor, a total value of 0,
///   and both ranges set to `.endIndex(0)`.
static func BOSNode() -> RegisteredNode {
RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
}
}
}
/// Abstraction over registered nodes, used as an existential type (`any RegisteredNodeProtocol`)
/// for the `prev` link so that a `struct` node can refer to a preceding node.
/// - Note: The original note mentioned `indirect enum`; its wording was lost.
///   NOTE(review): presumably it named `indirect enum` as an alternative design — confirm with the author.
protocol RegisteredNodeProtocol {
/// The dictionary entry of this node.
var data: DicdataElement {get}
/// The preceding node, if any.
var prev: (any RegisteredNodeProtocol)? {get}
/// The total value recorded for this node.
var totalValue: PValue {get}
/// Ranges this node covers in the displayed text and in the input elements.
var displayedTextRange: InputGraphStructure.Range {get}
var inputElementsRange: InputGraphStructure.Range {get}
}
extension ConvertGraph {
    /// Propagates path values through every lattice node of the graph and collects the
    /// finished paths on a newly created EOS node.
    ///
    /// Graph nodes are processed only after all of their predecessor graph nodes have been
    /// processed (or marked invalid). For each lattice node that already has registered
    /// predecessors, the method either registers it as a predecessor of the lattice nodes in the
    /// following graph nodes, keeping at most `option.N_best` entries per lattice node, or, when
    /// nothing follows it, appends its paths to the returned EOS node.
    ///
    /// - Parameters:
    ///   - option: Conversion options; only `N_best` is read here.
    ///   - dicdataStore: Provides `shouldBeRemoved(data:)` and `getCCValue(_:_:)`.
    /// - Returns: An EOS `LatticeNode` whose `prevs` hold the registered end-of-input paths.
    func convertAll(option: borrowing ConvertRequestOptions, dicdataStore: DicdataStore) -> LatticeNode {
        let result: LatticeNode = LatticeNode.EOSNode
        result.displayedTextRange = .startIndex(self.structure.displayedTextEndIndexToNodeIndices.endIndex)
        result.inputElementsRange = .startIndex(self.structure.inputElementsEndIndexToNodeIndices.endIndex)
        // Reversed so that `popLast()` visits the graph nodes in ascending index order.
        var processStack = Array(self.nodes.enumerated().reversed())
        var processedIndices: IndexSet = [0] // root
        var invalidIndices: IndexSet = []
        while let (i, graphNode) = processStack.popLast() {
            // Skip nodes that were already handled or found to be unreachable.
            guard !processedIndices.contains(i), !invalidIndices.contains(i) else {
                continue
            }
            // A graph node without any predecessor cannot be reached: mark it invalid.
            let prevIndices = self.structure.prevIndices(displayedTextStartIndex: graphNode.displayedTextRange.startIndex, inputElementsStartIndex: graphNode.inputElementsRange.startIndex)
            guard !prevIndices.isEmpty else {
                invalidIndices.insert(i)
                continue
            }
            var unprocessedPrevs: [(Int, Node)] = []
            for prevIndex in prevIndices {
                if !processedIndices.contains(prevIndex) && !invalidIndices.contains(prevIndex) {
                    unprocessedPrevs.append((prevIndex, self.nodes[prevIndex]))
                }
            }
            // If some predecessor is still pending, push this node back and handle the predecessors first.
            guard unprocessedPrevs.isEmpty else {
                processStack.append((i, graphNode))
                processStack.append(contentsOf: unprocessedPrevs)
                continue
            }
            processedIndices.insert(i)
            for node in graphNode.latticeNodes {
                // Lattice nodes that nothing has been registered to are not part of any path.
                if node.prevs.isEmpty {
                    continue
                }
                if dicdataStore.shouldBeRemoved(data: node.data) {
                    continue
                }
                // Value of the word itself.
                let wValue: PValue = node.data.value()
                // NOTE(review): `processedIndices` is seeded with 0, so the guard above skips i == 0
                // and this first branch is never taken. Confirm whether the connection value to the
                // first nodes was meant to be added under a different condition.
                if i == 0 {
                    node.values = node.prevs.map {$0.totalValue + wValue + dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
                } else {
                    node.values = node.prevs.map {$0.totalValue + wValue}
                }
                // Graph nodes that start where this lattice node ends.
                let nextIndices = self.structure.nextIndices(
                    displayedTextEndIndex: node.displayedTextRange.endIndex,
                    inputElementsEndIndex: node.inputElementsRange.endIndex
                )
                if nextIndices.isEmpty || self.structure.inputElementsStartIndexToNodeIndices.endIndex == node.inputElementsRange.endIndex {
                    // Nothing follows: register every path of this node on the EOS node.
                    for index in node.prevs.indices {
                        let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
                        result.prevs.append(newnode)
                    }
                } else {
                    for nextIndex in nextIndices {
                        for nextnode in self.nodes[nextIndex].latticeNodes {
                            if dicdataStore.shouldBeRemoved(data: nextnode.data) {
                                continue
                            }
                            // Connection value between this node and the following one.
                            let ccValue: PValue = dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                            for (index, value) in node.values.enumerated() {
                                let newValue: PValue = ccValue + value
                                // Insert right after the last entry whose total value is >= newValue.
                                let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
                                // Falling at position N_best means the new path does not make the cut.
                                if lastindex == option.N_best {
                                    continue
                                }
                                let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
                                // Drop the last entry first so that the count never exceeds N_best.
                                if nextnode.prevs.count >= option.N_best {
                                    nextnode.prevs.removeLast()
                                }
                                nextnode.prevs.insert(newnode, at: lastindex)
                            }
                        }
                    }
                }
            }
        }
        return result
    }
}

View File

@ -44,18 +44,39 @@ enum CorrectPrefixTree {
static let roman2kana: Node = {
Node([
"t": Node([
"s": .terminal(["ta"]),
"z": .terminal(["ta"]),
"q": .terminal(["ta"]),
"p": .terminal(["to"]),
]),
"g": Node([
"s": .terminal(["ga"]),
"z": .terminal(["ga"]),
"q": .terminal(["ga"]),
"d": .terminal(["ge"]),
"r": .terminal(["ge"]),
"w": .terminal(["ge"]),
"k": .terminal(["gi"]),
"l": .terminal(["go"]),
"p": .terminal(["go"]),
])
"j": .terminal(["gu"]),
]),
"m": Node([
"s": .terminal(["ma"]),
"q": .terminal(["ma"]),
"d": .terminal(["me"]),
"r": .terminal(["me"]),
"w": .terminal(["me"]),
"k": .terminal(["mi"]),
"l": .terminal(["mo"]),
"p": .terminal(["mo"]),
"j": .terminal(["mu"]),
]),
"t": Node([
"s": .terminal(["ta"]),
"q": .terminal(["ta"]),
"d": .terminal(["te"]),
"r": .terminal(["te"]),
"w": .terminal(["te"]),
"k": .terminal(["ti"]),
"l": .terminal(["to"]),
"p": .terminal(["to"]),
"j": .terminal(["tu"]),
]),
])
}()
static let direct: Node = {

View File

@ -70,7 +70,8 @@ struct InputGraphStructure {
return indexSet
}
mutating func insert<T>(_ node: T, nodes: inout [T], displayedTextRange: Range, inputElementsRange: Range) {
/// `index`
mutating func insert<T>(_ node: T, nodes: inout [T], displayedTextRange: Range, inputElementsRange: Range) -> Int {
// deadNodeIndices
let index: Int
if let deadIndex = self.deadNodeIndices.popLast() {
@ -104,6 +105,7 @@ struct InputGraphStructure {
}
self.inputElementsEndIndexToNodeIndices[endIndex].insert(index)
}
return index
}
mutating func remove(at index: Int) {
@ -125,7 +127,7 @@ struct InputGraphStructure {
}
}
struct InputGraph {
struct InputGraph: InputGraphProtocol {
struct InputStyle: Identifiable {
init(from deprecatedInputStyle: KanaKanjiConverterModule.InputStyle) {
switch deprecatedInputStyle {
@ -211,7 +213,7 @@ struct InputGraph {
}
}
struct Node: Equatable, CustomStringConvertible {
struct Node: InputGraphNodeProtocol, Equatable, CustomStringConvertible {
var character: Character
var displayedTextRange: InputGraphStructure.Range
var inputElementsRange: InputGraphStructure.Range
@ -222,7 +224,7 @@ struct InputGraph {
let de = displayedTextRange.endIndex?.description ?? "?"
let `is` = inputElementsRange.startIndex?.description ?? "?"
let ie = inputElementsRange.endIndex?.description ?? "?"
return "Node(\"\(character)\", d(\(ds)..<\(de)), i(\(`is`)..<\(ie)), isTypo: \(correction.isTypo)"
return "Node(\"\(character)\", d(\(ds)..<\(de)), i(\(`is`)..<\(ie)), isTypo: \(correction.isTypo))"
}
}
@ -233,41 +235,6 @@ struct InputGraph {
var structure: InputGraphStructure = InputGraphStructure()
var root: Node {
nodes[0]
}
func nextIndices(for node: Node) -> IndexSet {
self.structure.nextIndices(
displayedTextEndIndex: node.displayedTextRange.endIndex,
inputElementsEndIndex: node.inputElementsRange.endIndex
)
}
func next(for node: Node) -> [Node] {
nextIndices(for: node).map{ self.nodes[$0] }
}
func prevIndices(for node: Node) -> IndexSet {
self.structure.prevIndices(
displayedTextStartIndex: node.displayedTextRange.startIndex,
inputElementsStartIndex: node.inputElementsRange.startIndex
)
}
func prev(for node: Node) -> [Node] {
prevIndices(for: node).map{ self.nodes[$0] }
}
mutating func remove(at index: Int) {
assert(index != 0, "Node at index 0 is root and must not be removed.")
self.structure.remove(at: index)
}
mutating func insert(_ node: Node) {
self.structure.insert(node, nodes: &self.nodes, displayedTextRange: node.displayedTextRange, inputElementsRange: node.inputElementsRange)
}
static func build(input: [ComposingText.InputElement]) -> Self {
var inputGraph = Self()
//
@ -328,8 +295,15 @@ struct InputGraph {
// ittaitt[][][t]
// a[t][a][ta]
// TODO: typo
typealias Match = (displayedTextStartIndex: Int?, inputElementsStartIndex: Int?, inputElementsEndIndex: Int, value: String, correction: Correction)
typealias BackSearchMatch = (endNode: ReplacePrefixTree.Node, route: [Character], inputStyleId: InputStyle.ID, correction: Correction, longestMatch: Match)
typealias Match = (
displayedTextStartIndex: Int?,
inputElementsStartIndex: Int?,
inputElementsEndIndex: Int,
backwardRoute: [Int],
value: String,
correction: Correction
)
typealias BackSearchMatch = (endNode: ReplacePrefixTree.Node, route: [Int], inputStyleId: InputStyle.ID, correction: Correction, longestMatch: Match)
var backSearchMatch: [BackSearchMatch] = []
do {
if let characterNodes = ReplacePrefixTree.characterNodes[.init(from: item.inputStyle)],
@ -377,14 +351,13 @@ struct InputGraph {
guard let pNode = endNode.parent else { continue }
let inputElementsStartIndex = if cRoute.isEmpty { index } else { inputGraph.nodes[cRoute.first!].inputElementsRange.startIndex }
let displayedTextStartIndex = cRoute.first.flatMap { inputGraph.nodes[$0].displayedTextRange.startIndex }
let characterRoute = cRoute.map{inputGraph.nodes[$0].character}
backSearchMatch.append(
(
pNode,
characterRoute,
cRoute,
cInputStyleId,
cCorrection,
(displayedTextStartIndex, inputElementsStartIndex, index, "", cCorrection)
(displayedTextStartIndex, inputElementsStartIndex, index, cRoute, "", cCorrection)
)
)
}
@ -395,39 +368,38 @@ struct InputGraph {
typealias SearchItem = (
node: ReplacePrefixTree.Node,
nextIndex: Int,
route: [Character],
inputStyleId: InputStyle.ID,
longestMatch: Match
)
var stack: [SearchItem] = []
for match in backSearchMatch {
stack.append((match.endNode, index, match.route, match.inputStyleId, match.longestMatch))
stack.append((match.endNode, index, match.inputStyleId, match.longestMatch))
}
if stack.isEmpty {
stack.append((replacePrefixTree, index, [], .all, (nil, index, index, value: "", correction: .none)))
stack.append((replacePrefixTree, index, .all, (nil, index, index, backwardRoute: [], value: "", correction: .none)))
}
var matches: [Match] = []
while let (cNode, cIndex, cRoute, cInputStyleId, cLongestMatch) = stack.popLast() {
while let (cNode, cIndex, cInputStyleId, cLongestMatch) = stack.popLast() {
let continuous = cIndex < input.endIndex && cInputStyleId.isCompatible(with: .init(from: input[cIndex].inputStyle))
if continuous, let nNode = cNode.find(key: input[cIndex].character) {
if let value = nNode.value {
// valuelongestMatch
stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], .init(from: input[cIndex].inputStyle), (cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + 1, value, cLongestMatch.correction)))
} else if cRoute.isEmpty {
stack.append((nNode, cIndex + 1, .init(from: input[cIndex].inputStyle), (cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + 1, cLongestMatch.backwardRoute, value, cLongestMatch.correction)))
} else if (cIndex == index && cLongestMatch.backwardRoute.isEmpty) {
// value1longestMatch
stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], .init(from: input[cIndex].inputStyle), (cLongestMatch.displayedTextStartIndex, cIndex, cIndex + 1, String(input[cIndex].character), .none)))
stack.append((nNode, cIndex + 1, .init(from: input[cIndex].inputStyle), (cLongestMatch.displayedTextStartIndex, cIndex, cIndex + 1, cLongestMatch.backwardRoute, String(input[cIndex].character), .none)))
} else {
//
stack.append((nNode, cIndex + 1, cRoute + [input[cIndex].character], .init(from: input[cIndex].inputStyle), cLongestMatch))
stack.append((nNode, cIndex + 1, .init(from: input[cIndex].inputStyle), cLongestMatch))
}
} else {
if cLongestMatch.inputElementsStartIndex != cLongestMatch.inputElementsEndIndex {
if cLongestMatch.inputElementsStartIndex != cLongestMatch.inputElementsEndIndex && !cLongestMatch.value.isEmpty {
// longestMatchmatch
matches.append(cLongestMatch)
} else if cRoute.isEmpty {
} else if (cIndex == index && cLongestMatch.backwardRoute.isEmpty) {
// 1rootcharactermatch
// .->\1
matches.append((nil, index, index + 1, value: String(input[cIndex].character), correction: .none))
matches.append((nil, index, index + 1, [], value: String(input[cIndex].character), correction: .none))
}
}
//
@ -454,9 +426,8 @@ struct InputGraph {
(
.init(),
cIndex + item.inputCount,
cRoute + Array(item.replace),
.init(from: input[cIndex].inputStyle),
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, item.replace, .typo)
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, cLongestMatch.backwardRoute, item.replace, .typo)
)
)
}
@ -467,9 +438,8 @@ struct InputGraph {
(
node,
cIndex + item.inputCount,
cRoute + Array(item.replace),
.init(from: input[cIndex].inputStyle),
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, value, .typo)
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, cLongestMatch.backwardRoute, value, .typo)
)
)
} else {
@ -477,9 +447,8 @@ struct InputGraph {
(
node,
cIndex + item.inputCount,
cRoute + Array(item.replace),
.init(from: input[cIndex].inputStyle),
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, cLongestMatch.value, .typo)
(cLongestMatch.displayedTextStartIndex, cLongestMatch.inputElementsStartIndex, cIndex + item.inputCount, cLongestMatch.backwardRoute, cLongestMatch.value, .typo)
)
)
}
@ -487,17 +456,25 @@ struct InputGraph {
}
}
// matchinsert
for match in matches {
var removedNodeIndices: Set<Int> = []
for match in matches.sorted(by: { $0.backwardRoute.count > $1.backwardRoute.count }) {
let displayedTextStartIndex = if let d = match.displayedTextStartIndex {
d
} else if let beforeNodeIndex = inputGraph.structure.inputElementsEndIndexToNodeIndices[index].first,
let d = inputGraph.nodes[beforeNodeIndex].displayedTextRange.endIndex {
d
} else {
Int?.none
}
} else if let beforeNodeIndex = inputGraph.structure.inputElementsEndIndexToNodeIndices[index].first {
inputGraph.nodes[beforeNodeIndex].displayedTextRange.endIndex
} else {
Int?.none
}
guard let displayedTextStartIndex else { continue }
for backNodeIndex in match.backwardRoute {
if removedNodeIndices.contains(backNodeIndex) {
continue
}
inputGraph.structure.remove(at: backNodeIndex)
removedNodeIndices.insert(backNodeIndex)
}
let characters = Array(match.value)
for (i, c) in zip(characters.indices, characters) {
let inputElementRange: InputGraphStructure.Range = if i == characters.startIndex && i+1 == characters.endIndex {
@ -527,7 +504,6 @@ struct InputGraph {
}
}
}
return consume inputGraph
return inputGraph
}
}

View File

@ -0,0 +1,59 @@
//
// InputGraphProtocol.swift
//
//
// Created by miwa on 2024/02/23.
//
import Foundation
/// Requirements for a node of an input graph: the two ranges used to look up
/// neighbouring nodes in an `InputGraphStructure`.
protocol InputGraphNodeProtocol {
/// Range of this node within the displayed text.
var displayedTextRange: InputGraphStructure.Range { get set }
/// Range of this node within the input elements.
var inputElementsRange: InputGraphStructure.Range { get set }
}
/// Requirements for a graph made of a node array plus an `InputGraphStructure`
/// that is queried with the nodes' ranges to find neighbouring node indices.
protocol InputGraphProtocol {
/// The node type stored in the graph.
associatedtype Node: InputGraphNodeProtocol
/// Node storage; index 0 is returned by the `root` accessor.
var nodes: [Node] { get set }
/// Structure queried for next/previous node indices and updated on insert/remove.
var structure: InputGraphStructure { get set }
}
extension InputGraphProtocol {
    /// The root node, i.e. the element stored at index 0 of `nodes`.
    var root: Node {
        self.nodes[0]
    }

    /// Indices of the nodes following `node`, looked up in `structure` by the end indices of its ranges.
    func nextIndices(for node: Node) -> IndexSet {
        let displayedTextEnd = node.displayedTextRange.endIndex
        let inputElementsEnd = node.inputElementsRange.endIndex
        return self.structure.nextIndices(
            displayedTextEndIndex: displayedTextEnd,
            inputElementsEndIndex: inputElementsEnd
        )
    }

    /// The nodes following `node`, in the order of `nextIndices(for:)`.
    func next(for node: Node) -> [Node] {
        var followers: [Node] = []
        for nodeIndex in self.nextIndices(for: node) {
            followers.append(self.nodes[nodeIndex])
        }
        return followers
    }

    /// Indices of the nodes preceding `node`, looked up in `structure` by the start indices of its ranges.
    func prevIndices(for node: Node) -> IndexSet {
        let displayedTextStart = node.displayedTextRange.startIndex
        let inputElementsStart = node.inputElementsRange.startIndex
        return self.structure.prevIndices(
            displayedTextStartIndex: displayedTextStart,
            inputElementsStartIndex: inputElementsStart
        )
    }

    /// The nodes preceding `node`, in the order of `prevIndices(for:)`.
    func prev(for node: Node) -> [Node] {
        var predecessors: [Node] = []
        for nodeIndex in self.prevIndices(for: node) {
            predecessors.append(self.nodes[nodeIndex])
        }
        return predecessors
    }

    /// Removes the node at `index` from `structure`. The `nodes` array itself is left untouched.
    /// - Parameter index: Index of the node to remove; must not be 0 (the root).
    mutating func remove(at index: Int) {
        assert(index != 0, "Node at index 0 is root and must not be removed.")
        self.structure.remove(at: index)
    }

    /// Inserts `node` through `structure`, which also updates the node array it is given.
    /// The index returned by `structure.insert` is discarded.
    mutating func insert(_ node: Node) {
        // Work on a local copy and write it back afterwards.
        // NOTE(review): presumably this avoids overlapping mutable accesses to `self`
        // (`self.structure` and `self.nodes` in one call) — confirm.
        var updatedNodes = self.nodes
        _ = self.structure.insert(
            node,
            nodes: &updatedNodes,
            displayedTextRange: node.displayedTextRange,
            inputElementsRange: node.inputElementsRange
        )
        self.nodes = consume updatedNodes
    }
}

View File

@ -46,6 +46,7 @@ final class InputGraphTests: XCTestCase {
.init(character: "a", inputStyle: .roman2kana),
])
XCTAssertEqual(graph.nodes.count, 3) // Root nodes
XCTAssertNil(graph.nodes.first(where: {$0.character == ""}))
}
do {
let graph = InputGraph.build(input: [
@ -123,10 +124,31 @@ final class InputGraphTests: XCTestCase {
do {
// ttt
let graph = InputGraph.build(input: [
.init(character: "i", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "a", inputStyle: .roman2kana),
])
XCTAssertNil(graph.nodes.first(where: {$0.character == "t"}))
XCTAssertNil(graph.nodes.first(where: {$0.character == ""}))
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(1, 2), inputElementsRange: .startIndex(1), correction: .none)
)
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(2, 3), inputElementsRange: .endIndex(4), correction: .none)
)
}
do {
// ttt
let graph = InputGraph.build(input: [
.init(character: "t", inputStyle: .roman2kana),
.init(character: "t", inputStyle: .roman2kana),
.init(character: "a", inputStyle: .roman2kana),
])
XCTAssertNil(graph.nodes.first(where: {$0.character == "t"}))
XCTAssertNil(graph.nodes.first(where: {$0.character == ""}))
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)
@ -143,6 +165,8 @@ final class InputGraphTests: XCTestCase {
.init(character: "t", inputStyle: .roman2kana),
.init(character: "a", inputStyle: .roman2kana),
])
XCTAssertNil(graph.nodes.first(where: {$0.character == "t"}))
XCTAssertNil(graph.nodes.first(where: {$0.character == ""}))
XCTAssertEqual(
graph.nodes.first(where: {$0.character == ""}),
.init(character: "", displayedTextRange: .range(0, 1), inputElementsRange: .startIndex(0), correction: .none)

View File

@ -0,0 +1,228 @@
//
// LookupGraphTests.swift
//
//
// Created by miwa on 2024/02/23.
//
import XCTest
import Foundation
@testable import KanaKanjiConverterModule
/// A graph that mirrors an `InputGraph` and additionally carries, for every node,
/// the character id used for dictionary (LOUDS) lookup.
struct LookupGraph: InputGraphProtocol {
    /// One node of the lookup graph.
    struct Node: Equatable, InputGraphNodeProtocol {
        /// The character this node represents.
        var character: Character
        /// The id obtained for `character` from the `character2CharId` closure passed to `build`.
        var charId: UInt8
        /// LOUDS node indices associated with this node (empty by default).
        var loudsNodeIndices: Set<Int> = []
        var displayedTextRange: InputGraphStructure.Range
        var inputElementsRange: InputGraphStructure.Range
        var correction: InputGraph.Correction = .none
    }

    /// Node storage; a freshly created graph contains only the root node.
    var nodes: [Node] = [
        // root node
        Node(character: "\0", charId: 0x00, displayedTextRange: .endIndex(0), inputElementsRange: .endIndex(0))
    ]

    /// Connectivity information; copied from the input graph in `build`.
    var structure: InputGraphStructure = InputGraphStructure()

    /// Builds a lookup graph from `input`.
    ///
    /// Every input node is converted one-to-one (same order, same ranges and correction),
    /// and the graph structure is copied unchanged.
    /// - Parameters:
    ///   - input: The input graph to convert.
    ///   - character2CharId: Maps a node's character to its lookup id.
    static func build(input: InputGraph, character2CharId: (Character) -> UInt8) -> Self {
        var lookupNodes: [Node] = []
        for source in input.nodes {
            lookupNodes.append(
                Node(
                    character: source.character,
                    charId: character2CharId(source.character),
                    displayedTextRange: source.displayedTextRange,
                    inputElementsRange: source.inputElementsRange,
                    correction: source.correction
                )
            )
        }
        return Self(nodes: lookupNodes, structure: input.structure)
    }
}
extension LOUDS {
    /// Walks `lookupGraph` depth-first from `startGraphNodeIndex` and follows the matching
    /// character edges in this LOUDS at the same time.
    ///
    /// - Parameters:
    ///   - lookupGraph: The graph whose character ids are searched.
    ///   - startGraphNodeIndex: Index of the graph node the walk starts at (defaults to 0).
    /// - Returns: A pair of
    ///   1. every LOUDS node index that was matched (index 1, the search origin, is always included), and
    ///   2. for each matched LOUDS node index, the end indices (displayed text / input elements)
    ///      of each graph node that matched there.
    func byfixNodeIndices(_ lookupGraph: LookupGraph, startGraphNodeIndex: Int = 0) -> (IndexSet, [Int: [(displayedTextEndIndex: Int?, inputElementsEndIndex: Int?)]]) {
        typealias PendingVisit = (
            node: LookupGraph.Node,
            lastLoudsNodeIndex: Int
        )
        var matchedLoudsNodeIndices = IndexSet(integer: 1)
        // Maps a LOUDS node index to the end indices of the graph nodes matched at it.
        var endIndicesByLoudsNodeIndex: [Int: [(displayedTextEndIndex: Int?, inputElementsEndIndex: Int?)]] = [:]
        var pending: [PendingVisit] = [(lookupGraph.nodes[startGraphNodeIndex], 1)]

        while let (current, parentLoudsNodeIndex) = pending.popLast() {
            // A graph node whose character has no edge from the parent LOUDS node ends this path.
            guard let matchedIndex = self.searchCharNodeIndex(from: parentLoudsNodeIndex, char: current.charId) else {
                continue
            }
            endIndicesByLoudsNodeIndex[matchedIndex, default: []].append((current.displayedTextRange.endIndex, current.inputElementsRange.endIndex))
            matchedLoudsNodeIndices.insert(matchedIndex)

            for nextIndex in lookupGraph.nextIndices(for: current) {
                let candidate = lookupGraph.nodes[nextIndex]
                // When both end indices are known, the successor must end strictly later
                // than the current node; otherwise it is not followed.
                if let currentEnd = current.displayedTextRange.endIndex,
                   let candidateEnd = candidate.displayedTextRange.endIndex,
                   currentEnd >= candidateEnd {
                    continue
                }
                if let currentEnd = current.inputElementsRange.endIndex,
                   let candidateEnd = candidate.inputElementsRange.endIndex,
                   currentEnd >= candidateEnd {
                    continue
                }
                pending.append((node: candidate, lastLoudsNodeIndex: matchedIndex))
            }
        }
        return (matchedLoudsNodeIndices, endIndicesByLoudsNodeIndex)
    }
}
extension DicdataStore {
    /// Builds a `ConvertGraph` by looking up dictionary entries for every node of `inputGraph`.
    ///
    /// The input graph is first converted to a `LookupGraph` (characters are converted with
    /// `toKatakana()` before being mapped to ids). Each graph node reachable from the root is then
    /// used as the start of a prefix search in the LOUDS identified by that node's character, and
    /// the dictionary entries found are turned into lattice nodes keyed by the graph node index.
    ///
    /// - Parameters:
    ///   - inputGraph: The input graph; consumed by this call.
    ///   - option: Options forwarded to the dictionary data loader.
    /// - Returns: The convert graph built from the lookup graph and the collected lattice nodes.
    func buildConvertGraph(inputGraph: consuming InputGraph, option: ConvertRequestOptions) -> ConvertGraph {
        /// Combines an optional start and an optional end index into a range value.
        func makeRange(start: Int?, end: Int?) -> InputGraphStructure.Range {
            switch (start, end) {
            case let (s?, e?): return .range(s, e)
            case (let s?, nil): return .startIndex(s)
            case (nil, let e?): return .endIndex(e)
            case (nil, nil): return .unknown
            }
        }

        let lookupGraph = LookupGraph.build(input: consume inputGraph, character2CharId: { self.character2charId($0.toKatakana()) })
        var stack: [Int] = Array(lookupGraph.nextIndices(for: lookupGraph.root))
        // A node can be reachable through several paths. Each node's result depends only on the
        // node itself, so it is processed once; without this set the same lookup would be repeated
        // for every path leading to the node.
        var visitedGraphNodeIndices: Set<Int> = []
        var graphNodeIndex2LatticeNodes: [Int: [ConvertGraph.LatticeNode]] = [:]

        while let graphNodeIndex = stack.popLast() {
            guard visitedGraphNodeIndices.insert(graphNodeIndex).inserted else {
                continue
            }
            let graphNode = lookupGraph.nodes[graphNodeIndex]
            let identifier = String(graphNode.character.toKatakana())
            // Nodes whose dictionary cannot be loaded contribute nothing, and their successors
            // are not scheduled from here.
            guard let louds = self.loadLOUDS(identifier: identifier) else {
                continue
            }
            let (loudsNodeIndices, loudsNodeIndex2GraphEndIndices) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: graphNodeIndex)
            let dicdataWithIndex: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = self.getDicdataFromLoudstxt3(identifier: identifier, indices: loudsNodeIndices, option: option)
            // Entries that start at index 0 (in either coordinate) are connected to the BOS node.
            let startsAtBeginning = graphNode.displayedTextRange.startIndex == 0 || graphNode.inputElementsRange.startIndex == 0

            var latticeNodes: [ConvertGraph.LatticeNode] = []
            for (loudsNodeIndex, dicdata) in dicdataWithIndex {
                for endIndex in loudsNodeIndex2GraphEndIndices[loudsNodeIndex, default: []] {
                    let displayedTextRange = makeRange(start: graphNode.displayedTextRange.startIndex, end: endIndex.displayedTextEndIndex)
                    let inputElementsRange = makeRange(start: graphNode.inputElementsRange.startIndex, end: endIndex.inputElementsEndIndex)
                    if startsAtBeginning {
                        latticeNodes.append(contentsOf: dicdata.map {
                            .init(data: $0, displayedTextRange: displayedTextRange, inputElementsRange: inputElementsRange, prevs: [.BOSNode()])
                        })
                    } else {
                        latticeNodes.append(contentsOf: dicdata.map {
                            .init(data: $0, displayedTextRange: displayedTextRange, inputElementsRange: inputElementsRange)
                        })
                    }
                }
            }
            graphNodeIndex2LatticeNodes[graphNodeIndex] = latticeNodes
            stack.append(contentsOf: lookupGraph.nextIndices(for: graphNode))
        }
        return ConvertGraph.build(input: consume lookupGraph, nodeIndex2LatticeNode: graphNodeIndex2LatticeNodes)
    }

    /// Loads the dictionary entries for the given LOUDS node indices.
    ///
    /// Indices are grouped by `index >> 11` (blocks of 2048). For each group the data file named
    /// `identifier` followed by the group number is queried with the in-block offsets
    /// (`index & 2047`). The order of the returned elements across groups is unspecified because
    /// the groups are iterated in dictionary order.
    ///
    /// - Parameters:
    ///   - identifier: Identifier of the dictionary to read.
    ///   - indices: LOUDS node indices to fetch.
    ///   - option: Options forwarded to `LOUDS.getDataForLoudstxt3`.
    /// - Returns: The entries returned by `LOUDS.getDataForLoudstxt3` for every group, concatenated.
    func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
        // split = 2048
        let indicesByBlock = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
        var data: [(loudsNodeIndex: Int, dicdata: [DicdataElement])] = []
        for (block, blockIndices) in indicesByBlock {
            // FIXME: use local option
            // NOTE(review): only the in-block offsets are passed down, while callers look results
            // up by the original LOUDS node index — confirm which one `loudsNodeIndex` holds.
            data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(block)", indices: blockIndices.map {$0 & 2047}, option: option))
        }
        return data
    }
}
/// Tests for the prefix search over a `LookupGraph` (`LOUDS.byfixNodeIndices`).
final class LookupGraphTests: XCTestCase {
    /// Request options using the default dictionary, with prediction and learning disabled.
    func requestOptions() -> ConvertRequestOptions {
        .withDefaultDictionary(requireJapanesePrediction: false, requireEnglishPrediction: false, keyboardLanguage: .ja_JP, learningType: .nothing, memoryDirectoryURL: URL(fileURLWithPath: ""), sharedContainerURL: URL(fileURLWithPath: ""), metadata: .init(appVersionString: "Test"))
    }

    /// Checks that the dictionary entries found by the prefix search contain the expected words.
    ///
    /// Preconditions (the LOUDS loads, the start node exists) are unwrapped with `XCTUnwrap`,
    /// so a failed precondition stops the test instead of continuing from a fallback value.
    func testByfixNodeIndices() throws {
        let dicdataStore = DicdataStore(convertRequestOptions: requestOptions())
        let character2CharId: (Character) -> UInt8 = { dicdataStore.character2charId($0.toKatakana()) }
        // NOTE(review): several string/character literals in this test are empty as seen here;
        // presumably they hold kana in the repository — confirm before relying on them.
        let louds = try XCTUnwrap(LOUDS.load("", option: requestOptions()))
        do {
            // Direct (non-romaji) input of three characters.
            let inputGraph = InputGraph.build(input: [
                .init(character: "", inputStyle: .direct),
                .init(character: "", inputStyle: .direct),
                .init(character: "", inputStyle: .direct),
            ])
            let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: character2CharId)
            let startNodeIndex = try XCTUnwrap(lookupGraph.nextIndices(for: lookupGraph.root).first(where: { lookupGraph.nodes[$0].character == "" }))
            let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex)
            let dicdataWithIndex = dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
            let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
            // Expected entries, grouped as in the original test.
            XCTAssertTrue(dicdata.contains {$0.word == ""})
            XCTAssertTrue(dicdata.contains {$0.word == "鹿"})
            XCTAssertTrue(dicdata.contains {$0.word == "歯科"})
            XCTAssertTrue(dicdata.contains {$0.word == "滋賀"})
            XCTAssertTrue(dicdata.contains {$0.word == "司会"})
            XCTAssertTrue(dicdata.contains {$0.word == "視界"})
            XCTAssertTrue(dicdata.contains {$0.word == "死界"})
            XCTAssertTrue(dicdata.contains {$0.word == "市外"})
            XCTAssertTrue(dicdata.contains {$0.word == "市街"})
            XCTAssertTrue(dicdata.contains {$0.word == "死骸"})
        }
        do {
            // ts -> ta
            let inputGraph = InputGraph.build(input: [
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
            ])
            let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: character2CharId)
            let startNodeIndex = try XCTUnwrap(lookupGraph.nextIndices(for: lookupGraph.root).first(where: { lookupGraph.nodes[$0].character == "" }))
            let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex)
            let dicdataWithIndex = dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
            let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
            // Expected entries, grouped as in the original test.
            XCTAssertTrue(dicdata.contains {$0.word == ""})
            XCTAssertTrue(dicdata.contains {$0.word == ""})
            XCTAssertTrue(dicdata.contains {$0.word == ""})
            XCTAssertTrue(dicdata.contains {$0.word == ""})
            XCTAssertTrue(dicdata.contains {$0.word == ""})
            // Entries that are only reachable when the trailing "ts" is read as "ta".
            XCTAssertTrue(dicdata.contains {$0.word == "死体"})
            XCTAssertTrue(dicdata.contains {$0.word == "肢体"})
        }
        do {
            // Romaji input containing a doubled consonant ("tt").
            let inputGraph = InputGraph.build(input: [
                .init(character: "s", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "t", inputStyle: .roman2kana),
                .init(character: "a", inputStyle: .roman2kana),
                .init(character: "i", inputStyle: .roman2kana),
            ])
            let lookupGraph = LookupGraph.build(input: inputGraph, character2CharId: character2CharId)
            let startNodeIndex = try XCTUnwrap(lookupGraph.nextIndices(for: lookupGraph.root).first(where: { lookupGraph.nodes[$0].character == "" }))
            let (loudsNodeIndices, _) = louds.byfixNodeIndices(lookupGraph, startGraphNodeIndex: startNodeIndex)
            let dicdataWithIndex = dicdataStore.getDicdataFromLoudstxt3(identifier: "", indices: loudsNodeIndices, option: requestOptions())
            let dicdata = dicdataWithIndex.flatMapSet { $0.dicdata }
            // Expected entries, grouped as in the original test.
            XCTAssertTrue(dicdata.contains {$0.word == ""})
            XCTAssertTrue(dicdata.contains {$0.word == "知っ"})
            XCTAssertTrue(dicdata.contains {$0.word == "しっ"})
            XCTAssertTrue(dicdata.contains {$0.word == "叱咤"})
            XCTAssertTrue(dicdata.contains {$0.word == "失態"})
        }
    }
}

View File

@ -0,0 +1,95 @@
//
// extension Kana2Kanji+InputGraph.swift
//
//
// Created by miwa on 2024/02/23.
//
import Foundation
@testable import KanaKanjiConverterModule
import XCTest
extension Kana2Kanji {
    /// Runs the experimental graph-based conversion for `inputData`.
    ///
    /// The steps are: build an `InputGraph` from the composing text's input, build a
    /// `ConvertGraph` from it through the dictionary store, then run `convertAll`.
    /// Progress is printed to standard output at each step.
    /// - Parameters:
    ///   - inputData: The composing text to convert.
    ///   - option: Conversion options, forwarded to every step.
    /// - Returns: The lattice node produced by `ConvertGraph.convertAll`.
    func _experimental_all(_ inputData: ComposingText, option: ConvertRequestOptions) -> ConvertGraph.LatticeNode {
        // Step 1: build the input graph.
        print(#file, "start")
        let graph = InputGraph.build(input: inputData.input)
        // Step 2: look up the dictionary and build the convert graph.
        print(#file, "lookup", graph)
        let lattice = self.dicdataStore.buildConvertGraph(inputGraph: consume graph, option: option)
        // Step 3: run the conversion over the convert graph.
        print(#file, "convert", lattice)
        return lattice.convertAll(option: option, dicdataStore: self.dicdataStore)
    }
}
private extension ConvertGraph.LatticeNode {
    /// Returns one joined string per entry in `prevs`.
    ///
    /// For each previous node, the `prev` chain is followed to its end and the words of
    /// every node on it, followed by this node's own word, are concatenated in
    /// first-to-last order.
    func joinedPrevs() -> [String] {
        self.prevs.map { head -> String in
            // Collected last-to-first, then reversed before joining.
            var backwardWords = [self.data.word, head.data.word]
            var cursor: (any RegisteredNodeProtocol) = head
            while let earlier = cursor.prev {
                backwardWords.append(earlier.data.word)
                cursor = earlier
            }
            return backwardWords.reversed().joined()
        }
    }
}
/// End-to-end tests for the experimental graph-based conversion.
final class ExperimentalConversionTests: XCTestCase {
    /// Request options using the default dictionary, with prediction and learning disabled.
    func requestOptions() -> ConvertRequestOptions {
        .withDefaultDictionary(
            requireJapanesePrediction: false,
            requireEnglishPrediction: false,
            keyboardLanguage: .ja_JP,
            learningType: .nothing,
            memoryDirectoryURL: URL(fileURLWithPath: ""),
            sharedContainerURL: URL(fileURLWithPath: ""),
            metadata: .init(appVersionString: "Test")
        )
    }

    /// Converts several inputs and checks that the expected candidate is among the results.
    func testConversion() throws {
        let store = DicdataStore(requestOptions: requestOptions())
        let converter = Kana2Kanji(dicdataStore: store)

        /// Runs the experimental conversion and returns the joined candidate strings.
        func candidates(for composingText: ComposingText) -> [String] {
            converter._experimental_all(composingText, option: requestOptions()).joinedPrevs()
        }

        do {
            // Direct kana input.
            var composingText = ComposingText()
            composingText.insertAtCursorPosition("みらいえいが", inputStyle: .direct)
            XCTAssertTrue(candidates(for: composingText).contains("未来映画"))
        }
        do {
            // The same phrase typed as romaji.
            var composingText = ComposingText()
            composingText.insertAtCursorPosition("miraieiga", inputStyle: .roman2kana)
            XCTAssertTrue(candidates(for: composingText).contains("未来映画"))
        }
        do {
            var composingText = ComposingText()
            composingText.insertAtCursorPosition("sitta", inputStyle: .roman2kana)
            XCTAssertTrue(candidates(for: composingText).contains("知った"))
        }
        do {
            var composingText = ComposingText()
            composingText.insertAtCursorPosition("unda", inputStyle: .roman2kana)
            XCTAssertTrue(candidates(for: composingText).contains("産んだ"))
        }
        do {
            var composingText = ComposingText()
            composingText.insertAtCursorPosition("ixtsuta", inputStyle: .roman2kana)
            XCTAssertTrue(candidates(for: composingText).contains("言った"))
        }
        do {
            var composingText = ComposingText()
            composingText.insertAtCursorPosition("its", inputStyle: .roman2kana)
            XCTAssertTrue(candidates(for: composingText).contains("いた"))
        }
        do {
            var composingText = ComposingText()
            composingText.insertAtCursorPosition("itsi", inputStyle: .roman2kana)
            let joined = candidates(for: composingText)
            print(joined)
            XCTAssertTrue(joined.contains("痛い"))
        }
    }
}