diff --git a/Sources/CliTool/Subcommands/SessionCommand.swift b/Sources/CliTool/Subcommands/SessionCommand.swift index ff53948..9cf7019 100644 --- a/Sources/CliTool/Subcommands/SessionCommand.swift +++ b/Sources/CliTool/Subcommands/SessionCommand.swift @@ -220,7 +220,7 @@ extension Subcommands { print("Submit \(candidate.text)") converter.setCompletedData(candidate) converter.updateLearningData(candidate) - composingText.prefixComplete(correspondingCount: candidate.correspondingCount) + composingText.prefixComplete(composingCount: candidate.composingCount) if composingText.isEmpty { composingText.stopComposition() converter.stopComposition() diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift index a213d03..86a0770 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessing.swift @@ -28,11 +28,16 @@ extension Kana2Kanji { /// (4)ノードをアップデートした上で返却する。 func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)") - let count: Int = inputData.input.count + let inputCount: Int = inputData.input.count + let surfaceCount = inputData.convertTarget.count let result: LatticeNode = LatticeNode.EOSNode - let lattice: Lattice = Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)}) + let lattice: Lattice = Lattice( + inputCount: inputCount, + surfaceCount: surfaceCount, + rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)} + ) // 「i文字目から始まるnodes」に対して - for (i, nodeArray) in 
lattice.enumerated() { + for (i, nodeArray) in lattice.indexedNodes() { // それぞれのnodeに対して for node in nodeArray { if node.prevs.isEmpty { @@ -43,7 +48,7 @@ extension Kana2Kanji { } // 生起確率を取得する。 let wValue: PValue = node.data.value() - if i == 0 { + if i.isZero { // valuesを更新する node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} } else { @@ -51,12 +56,12 @@ extension Kana2Kanji { node.values = node.prevs.map {$0.totalValue + wValue} } // 変換した文字数 - let nextIndex: Int = node.inputRange.endIndex + let nextIndex = node.range.endIndex // 文字数がcountと等しい場合登録する - if nextIndex == count { + if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) { self.updateResultNode(with: node, resultNode: result) } else { - self.updateNextNodes(with: node, nextNodes: lattice[inputIndex: nextIndex], nBest: N_best) + self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best) } } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift index 5a6a35a..7cf89b5 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift @@ -20,11 +20,16 @@ extension Kana2Kanji { /// (4)ノードをアップデートした上で返却する。 func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) { debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)") - let count: Int = inputData.input.count + let inputCount: Int = inputData.input.count + let surfaceCount: Int = inputData.convertTarget.count let result: LatticeNode = LatticeNode.EOSNode - let lattice: Lattice 
= Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}) + let lattice: Lattice = Lattice( + inputCount: inputCount, + surfaceCount: surfaceCount, + rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)} + ) // 「i文字目から始まるnodes」に対して - for (i, nodeArray) in lattice.enumerated() { + for (i, nodeArray) in lattice.indexedNodes() { // それぞれのnodeに対して for node in nodeArray { if node.prevs.isEmpty { @@ -32,7 +37,7 @@ extension Kana2Kanji { } // 生起確率を取得する。 let wValue: PValue = node.data.value() - if i == 0 { + if i.isZero { // valuesを更新する node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} } else { @@ -40,9 +45,9 @@ extension Kana2Kanji { node.values = node.prevs.map {$0.totalValue + wValue} } // 変換した文字数 - let nextIndex: Int = node.inputRange.endIndex + let nextIndex = node.range.endIndex // 文字数がcountと等しい場合登録する - if nextIndex == count { + if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) { for index in node.prevs.indices { let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index]) // 学習データやユーザ辞書由来の場合は素通しする @@ -61,7 +66,7 @@ extension Kana2Kanji { Array(($0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word).utf8) } // nodeの繋がる次にあり得る全てのnextnodeに対して - for nextnode in lattice[inputIndex: nextIndex] { + for nextnode in lattice[index: nextIndex] { // クラスの連続確率を計算する。 let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid) // nodeの持っている全てのprevnodeに対して diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PredictionProcessing.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PredictionProcessing.swift index 06b1fa3..3278d59 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PredictionProcessing.swift +++ 
b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PredictionProcessing.swift @@ -14,7 +14,7 @@ extension Kana2Kanji { return Candidate( text: left.text + right.text, value: left.value + right.value, - correspondingCount: left.correspondingCount + right.correspondingCount, + composingCount: .composite(left.composingCount, right.composingCount), lastMid: right.lastMid, data: left.data + right.data ) @@ -26,7 +26,7 @@ extension Kana2Kanji { return Candidate( text: left.text + right.text, value: newValue, - correspondingCount: left.correspondingCount + right.correspondingCount, + composingCount: .composite(left.composingCount, right.composingCount), lastMid: right.lastMid, data: left.data + right.data ) @@ -57,7 +57,7 @@ extension Kana2Kanji { prefixCandidate.data = prefixCandidateData prefixCandidate.text = prefixCandidateData.reduce(into: "") { $0 += $1.word } - prefixCandidate.correspondingCount = prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count } + prefixCandidate.composingCount = .surfaceCount(prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count }) } totalWord.insert(contentsOf: element.word, at: totalWord.startIndex) diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift index 16a4aad..5a1f6ba 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/PrefixCompletionProcessing.swift @@ -17,29 +17,30 @@ extension Kana2Kanji { /// (2)次に、再度計算して良い候補を得る。 func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { debug("確定直後の変換、前は:", previousResult.inputData, "後は:", inputData) - let count = inputData.input.count + let 
inputCount = inputData.input.count + let surfaceCount = inputData.convertTarget.count + // TODO: 実際にはもっとチェックが必要。具体的には、input/convertTarget両方のsuffixが一致する必要がある + let convertedInputCount = previousResult.inputData.input.count - inputCount + let convertedSurfaceCount = previousResult.inputData.convertTarget.count - surfaceCount // (1) let start = RegisteredNode.fromLastCandidate(completedData) - let lattice = previousResult.lattice.suffix(count) - for (i, nodeArray) in lattice.enumerated() { - if i == .zero { - for node in nodeArray { - node.prevs = [start] - // inputRangeを確定した部分のカウント分ずらす - node.inputRange = node.inputRange.startIndex - completedData.correspondingCount ..< node.inputRange.endIndex - completedData.correspondingCount - } + let lattice = previousResult.lattice.suffix(inputCount: inputCount, surfaceCount: surfaceCount) + for (i, nodeArray) in lattice.indexedNodes() { + let prevs: [RegisteredNode] = if i.isZero { + [start] } else { - for node in nodeArray { - node.prevs = [] - // inputRangeを確定した部分のカウント分ずらす - node.inputRange = node.inputRange.startIndex - completedData.correspondingCount ..< node.inputRange.endIndex - completedData.correspondingCount - } + [] + } + for node in nodeArray { + node.prevs = prevs + // inputRangeを確定した部分のカウント分ずらす + node.range = node.range.offseted(inputOffset: -convertedInputCount, surfaceOffset: -convertedSurfaceCount) } } // (2) let result = LatticeNode.EOSNode - for (i, nodeArray) in lattice.enumerated() { + for (i, nodeArray) in lattice.indexedNodes() { for node in nodeArray { if node.prevs.isEmpty { continue @@ -49,7 +50,7 @@ extension Kana2Kanji { } // 生起確率を取得する。 let wValue = node.data.value() - if i == 0 { + if i.isZero { // valuesを更新する node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} } else { @@ -57,11 +58,11 @@ extension Kana2Kanji { node.values = node.prevs.map {$0.totalValue + wValue} } // 変換した文字数 - let nextIndex = node.inputRange.endIndex - if nextIndex 
!= count { - self.updateNextNodes(with: node, nextNodes: lattice[inputIndex: nextIndex], nBest: N_best) - } else { + let nextIndex = node.range.endIndex + if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) { self.updateResultNode(with: node, resultNode: result) + } else { + self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best) } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift index 2634a20..187a045 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/SuffixReplacementProcessing.swift @@ -6,6 +6,7 @@ // Copyright © 2020 ensan. All rights reserved. // +import Algorithms import Foundation import SwiftUtils @@ -24,27 +25,43 @@ extension Kana2Kanji { /// /// (5)ノードをアップデートした上で返却する。 - func kana2lattice_changed(_ inputData: ComposingText, N_best: Int, counts: (deleted: Int, added: Int), previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { + func kana2lattice_changed( + _ inputData: ComposingText, + N_best: Int, + counts: (deletedInput: Int, addedInput: Int, deletedSurface: Int, addedSurface: Int), + previousResult: (inputData: ComposingText, lattice: Lattice), + needTypoCorrection: Bool + ) -> (result: LatticeNode, lattice: Lattice) { // (0) - let count = inputData.input.count - let commonCount = previousResult.inputData.input.count - counts.deleted - debug("kana2lattice_changed", inputData, counts, previousResult.inputData, count, commonCount) + let inputCount = inputData.input.count + let surfaceCount = inputData.convertTarget.count + let commonInputCount = previousResult.inputData.input.count - counts.deletedInput + let commonSurfaceCount = 
previousResult.inputData.convertTarget.count - counts.deletedSurface + debug("kana2lattice_changed", inputData, counts, previousResult.inputData, inputCount, commonInputCount) // (1) - var lattice = previousResult.lattice.prefix(commonCount) + var lattice = previousResult.lattice.prefix(inputCount: commonInputCount, surfaceCount: commonSurfaceCount) let terminalNodes: Lattice - if counts.added == 0 { - terminalNodes = Lattice(nodes: lattice.map { - $0.filter { - $0.inputRange.endIndex == count + if counts.addedInput == 0 { + terminalNodes = Lattice( + inputCount: inputCount, + surfaceCount: surfaceCount, + rawNodes: lattice.map { + $0.filter { + $0.range.endIndex == .input(inputCount) || $0.range.endIndex == .surface(surfaceCount) + } } - }) + ) } else { // (2) - let addedNodes: Lattice = Lattice(nodes: (0.. (result: LatticeNode, lattice: Lattice) { debug("キャッシュから復元、元の文字は:", previousResult.inputData.convertTarget) - let count = previousResult.inputData.input.count + let inputCount = previousResult.inputData.input.count + let surfaceCount = previousResult.inputData.convertTarget.count // (1) let result = LatticeNode.EOSNode for nodeArray in previousResult.lattice { - for node in nodeArray where node.inputRange.endIndex == count { + for node in nodeArray where node.range.endIndex == .input(inputCount) || node.range.endIndex == .surface(surfaceCount) { if node.prevs.isEmpty { continue } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Kana2Kanji.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Kana2Kanji.swift index cd3c9c3..5080734 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Kana2Kanji.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Kana2Kanji.swift @@ -34,11 +34,14 @@ struct Kana2Kanji { let text = data.clauses.map {$0.clause.text}.joined() let value = data.clauses.last!.value + mmValue.value let lastMid = data.clauses.last!.clause.mid - let correspondingCount = data.clauses.reduce(into: 0) {$0 +=
$1.clause.inputRange.count} + + let composingCount: ComposingCount = data.clauses.reduce(into: .inputCount(0)) { + $0 = .composite($0, $1.clause.range.count) + } return Candidate( text: text, value: value, - correspondingCount: correspondingCount, + composingCount: composingCount, lastMid: lastMid, data: data.data ) diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift index e451b34..3370e59 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Lattice.swift @@ -1,49 +1,180 @@ +import Algorithms +import SwiftUtils + struct Lattice: Sequence { typealias Element = [LatticeNode] - typealias Iterator = IndexingIterator<[[LatticeNode]]> - init(nodes: [[LatticeNode]] = []) { - self.nodes = nodes + init() { + self.inputIndexedNodes = [] + self.surfaceIndexedNodes = [] } - private var nodes: [[LatticeNode]] + init(inputCount: Int, surfaceCount: Int, rawNodes: [[LatticeNode]]) { + self.inputIndexedNodes = .init(repeating: [], count: inputCount) + self.surfaceIndexedNodes = .init(repeating: [], count: surfaceCount) - func prefix(_ k: Int) -> Lattice { - var lattice = Lattice(nodes: self.nodes.prefix(k).map {(nodes: [LatticeNode]) in - nodes.filter {$0.inputRange.endIndex <= k} - }) - while lattice.nodes.last?.isEmpty ?? 
false { - lattice.nodes.removeLast() + for nodes in rawNodes { + guard let first = nodes.first else { continue } + switch first.range.startIndex { + case .surface(let i): + self.surfaceIndexedNodes[i] = nodes + case .input(let i): + self.inputIndexedNodes[i] = nodes + } } - return lattice } - func suffix(_ count: Int) -> Lattice { - Lattice(nodes: self.nodes.suffix(count)) + private init(inputIndexedNodes: [[LatticeNode]], surfaceIndexedNodes: [[LatticeNode]]) { + self.inputIndexedNodes = inputIndexedNodes + self.surfaceIndexedNodes = surfaceIndexedNodes + } + + private var inputIndexedNodes: [[LatticeNode]] + private var surfaceIndexedNodes: [[LatticeNode]] + + func prefix(inputCount: Int, surfaceCount: Int) -> Lattice { + let filterClosure: (LatticeNode) -> Bool = { (node: LatticeNode) -> Bool in + switch node.range.endIndex { + case .input(let value): + value <= inputCount + case .surface(let value): + value <= surfaceCount + } + } + let newInputIndexedNodes = Array(self.inputIndexedNodes.prefix(inputCount).map {(nodes: [LatticeNode]) in + nodes.filter(filterClosure) + }.reversed().drop(while: \.isEmpty).reversed()) /* trim trailing empties only, so array offsets keep matching start indices */ + let newSurfaceIndexedNodes = Array(self.surfaceIndexedNodes.prefix(surfaceCount).map {(nodes: [LatticeNode]) in + nodes.filter(filterClosure) + }.reversed().drop(while: \.isEmpty).reversed()) + + return Lattice(inputIndexedNodes: newInputIndexedNodes, surfaceIndexedNodes: newSurfaceIndexedNodes) + } + + func suffix(inputCount: Int, surfaceCount: Int) -> Lattice { + Lattice( + inputIndexedNodes: self.inputIndexedNodes.suffix(inputCount), + surfaceIndexedNodes: self.surfaceIndexedNodes.suffix(surfaceCount) + ) } mutating func merge(_ lattice: Lattice) { - for (index, nodeArray) in lattice.nodes.enumerated() where index < self.nodes.endIndex { - self.nodes[index].append(contentsOf: nodeArray) + for (index, nodeArray) in lattice.inputIndexedNodes.enumerated() where index < self.inputIndexedNodes.endIndex { + self.inputIndexedNodes[index].append(contentsOf: nodeArray) } - if self.nodes.endIndex <
lattice.nodes.endIndex { - for nodeArray in lattice.nodes[self.nodes.endIndex...] { - self.nodes.append(nodeArray) + if self.inputIndexedNodes.endIndex < lattice.inputIndexedNodes.endIndex { + for nodeArray in lattice.inputIndexedNodes[self.inputIndexedNodes.endIndex...] { + self.inputIndexedNodes.append(nodeArray) + } + } + for (index, nodeArray) in lattice.surfaceIndexedNodes.enumerated() where index < self.surfaceIndexedNodes.endIndex { + self.surfaceIndexedNodes[index].append(contentsOf: nodeArray) + } + if self.surfaceIndexedNodes.endIndex < lattice.surfaceIndexedNodes.endIndex { + for nodeArray in lattice.surfaceIndexedNodes[self.surfaceIndexedNodes.endIndex...] { + self.surfaceIndexedNodes.append(nodeArray) } } } subscript(inputIndex i: Int) -> [LatticeNode] { get { - self.nodes[i] + self.inputIndexedNodes[i] } } - func makeIterator() -> IndexingIterator<[[LatticeNode]]> { - self.nodes.makeIterator() + subscript(index index: LatticeIndex) -> [LatticeNode] { + get { + switch index { + case .input(let i): self.inputIndexedNodes[i] + case .surface(let i): self.surfaceIndexedNodes[i] + } + } + } + + func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> { + self.inputIndexedNodes.enumerated().lazy.map { (.input($0.offset), $0.element) } + .chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) }) + } + + func makeIterator() -> Chain2Sequence<[[LatticeNode]], [[LatticeNode]]>.Iterator { + self.inputIndexedNodes.chained(self.surfaceIndexedNodes).makeIterator() } var isEmpty: Bool { - self.nodes.isEmpty + self.inputIndexedNodes.isEmpty && self.surfaceIndexedNodes.isEmpty + } + + enum LatticeIndex: Sendable, Equatable { + case surface(Int) + case input(Int) + + var isZero: Bool { + self == .surface(0) || self == .input(0) + } + } + + enum LatticeRange: Sendable, Equatable { + static var zero: Self { + .input(from: 0, to: 0) + } + case surface(from: Int, to: Int) + case input(from: Int, to: Int) + + var 
count: ComposingCount { + switch self { + case .surface(let from, let to): + .surfaceCount(to - from) + case .input(let from, let to): + .inputCount(to - from) + } + } + + var startIndex: LatticeIndex { + switch self { + case .surface(let from, _): + .surface(from) + case .input(let from, _): + .input(from) + } + } + + var endIndex: LatticeIndex { + switch self { + case .surface(_, let to): + .surface(to) + case .input(_, let to): + .input(to) + } + } + + func merged(with other: Self) -> Self? { + switch (self, other) { + case (let .surface(l, ml), let .surface(mr, r)): + if ml == mr { + .surface(from: l, to: r) + } else { + nil + } + case (let .input(l, ml), let .input(mr, r)): + if ml == mr { + .input(from: l, to: r) + } else { + nil + } + case (.surface, .input), (.input, .surface): + nil + } + } + + func offseted(inputOffset: Int, surfaceOffset: Int) -> Self { + switch self { + case .surface(from: let from, to: let to): + .surface(from: from + surfaceOffset, to: to + surfaceOffset) + case .input(from: let from, to: let to): + .input(from: from + inputOffset, to: to + inputOffset) + } + } + } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/LatticeNode.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/LatticeNode.swift index 92d33ab..e18cb3c 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/LatticeNode.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/LatticeNode.swift @@ -17,23 +17,23 @@ public final class LatticeNode { /// `prevs`の各要素に対応するスコアのデータ var values: [PValue] = [] /// inputData.input内のrange - var inputRange: Range + var range: Lattice.LatticeRange /// `EOS`に対応するノード。 static var EOSNode: LatticeNode { - LatticeNode(data: DicdataElement.EOSData, inputRange: 0..<0) + LatticeNode(data: DicdataElement.EOSData, range: .zero) } - init(data: DicdataElement, inputRange: Range) { + init(data: DicdataElement, range: Lattice.LatticeRange) { self.data = data self.values = [data.value()] - 
self.inputRange = inputRange + self.range = range } /// `LatticeNode`の持っている情報を反映した`RegisteredNode`を作成する /// `LatticeNode`は複数の過去のノードを持つことができるが、`RegisteredNode`は1つしか持たない。 func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode { - RegisteredNode(data: self.data, registered: self.prevs[index], totalValue: value, inputRange: self.inputRange) + RegisteredNode(data: self.data, registered: self.prevs[index], totalValue: value, range: self.range) } /// 再帰的にノードを遡り、`CandidateData`を構築する関数 diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/PostCompositionPredictionCandidate.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/PostCompositionPredictionCandidate.swift index 5649b45..29a73bb 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/PostCompositionPredictionCandidate.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/PostCompositionPredictionCandidate.swift @@ -36,7 +36,7 @@ public struct PostCompositionPredictionCandidate { candidate.data.append(data) } candidate.value = self.value - candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count } + candidate.composingCount = .surfaceCount(candidate.rubyCount) candidate.lastMid = data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? candidate.lastMid return candidate case .replacement(let targetData, let replacementData): @@ -45,7 +45,7 @@ public struct PostCompositionPredictionCandidate { candidate.text = candidate.data.reduce(into: "") {$0 += $1.word} candidate.value = self.value candidate.lastMid = candidate.data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? 
MIDData.BOS.mid - candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count } + candidate.composingCount = .surfaceCount(candidate.rubyCount) return candidate } } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Prediction.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Prediction.swift index 31d3e9b..1cde0ac 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Prediction.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Prediction.swift @@ -22,9 +22,14 @@ extension Kana2Kanji { /// - note: /// この関数の役割は意味連接の考慮にある。 func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] { - debug("getPredictionCandidates", composingText, lastClause.inputRange, lastClause.text) - let lastRuby = ComposingText.getConvertTarget(for: composingText.input[lastClause.inputRange]).toKatakana() - let lastRubyCount = lastClause.inputRange.count + debug("getPredictionCandidates", composingText, lastClause.range, lastClause.text) + let lastRuby = switch lastClause.range { + case let .input(left, right): + ComposingText.getConvertTarget(for: composingText.input[left.. 
{get} + var range: Lattice.LatticeRange {get} } struct RegisteredNode: RegisteredNodeProtocol { @@ -25,19 +25,19 @@ struct RegisteredNode: RegisteredNodeProtocol { /// 始点からこのノードまでのコスト let totalValue: PValue /// `composingText`の`input`で対応する範囲 - let inputRange: Range + let range: Lattice.LatticeRange - init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, inputRange: Range) { + init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, range: Lattice.LatticeRange) { self.data = data self.prev = registered self.totalValue = totalValue - self.inputRange = inputRange + self.range = range } /// 始点ノードを生成する関数 /// - Returns: 始点ノードのデータ static func BOSNode() -> RegisteredNode { - RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, inputRange: 0 ..< 0) + RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, range: .zero) } /// 入力中、確定した部分を考慮した始点ノードを生成する関数 @@ -47,7 +47,7 @@ struct RegisteredNode: RegisteredNodeProtocol { data: DicdataElement(word: "", ruby: "", lcid: CIDData.BOS.cid, rcid: candidate.data.last?.rcid ?? 
CIDData.BOS.cid, mid: candidate.lastMid, value: 0), registered: nil, totalValue: 0, - inputRange: 0 ..< 0 + range: .zero ) } } @@ -59,7 +59,7 @@ extension RegisteredNodeProtocol { guard let prev else { let unit = ClauseDataUnit() unit.mid = self.data.mid - unit.inputRange = self.inputRange + unit.range = self.range return CandidateData(clauses: [(clause: unit, value: .zero)], data: []) } var lastcandidate = prev.getCandidateData() // 自分に至るregisterdそれぞれのデータに処理 @@ -75,7 +75,11 @@ extension RegisteredNodeProtocol { if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) { // 文節ではないので、最後に追加する。 lastClause.text.append(self.data.word) - lastClause.inputRange = lastClause.inputRange.startIndex ..< self.inputRange.endIndex + if let newRange = lastClause.range.merged(with: self.range) { + lastClause.range = newRange + } else { + fatalError("このケースは想定していません。") + } // 最初だった場合を想定している if (lastClause.mid == 500 && self.data.mid != 500) || DicdataStore.includeMMValueCalculation(self.data) { lastClause.mid = self.data.mid @@ -88,7 +92,7 @@ extension RegisteredNodeProtocol { else { let unit = ClauseDataUnit() unit.text = self.data.word - unit.inputRange = self.inputRange + unit.range = self.range if DicdataStore.includeMMValueCalculation(self.data) { unit.mid = self.data.mid } diff --git a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift index f253be8..5cd2a19 100644 --- a/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift +++ b/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Zenzai/zenzai.swift @@ -65,7 +65,7 @@ extension Kana2Kanji { var constraint = zenzaiCache?.getNewConstraint(for: inputData) ?? 
PrefixConstraint([]) debug("initial constraint", constraint) let eosNode = LatticeNode.EOSNode - var lattice: Lattice = Lattice(nodes: []) + var lattice: Lattice = Lattice() var constructedCandidates: [(RegisteredNode, Candidate)] = [] var insertedCandidates: [(RegisteredNode, Candidate)] = [] defer { diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/Candidate.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/Candidate.swift index e62bd7f..8489880 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/Candidate.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/Candidate.swift @@ -17,28 +17,32 @@ final class ClauseDataUnit { /// The text of the unit. var text: String = "" /// The range of the unit in input text. - var inputRange: Range = 0 ..< 0 + var range: Lattice.LatticeRange = .zero /// Merge the given unit to this unit. /// - Parameter: /// - unit: The unit to merge. func merge(with unit: ClauseDataUnit) { self.text.append(unit.text) - self.inputRange = self.inputRange.startIndex ..< unit.inputRange.endIndex + if let newRange = self.range.merged(with: unit.range) { + self.range = newRange + } else { + fatalError("このケースは想定していません。") + } self.nextLcid = unit.nextLcid } } extension ClauseDataUnit: Equatable { static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool { - lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.inputRange == rhs.inputRange + lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.range == rhs.range } } #if DEBUG extension ClauseDataUnit: CustomDebugStringConvertible { var debugDescription: String { - "ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), inputRange: \(inputRange))" + "ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), range: \(range))" } } #endif @@ -67,14 +71,24 @@ public enum CompleteAction: Equatable, Sendable { case moveCursor(Int) } +public enum ComposingCount: Equatable, Sendable { + /// 
Number of characters covered in `composingText.input`. + case inputCount(Int) + /// Number of characters covered in `composingText.convertTarget`. + case surfaceCount(Int) + + /// Concatenation of two counts. + indirect case composite(Self, Self) +} + /// 変換候補のデータ public struct Candidate: Sendable { /// 入力となるテキスト public var text: String /// 評価値 public var value: PValue - /// composingText.inputにおいて対応する文字数。 - public var correspondingCount: Int + /// How much of the composing text this candidate covers, counted on the input side, the surface side, or a mix. + public var composingCount: ComposingCount /// 最後のmid(予測変換に利用) public var lastMid: Int /// DicdataElement列 @@ -86,14 +100,17 @@ public struct Candidate: Sendable { /// - note: 文字数表示のために追加したフラグ public let inputable: Bool - public init(text: String, value: PValue, correspondingCount: Int, lastMid: Int, data: [DicdataElement], actions: [CompleteAction] = [], inputable: Bool = true) { + /// Total ruby length, derived from `data` on every read so it cannot go stale when `data` is mutated after init. + public var rubyCount: Int { self.data.reduce(into: 0) { $0 += $1.ruby.count } } + + public init(text: String, value: PValue, composingCount: ComposingCount, lastMid: Int, data: [DicdataElement], actions: [CompleteAction] = [], inputable: Bool = true) { self.text = text self.value = value - self.correspondingCount = correspondingCount + self.composingCount = composingCount self.lastMid = lastMid self.data = data self.actions = actions self.inputable = inputable } /// 後から`action`を追加した形を生成する関数 /// - parameters: @@ -138,7 +155,7 @@ public struct Candidate: Sendable { /// 入力を文としたとき、prefixになる文節に対応するCandidateを作る public static func makePrefixClauseCandidate(data: some Collection) -> Candidate { var text = "" - var correspondingCount = 0 + var composingCount = 0 var lastRcid = CIDData.BOS.cid var lastMid = 501 var candidateData: [DicdataElement] = [] @@ -148,7 +165,7 @@ public struct Candidate: Sendable { break } text.append(item.word) - correspondingCount += item.ruby.count + composingCount += item.ruby.count lastRcid = item.rcid // 最初だった場合を想定している if item.mid != 500 && DicdataStore.includeMMValueCalculation(item) { @@ -159,7 +176,7 @@ public struct Candidate: Sendable { return Candidate(
text: text, value: -5, - correspondingCount: correspondingCount, + composingCount: .surfaceCount(composingCount), lastMid: lastMid, data: candidateData ) diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift index 17e7dda..ff0a2b2 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/KanaKanjiConverter.swift @@ -168,7 +168,7 @@ import EfficientNGram var textIndex = [String: Int]() for candidate in candidates where !candidate.text.isEmpty && !seenCandidates.contains(candidate.text) { if let index = textIndex[candidate.text] { - if result[index].value < candidate.value || result[index].correspondingCount < candidate.correspondingCount { + if result[index].value < candidate.value || result[index].rubyCount < candidate.rubyCount { result[index] = candidate } } else { @@ -219,7 +219,7 @@ import EfficientNGram let candidate: Candidate = Candidate( text: ruby, value: penalty, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: data ) @@ -232,7 +232,7 @@ import EfficientNGram let candidate: Candidate = Candidate( text: word, value: value, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: data ) @@ -251,7 +251,7 @@ import EfficientNGram let candidate: Candidate = Candidate( text: ruby, value: penalty, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: data ) @@ -264,7 +264,7 @@ import EfficientNGram let candidate: Candidate = Candidate( text: word, value: value, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: data ) @@ -368,7 +368,7 @@ import EfficientNGram private func 
getAdditionalCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] { var candidates: [Candidate] = [] let string = inputData.convertTarget.toKatakana() - let correspondingCount = inputData.input.count + let composingCount: ComposingCount = .inputCount(inputData.input.count) do { // カタカナ let value = -14 * getKatakanaScore(string) @@ -376,7 +376,7 @@ import EfficientNGram let katakana = Candidate( text: string, value: value, - correspondingCount: correspondingCount, + composingCount: composingCount, lastMid: MIDData.一般.mid, data: [data] ) @@ -390,7 +390,7 @@ import EfficientNGram let hiragana = Candidate( text: hiraganaString, value: -14.5, - correspondingCount: correspondingCount, + composingCount: composingCount, lastMid: MIDData.一般.mid, data: [data] ) @@ -403,7 +403,7 @@ import EfficientNGram let uppercasedLetter = Candidate( text: word, value: -14.6, - correspondingCount: correspondingCount, + composingCount: composingCount, lastMid: MIDData.一般.mid, data: [data] ) @@ -416,7 +416,7 @@ import EfficientNGram let fullWidthLetter = Candidate( text: word, value: -14.7, - correspondingCount: correspondingCount, + composingCount: composingCount, lastMid: MIDData.一般.mid, data: [data] ) @@ -429,7 +429,7 @@ import EfficientNGram let halfWidthKatakana = Candidate( text: word, value: -15, - correspondingCount: correspondingCount, + composingCount: composingCount, lastMid: MIDData.一般.mid, data: [data] ) @@ -472,7 +472,7 @@ import EfficientNGram return Candidate( text: first.clause.text, value: first.value, - correspondingCount: first.clause.inputRange.count, + composingCount: first.clause.range.count, lastMid: first.clause.mid, data: Array(candidateData.data[0...count]) ) @@ -529,10 +529,10 @@ import EfficientNGram var seenCandidate: Set = full_candidate.mapSet {$0.text} // 文節のみ変換するパターン(上位5件) let clause_candidates = self.getUniqueCandidate(clauseCandidates, seenCandidates: seenCandidate).min(count: 5) { - if $0.correspondingCount == 
$1.correspondingCount { + if $0.rubyCount == $1.rubyCount { $0.value > $1.value } else { - $0.correspondingCount > $1.correspondingCount + $0.rubyCount > $1.rubyCount } } seenCandidate.formUnion(clause_candidates.map {$0.text}) @@ -543,7 +543,7 @@ import EfficientNGram Candidate( text: $0.data.word, value: $0.data.value(), - correspondingCount: $0.inputRange.count, + composingCount: $0.range.count, lastMid: $0.data.mid, data: [$0.data] ) @@ -554,8 +554,8 @@ import EfficientNGram // 文字列の長さごとに並べ、かつその中で評価の高いものから順に並べる。 var word_candidates: [Candidate] = self.getUniqueCandidate(dicCandidates.chained(additionalCandidates), seenCandidates: seenCandidate) .sorted { - let count0 = $0.correspondingCount - let count1 = $1.correspondingCount + let count0 = $0.rubyCount + let count1 = $1.rubyCount return count0 == count1 ? $0.value > $1.value : count0 > count1 } seenCandidate.formUnion(word_candidates.map {$0.text}) @@ -590,10 +590,10 @@ import EfficientNGram } // 文節のみ変換するパターン(上位5件) let firstClauseResults = self.getUniqueCandidate(clauseCandidates).min(count: 5) { - if $0.correspondingCount == $1.correspondingCount { + if $0.rubyCount == $1.rubyCount { $0.value > $1.value } else { - $0.correspondingCount > $1.correspondingCount + $0.rubyCount > $1.rubyCount } } return ConversionResult(mainResults: result, firstClauseResults: firstClauseResults) @@ -662,7 +662,7 @@ import EfficientNGram let diff = inputData.differenceSuffix(to: previousInputData) debug("\(#function): 最後尾文字置換用の関数を呼びます、差分は\(diff)") - let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: (diff.deleted, diff.addedCount), previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection) + let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: diff, previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection) self.previousInputData = inputData return result } diff --git 
a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CalendarCandidate.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CalendarCandidate.swift index dbe3af4..8a983a1 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CalendarCandidate.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CalendarCandidate.swift @@ -21,7 +21,7 @@ extension KanaKanjiConverter { return result.map {[Candidate( text: $0, value: -15, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: [DicdataElement(word: $0, ruby: string, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)] )]} ?? [] @@ -116,7 +116,7 @@ extension KanaKanjiConverter { Candidate( text: $0, value: -18, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.年.mid, data: [DicdataElement(word: $0, ruby: string, cid: CIDData.一般名詞.cid, mid: MIDData.年.mid, value: -18)] ) @@ -125,7 +125,7 @@ extension KanaKanjiConverter { Candidate( text: $0, value: -19, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.年.mid, data: [DicdataElement(word: $0, ruby: string, cid: CIDData.一般名詞.cid, mid: MIDData.年.mid, value: -19)] ) diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CommaSeparatedNumber.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CommaSeparatedNumber.swift index 23c910c..52d1f58 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CommaSeparatedNumber.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/CommaSeparatedNumber.swift @@ -38,7 +38,7 @@ extension KanaKanjiConverter { let candidate = Candidate( text: result, value: -10, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: 
MIDData.一般.mid, data: [DicdataElement(word: result, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)] ) diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/EmailAddress.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/EmailAddress.swift index a883ac9..84124a2 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/EmailAddress.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/EmailAddress.swift @@ -46,7 +46,7 @@ extension KanaKanjiConverter { Candidate( text: address, value: baseValue - PValue(i), - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: [DicdataElement(word: address, ruby: string, cid: .zero, mid: MIDData.一般.mid, value: baseValue - PValue(i))] ) diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/RomanTypographys.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/RomanTypographys.swift index 8733c37..1e8b331 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/RomanTypographys.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/RomanTypographys.swift @@ -37,7 +37,7 @@ extension KanaKanjiConverter { Candidate( text: $0, value: -15, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: [DicdataElement(word: $0, ruby: string, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)] ) diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/TimeExpression.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/TimeExpression.swift index 630f7cf..ca21766 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/TimeExpression.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/TimeExpression.swift @@ -17,7 +17,7 @@ extension 
KanaKanjiConverter { let candidate = Candidate( text: timeExpression, value: -10, - correspondingCount: numberString.count, + composingCount: .surfaceCount(numberString.count), lastMid: MIDData.一般.mid, data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)] ) @@ -31,7 +31,7 @@ extension KanaKanjiConverter { let candidate = Candidate( text: timeExpression, value: -10, - correspondingCount: numberString.count, + composingCount: .surfaceCount(numberString.count), lastMid: MIDData.一般.mid, data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)] ) diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/Unicode.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/Unicode.swift index 79e1475..e1461cb 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/Unicode.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/Unicode.swift @@ -22,7 +22,7 @@ extension KanaKanjiConverter { Candidate( text: char, value: value0, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: [DicdataElement(word: char, ruby: string, cid: .zero, mid: MIDData.一般.mid, value: value0)] ) diff --git a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/VersionCandidate.swift b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/VersionCandidate.swift index 6a55962..c069144 100644 --- a/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/VersionCandidate.swift +++ b/Sources/KanaKanjiConverterModule/ConverterAPI/SpecialConversion/VersionCandidate.swift @@ -20,7 +20,7 @@ extension KanaKanjiConverter { return [Candidate( text: versionString, value: -30, - correspondingCount: inputData.input.count, + composingCount: .inputCount(inputData.input.count), lastMid: MIDData.一般.mid, data: 
[DicdataElement(word: versionString, ruby: inputData.convertTarget.toKatakana(), cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -30)] )] diff --git a/Sources/KanaKanjiConverterModule/DictionaryManagement/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DictionaryManagement/DicdataStore.swift index 1a88a1a..a13cd8a 100644 --- a/Sources/KanaKanjiConverterModule/DictionaryManagement/DicdataStore.swift +++ b/Sources/KanaKanjiConverterModule/DictionaryManagement/DicdataStore.swift @@ -427,7 +427,7 @@ public final class DicdataStore { guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else { return nil } - let node = LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1) + let node = LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1)) node.prevs.append(RegisteredNode.BOSNode()) return node } @@ -437,7 +437,7 @@ public final class DicdataStore { guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else { return nil } - return LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1) + return LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1)) } return result } diff --git a/Sources/KanaKanjiConverterModule/InputManagement/ComposingText.swift b/Sources/KanaKanjiConverterModule/InputManagement/ComposingText.swift index c51bbe3..e185097 100644 --- a/Sources/KanaKanjiConverterModule/InputManagement/ComposingText.swift +++ b/Sources/KanaKanjiConverterModule/InputManagement/ComposingText.swift @@ -341,18 +341,37 @@ public struct ComposingText: Sendable { /// 文頭の方を確定させる関数 /// - parameters: /// - correspondingCount: `input`において対応する文字数 - public mutating func prefixComplete(correspondingCount: Int) { - let correspondingCount = min(correspondingCount, self.input.count) - self.input.removeFirst(correspondingCount) - // convetTargetを更新する - let newConvertTarget = Self.getConvertTarget(for: self.input) - // カーソルの位置は、消す文字数の分削除する - let cursorDelta = self.convertTarget.count - newConvertTarget.count - 
self.convertTarget = newConvertTarget - self.convertTargetCursorPosition -= cursorDelta - // もしも左端にカーソルが位置していたら、文頭に移動させる - if self.convertTargetCursorPosition == 0 { - self.convertTargetCursorPosition = self.convertTarget.count + public mutating func prefixComplete(composingCount: ComposingCount) { + switch composingCount { + case .inputCount(let correspondingCount): + let correspondingCount = min(correspondingCount, self.input.count) + self.input.removeFirst(correspondingCount) + // convetTargetを更新する + let newConvertTarget = Self.getConvertTarget(for: self.input) + // カーソルの位置は、消す文字数の分削除する + let cursorDelta = self.convertTarget.count - newConvertTarget.count + self.convertTarget = newConvertTarget + self.convertTargetCursorPosition -= cursorDelta + // もしも左端にカーソルが位置していたら、文頭に移動させる + if self.convertTargetCursorPosition == 0 { + self.convertTargetCursorPosition = self.convertTarget.count + } + case .surfaceCount(let correspondingCount): + // 先頭correspondingCountを削除する操作に相当する + // カーソルを移動する + let prefix = self.convertTarget.prefix(correspondingCount) + let index = self.forceGetInputCursorPosition(target: prefix) + self.input = Array(self.input[index...]) + self.convertTarget = String(self.convertTarget.dropFirst(correspondingCount)) + self.convertTargetCursorPosition -= correspondingCount + // もしも左端にカーソルが位置していたら、文頭に移動させる + if self.convertTargetCursorPosition == 0 { + self.convertTargetCursorPosition = self.convertTarget.count + } + + case .composite(let left, let right): + self.prefixComplete(composingCount: left) + self.prefixComplete(composingCount: right) } } @@ -580,17 +599,20 @@ extension ComposingText.ConvertTargetElement: Equatable {} extension ComposingText { /// 2つの`ComposingText`のデータを比較し、差分を計算する。 /// `convertTarget`との整合性をとるため、`convertTarget`に合わせた上で比較する - func differenceSuffix(to previousData: ComposingText) -> (deleted: Int, addedCount: Int) { + func differenceSuffix(to previousData: ComposingText) -> (deletedInput: Int, addedInput: Int, deletedSurface: Int, 
addedSurface: Int) { // k→か、sh→しゃ、のような場合、差分は全てx ... lastの範囲に現れるので、差分計算が問題なく動作する // かn → かんs、のような場合、「かんs、んs、s」のようなものは現れるが、「かん」が生成できない // 本質的にこれはポリシーの問題であり、「は|しゃ」の変換で「はし」が部分変換として現れないことと同根の問題である。 // 解決のためには、inputの段階で「ん」をdirectで扱うべきである。 - // 差分を計算する let common = self.input.commonPrefix(with: previousData.input) let deleted = previousData.input.count - common.count let added = self.input.dropFirst(common.count).count - return (deleted, added) + + let commonSurface = self.convertTarget.commonPrefix(with: previousData.convertTarget) + let deletedSurface = previousData.convertTarget.count - commonSurface.count + let addedSurface = self.convertTarget.suffix(from: commonSurface.startIndex).count + return (deleted, added, deletedSurface, addedSurface) } func inputHasSuffix(inputOf suffix: ComposingText) -> Bool { diff --git a/Tests/KanaKanjiConverterModuleTests/ClauseDataUnitTests.swift b/Tests/KanaKanjiConverterModuleTests/ClauseDataUnitTests.swift index 7123f4f..50c5ca2 100644 --- a/Tests/KanaKanjiConverterModuleTests/ClauseDataUnitTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/ClauseDataUnitTests.swift @@ -14,19 +14,19 @@ final class ClauseDataUnitTests: XCTestCase { do { let unit1 = ClauseDataUnit() unit1.text = "僕が" - unit1.inputRange = 0 ..< 3 + unit1.range = .input(from: 0, to: 3) unit1.mid = 0 unit1.nextLcid = 0 let unit2 = ClauseDataUnit() unit2.text = "走る" - unit2.inputRange = 3 ..< 6 + unit2.range = .input(from: 3, to: 6) unit2.mid = 1 unit2.nextLcid = 1 unit1.merge(with: unit2) XCTAssertEqual(unit1.text, "僕が走る") - XCTAssertEqual(unit1.inputRange, 0 ..< 6) + XCTAssertEqual(unit1.range, .input(from: 0, to: 6)) XCTAssertEqual(unit1.nextLcid, 1) XCTAssertEqual(unit1.mid, 0) } @@ -34,19 +34,19 @@ final class ClauseDataUnitTests: XCTestCase { do { let unit1 = ClauseDataUnit() unit1.text = "君は" - unit1.inputRange = 0 ..< 3 + unit1.range = .input(from: 0, to: 3) unit1.mid = 0 unit1.nextLcid = 0 let unit2 = ClauseDataUnit() unit2.text = "笑った" - unit2.inputRange = 3 
..< 7 + unit2.range = .input(from: 3, to: 7) unit2.mid = 3 unit2.nextLcid = 3 unit1.merge(with: unit2) XCTAssertEqual(unit1.text, "君は笑った") - XCTAssertEqual(unit1.inputRange, 0 ..< 7) + XCTAssertEqual(unit1.range, .input(from: 0, to: 7)) XCTAssertEqual(unit1.nextLcid, 3) XCTAssertEqual(unit1.mid, 0) } diff --git a/Tests/KanaKanjiConverterModuleTests/ComposingTextTests.swift b/Tests/KanaKanjiConverterModuleTests/ComposingTextTests.swift index 440e6be..da96691 100644 --- a/Tests/KanaKanjiConverterModuleTests/ComposingTextTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/ComposingTextTests.swift @@ -202,8 +202,8 @@ final class ComposingTextTests: XCTestCase { var c2 = ComposingText() c2.insertAtCursorPosition("hasiru", inputStyle: .roman2kana) - XCTAssertEqual(c2.differenceSuffix(to: c1).deleted, 0) - XCTAssertEqual(c2.differenceSuffix(to: c1).addedCount, 1) + XCTAssertEqual(c2.differenceSuffix(to: c1).deletedInput, 0) + XCTAssertEqual(c2.differenceSuffix(to: c1).addedInput, 1) } do { var c1 = ComposingText() @@ -212,8 +212,8 @@ final class ComposingTextTests: XCTestCase { var c2 = ComposingText() c2.insertAtCursorPosition("tukatte", inputStyle: .roman2kana) - XCTAssertEqual(c2.differenceSuffix(to: c1).deleted, 0) - XCTAssertEqual(c2.differenceSuffix(to: c1).addedCount, 1) + XCTAssertEqual(c2.differenceSuffix(to: c1).deletedInput, 0) + XCTAssertEqual(c2.differenceSuffix(to: c1).addedInput, 1) } } } diff --git a/Tests/KanaKanjiConverterModuleTests/ConverterTests/CandidateTests.swift b/Tests/KanaKanjiConverterModuleTests/ConverterTests/CandidateTests.swift index 33e8255..5b31cdb 100644 --- a/Tests/KanaKanjiConverterModuleTests/ConverterTests/CandidateTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/ConverterTests/CandidateTests.swift @@ -16,7 +16,7 @@ final class CandidateTests: XCTestCase { let candidate = Candidate( text: text, value: -40, - correspondingCount: 4, + composingCount: .inputCount(4), lastMid: 5, data: [DicdataElement(word: text, ruby: "サイコロ", 
cid: 0, mid: 5, value: -40)] ) @@ -27,7 +27,7 @@ final class CandidateTests: XCTestCase { print(candidate2.text) XCTAssertTrue(Set((1...3).map(String.init)).contains(candidate2.text)) XCTAssertEqual(candidate.value, candidate2.value) - XCTAssertEqual(candidate.correspondingCount, candidate2.correspondingCount) + XCTAssertEqual(candidate.composingCount, candidate2.composingCount) XCTAssertEqual(candidate.lastMid, candidate2.lastMid) XCTAssertEqual(candidate.data, candidate2.data) XCTAssertEqual(candidate.actions, candidate2.actions) @@ -38,7 +38,7 @@ final class CandidateTests: XCTestCase { let candidate = Candidate( text: text, value: 0, - correspondingCount: 0, + composingCount: .inputCount(0), lastMid: 0, data: [DicdataElement(word: text, ruby: "", cid: 0, mid: 0, value: 0)] ) diff --git a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift index 573b474..9ba57d4 100644 --- a/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/LearningMemoryTests.swift @@ -88,7 +88,7 @@ final class LearningMemoryTests: XCTestCase { Candidate( text: element.word, value: element.value(), - correspondingCount: 3, + composingCount: .inputCount(3), lastMid: element.mid, data: [element] ) @@ -128,7 +128,7 @@ final class LearningMemoryTests: XCTestCase { Candidate( text: element.word, value: element.value(), - correspondingCount: 3, + composingCount: .inputCount(3), lastMid: element.mid, data: [element] ) diff --git a/Tests/KanaKanjiConverterModuleTests/RegisteredNodeTests.swift b/Tests/KanaKanjiConverterModuleTests/RegisteredNodeTests.swift index 54aabda..d498b41 100644 --- a/Tests/KanaKanjiConverterModuleTests/RegisteredNodeTests.swift +++ b/Tests/KanaKanjiConverterModuleTests/RegisteredNodeTests.swift @@ -12,16 +12,16 @@ import XCTest final class RegisteredNodeTests: XCTestCase { func testBOSNode() throws { let bos = RegisteredNode.BOSNode() - 
XCTAssertEqual(bos.inputRange, 0..<0) + XCTAssertEqual(bos.range, Lattice.LatticeRange.zero) XCTAssertNil(bos.prev) XCTAssertEqual(bos.totalValue, 0) XCTAssertEqual(bos.data.rcid, CIDData.BOS.cid) } func testFromLastCandidate() throws { - let candidate = Candidate(text: "我輩は猫", value: -20, correspondingCount: 7, lastMid: 100, data: [DicdataElement(word: "我輩は猫", ruby: "ワガハイハネコ", cid: CIDData.一般名詞.cid, mid: 100, value: -20)]) + let candidate = Candidate(text: "我輩は猫", value: -20, composingCount: .inputCount(7), lastMid: 100, data: [DicdataElement(word: "我輩は猫", ruby: "ワガハイハネコ", cid: CIDData.一般名詞.cid, mid: 100, value: -20)]) let bos = RegisteredNode.fromLastCandidate(candidate) - XCTAssertEqual(bos.inputRange, 0..<0) + XCTAssertEqual(bos.range, Lattice.LatticeRange.zero) XCTAssertNil(bos.prev) XCTAssertEqual(bos.totalValue, 0) XCTAssertEqual(bos.data.rcid, CIDData.一般名詞.cid) @@ -34,37 +34,37 @@ final class RegisteredNodeTests: XCTestCase { data: DicdataElement(word: "我輩", ruby: "ワガハイ", cid: CIDData.一般名詞.cid, mid: 1, value: -5), registered: bos, totalValue: -10, - inputRange: 0..<4 + range: .input(from: 0, to: 4) ) let node2 = RegisteredNode( data: DicdataElement(word: "は", ruby: "ハ", cid: CIDData.係助詞ハ.cid, mid: 2, value: -2), registered: node1, totalValue: -13, - inputRange: 4..<5 + range: .input(from: 4, to: 5) ) let node3 = RegisteredNode( data: DicdataElement(word: "猫", ruby: "ネコ", cid: CIDData.一般名詞.cid, mid: 3, value: -4), registered: node2, totalValue: -20, - inputRange: 5..<7 + range: .input(from: 5, to: 7) ) let node4 = RegisteredNode( data: DicdataElement(word: "です", ruby: "デス", cid: CIDData.助動詞デス基本形.cid, mid: 4, value: -3), registered: node3, totalValue: -25, - inputRange: 7..<9 + range: .input(from: 7, to: 9) ) let result = node4.getCandidateData() let clause1 = ClauseDataUnit() clause1.text = "我輩は" clause1.nextLcid = CIDData.一般名詞.cid - clause1.inputRange = 0..<5 + clause1.range = .input(from: 0, to: 5) clause1.mid = 1 let clause2 = ClauseDataUnit() 
clause2.text = "猫です" clause2.nextLcid = CIDData.EOS.cid - clause2.inputRange = 5..<9 + clause2.range = .input(from: 5, to: 9) clause2.mid = 3 let expectedResult: CandidateData = CandidateData( diff --git a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift index 556e7c8..70e12b7 100644 --- a/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift +++ b/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/DicdataStoreTests.swift @@ -290,7 +290,7 @@ final class DicdataStoreTests: XCTestCase { sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana) let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false) XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"})) - XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.inputRange, 0 ..< 11) + XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.range, .input(from: 0, to: 11)) } // 動的ユーザ辞書の単語が通常の辞書よりも優先されることのテスト