feat: Latticeの操作をconvertTargetベースのindexとinputベースのindexの二重化

This commit is contained in:
Miwa / Ensan
2025-07-10 01:12:11 +09:00
parent dca5119e59
commit 704fa9871f
31 changed files with 401 additions and 188 deletions

View File

@ -220,7 +220,7 @@ extension Subcommands {
print("Submit \(candidate.text)") print("Submit \(candidate.text)")
converter.setCompletedData(candidate) converter.setCompletedData(candidate)
converter.updateLearningData(candidate) converter.updateLearningData(candidate)
composingText.prefixComplete(correspondingCount: candidate.correspondingCount) composingText.prefixComplete(composingCount: candidate.composingCount)
if composingText.isEmpty { if composingText.isEmpty {
composingText.stopComposition() composingText.stopComposition()
converter.stopComposition() converter.stopComposition()

View File

@ -28,11 +28,16 @@ extension Kana2Kanji {
/// (4) /// (4)
func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) {
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)") debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)")
let count: Int = inputData.input.count let inputCount: Int = inputData.input.count
let surfaceCount = inputData.convertTarget.count
let result: LatticeNode = LatticeNode.EOSNode let result: LatticeNode = LatticeNode.EOSNode
let lattice: Lattice = Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)}) let lattice: Lattice = Lattice(
inputCount: inputCount,
surfaceCount: surfaceCount,
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)}
)
// inodes // inodes
for (i, nodeArray) in lattice.enumerated() { for (i, nodeArray) in lattice.indexedNodes() {
// node // node
for node in nodeArray { for node in nodeArray {
if node.prevs.isEmpty { if node.prevs.isEmpty {
@ -43,7 +48,7 @@ extension Kana2Kanji {
} }
// //
let wValue: PValue = node.data.value() let wValue: PValue = node.data.value()
if i == 0 { if i.isZero {
// values // values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else { } else {
@ -51,12 +56,12 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue} node.values = node.prevs.map {$0.totalValue + wValue}
} }
// //
let nextIndex: Int = node.inputRange.endIndex let nextIndex = node.range.endIndex
// count // count
if nextIndex == count { if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
self.updateResultNode(with: node, resultNode: result) self.updateResultNode(with: node, resultNode: result)
} else { } else {
self.updateNextNodes(with: node, nextNodes: lattice[inputIndex: nextIndex], nBest: N_best) self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)
} }
} }
} }

View File

@ -20,11 +20,16 @@ extension Kana2Kanji {
/// (4) /// (4)
func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) { func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) {
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)") debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)")
let count: Int = inputData.input.count let inputCount: Int = inputData.input.count
let surfaceCount: Int = inputData.convertTarget.count
let result: LatticeNode = LatticeNode.EOSNode let result: LatticeNode = LatticeNode.EOSNode
let lattice: Lattice = Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}) let lattice: Lattice = Lattice(
inputCount: inputCount,
surfaceCount: surfaceCount,
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}
)
// inodes // inodes
for (i, nodeArray) in lattice.enumerated() { for (i, nodeArray) in lattice.indexedNodes() {
// node // node
for node in nodeArray { for node in nodeArray {
if node.prevs.isEmpty { if node.prevs.isEmpty {
@ -32,7 +37,7 @@ extension Kana2Kanji {
} }
// //
let wValue: PValue = node.data.value() let wValue: PValue = node.data.value()
if i == 0 { if i.isZero {
// values // values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else { } else {
@ -40,9 +45,9 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue} node.values = node.prevs.map {$0.totalValue + wValue}
} }
// //
let nextIndex: Int = node.inputRange.endIndex let nextIndex = node.range.endIndex
// count // count
if nextIndex == count { if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
for index in node.prevs.indices { for index in node.prevs.indices {
let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index]) let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
// //
@ -61,7 +66,7 @@ extension Kana2Kanji {
Array(($0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word).utf8) Array(($0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word).utf8)
} }
// nodenextnode // nodenextnode
for nextnode in lattice[inputIndex: nextIndex] { for nextnode in lattice[index: nextIndex] {
// //
let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid) let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
// nodeprevnode // nodeprevnode

View File

@ -14,7 +14,7 @@ extension Kana2Kanji {
return Candidate( return Candidate(
text: left.text + right.text, text: left.text + right.text,
value: left.value + right.value, value: left.value + right.value,
correspondingCount: left.correspondingCount + right.correspondingCount, composingCount: .composite(left.composingCount, right.composingCount),
lastMid: right.lastMid, lastMid: right.lastMid,
data: left.data + right.data data: left.data + right.data
) )
@ -26,7 +26,7 @@ extension Kana2Kanji {
return Candidate( return Candidate(
text: left.text + right.text, text: left.text + right.text,
value: newValue, value: newValue,
correspondingCount: left.correspondingCount + right.correspondingCount, composingCount: .composite(left.composingCount, right.composingCount),
lastMid: right.lastMid, lastMid: right.lastMid,
data: left.data + right.data data: left.data + right.data
) )
@ -57,7 +57,7 @@ extension Kana2Kanji {
prefixCandidate.data = prefixCandidateData prefixCandidate.data = prefixCandidateData
prefixCandidate.text = prefixCandidateData.reduce(into: "") { $0 += $1.word } prefixCandidate.text = prefixCandidateData.reduce(into: "") { $0 += $1.word }
prefixCandidate.correspondingCount = prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count } prefixCandidate.composingCount = .surfaceCount(prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count })
} }
totalWord.insert(contentsOf: element.word, at: totalWord.startIndex) totalWord.insert(contentsOf: element.word, at: totalWord.startIndex)

View File

@ -17,29 +17,30 @@ extension Kana2Kanji {
/// (2) /// (2)
func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) {
debug("確定直後の変換、前は:", previousResult.inputData, "後は:", inputData) debug("確定直後の変換、前は:", previousResult.inputData, "後は:", inputData)
let count = inputData.input.count let inputCount = inputData.input.count
let surfaceCount = inputData.convertTarget.count
// TODO: input/convertTargetsuffix
let convertedInputCount = previousResult.inputData.input.count - inputCount
let convertedSurfaceCount = previousResult.inputData.convertTarget.count - surfaceCount
// (1) // (1)
let start = RegisteredNode.fromLastCandidate(completedData) let start = RegisteredNode.fromLastCandidate(completedData)
let lattice = previousResult.lattice.suffix(count) let lattice = previousResult.lattice.suffix(inputCount: inputCount, surfaceCount: surfaceCount)
for (i, nodeArray) in lattice.enumerated() { for (i, nodeArray) in lattice.indexedNodes() {
if i == .zero { let prevs: [RegisteredNode] = if i.isZero {
for node in nodeArray { [start]
node.prevs = [start]
// inputRange
node.inputRange = node.inputRange.startIndex - completedData.correspondingCount ..< node.inputRange.endIndex - completedData.correspondingCount
}
} else { } else {
for node in nodeArray { []
node.prevs = [] }
// inputRange for node in nodeArray {
node.inputRange = node.inputRange.startIndex - completedData.correspondingCount ..< node.inputRange.endIndex - completedData.correspondingCount node.prevs = prevs
} // inputRange
node.range = node.range.offseted(inputOffset: -convertedInputCount, surfaceOffset: -convertedSurfaceCount)
} }
} }
// (2) // (2)
let result = LatticeNode.EOSNode let result = LatticeNode.EOSNode
for (i, nodeArray) in lattice.enumerated() { for (i, nodeArray) in lattice.indexedNodes() {
for node in nodeArray { for node in nodeArray {
if node.prevs.isEmpty { if node.prevs.isEmpty {
continue continue
@ -49,7 +50,7 @@ extension Kana2Kanji {
} }
// //
let wValue = node.data.value() let wValue = node.data.value()
if i == 0 { if i.isZero {
// values // values
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)} node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
} else { } else {
@ -57,11 +58,11 @@ extension Kana2Kanji {
node.values = node.prevs.map {$0.totalValue + wValue} node.values = node.prevs.map {$0.totalValue + wValue}
} }
// //
let nextIndex = node.inputRange.endIndex let nextIndex = node.range.endIndex
if nextIndex != count { if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
self.updateNextNodes(with: node, nextNodes: lattice[inputIndex: nextIndex], nBest: N_best)
} else {
self.updateResultNode(with: node, resultNode: result) self.updateResultNode(with: node, resultNode: result)
} else {
self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)
} }
} }

View File

@ -6,6 +6,7 @@
// Copyright © 2020 ensan. All rights reserved. // Copyright © 2020 ensan. All rights reserved.
// //
import Algorithms
import Foundation import Foundation
import SwiftUtils import SwiftUtils
@ -24,27 +25,43 @@ extension Kana2Kanji {
/// ///
/// (5) /// (5)
func kana2lattice_changed(_ inputData: ComposingText, N_best: Int, counts: (deleted: Int, added: Int), previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) { func kana2lattice_changed(
_ inputData: ComposingText,
N_best: Int,
counts: (deletedInput: Int, addedInput: Int, deletedSurface: Int, addedSurface: Int),
previousResult: (inputData: ComposingText, lattice: Lattice),
needTypoCorrection: Bool
) -> (result: LatticeNode, lattice: Lattice) {
// (0) // (0)
let count = inputData.input.count let inputCount = inputData.input.count
let commonCount = previousResult.inputData.input.count - counts.deleted let surfaceCount = inputData.convertTarget.count
debug("kana2lattice_changed", inputData, counts, previousResult.inputData, count, commonCount) let commonInputCount = previousResult.inputData.input.count - counts.deletedInput
let commonSurfaceCount = previousResult.inputData.convertTarget.count - counts.deletedSurface
debug("kana2lattice_changed", inputData, counts, previousResult.inputData, inputCount, commonInputCount)
// (1) // (1)
var lattice = previousResult.lattice.prefix(commonCount) var lattice = previousResult.lattice.prefix(inputCount: commonInputCount, surfaceCount: commonSurfaceCount)
let terminalNodes: Lattice let terminalNodes: Lattice
if counts.added == 0 { if counts.addedInput == 0 {
terminalNodes = Lattice(nodes: lattice.map { terminalNodes = Lattice(
$0.filter { inputCount: inputCount,
$0.inputRange.endIndex == count surfaceCount: surfaceCount,
rawNodes: lattice.map {
$0.filter {
$0.range.endIndex == .input(inputCount) || $0.range.endIndex == .surface(surfaceCount)
}
} }
}) )
} else { } else {
// (2) // (2)
let addedNodes: Lattice = Lattice(nodes: (0..<count).map {(i: Int) in let addedNodes: Lattice = Lattice(
self.dicdataStore.getLOUDSDataInRange(inputData: inputData, from: i, toIndexRange: max(commonCount, i) ..< count, needTypoCorrection: needTypoCorrection) inputCount: inputCount,
}) surfaceCount: surfaceCount,
rawNodes: (0..<inputCount).map {(i: Int) in
self.dicdataStore.getLOUDSDataInRange(inputData: inputData, from: i, toIndexRange: max(commonInputCount, i) ..< inputCount, needTypoCorrection: needTypoCorrection)
}
)
// (3) // (3)
for nodeArray in lattice { for nodeArray in lattice {
@ -56,8 +73,8 @@ extension Kana2Kanji {
continue continue
} }
// //
let nextIndex = node.inputRange.endIndex let nextIndex = node.range.endIndex
self.updateNextNodes(with: node, nextNodes: addedNodes[inputIndex: nextIndex], nBest: N_best) self.updateNextNodes(with: node, nextNodes: addedNodes[index: nextIndex], nBest: N_best)
} }
} }
lattice.merge(addedNodes) lattice.merge(addedNodes)
@ -86,11 +103,11 @@ extension Kana2Kanji {
// values // values
node.values = node.prevs.map {$0.totalValue + wValue} node.values = node.prevs.map {$0.totalValue + wValue}
} }
let nextIndex = node.inputRange.endIndex let nextIndex = node.range.endIndex
if count == nextIndex { if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
self.updateResultNode(with: node, resultNode: result) self.updateResultNode(with: node, resultNode: result)
} else { } else {
self.updateNextNodes(with: node, nextNodes: terminalNodes[inputIndex: nextIndex], nBest: N_best) self.updateNextNodes(with: node, nextNodes: terminalNodes[index: nextIndex], nBest: N_best)
} }
} }
} }

View File

@ -6,6 +6,7 @@
// Copyright © 2022 ensan. All rights reserved. // Copyright © 2022 ensan. All rights reserved.
// //
import Algorithms
import Foundation import Foundation
import SwiftUtils import SwiftUtils
@ -26,12 +27,13 @@ extension Kana2Kanji {
func kana2lattice_no_change(N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice)) -> (result: LatticeNode, lattice: Lattice) { func kana2lattice_no_change(N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice)) -> (result: LatticeNode, lattice: Lattice) {
debug("キャッシュから復元、元の文字は:", previousResult.inputData.convertTarget) debug("キャッシュから復元、元の文字は:", previousResult.inputData.convertTarget)
let count = previousResult.inputData.input.count let inputCount = previousResult.inputData.input.count
let surfaceCount = previousResult.inputData.convertTarget.count
// (1) // (1)
let result = LatticeNode.EOSNode let result = LatticeNode.EOSNode
for nodeArray in previousResult.lattice { for nodeArray in previousResult.lattice {
for node in nodeArray where node.inputRange.endIndex == count { for node in nodeArray where node.range.endIndex == .input(inputCount) || node.range.endIndex == .surface(surfaceCount) {
if node.prevs.isEmpty { if node.prevs.isEmpty {
continue continue
} }

View File

@ -34,11 +34,14 @@ struct Kana2Kanji {
let text = data.clauses.map {$0.clause.text}.joined() let text = data.clauses.map {$0.clause.text}.joined()
let value = data.clauses.last!.value + mmValue.value let value = data.clauses.last!.value + mmValue.value
let lastMid = data.clauses.last!.clause.mid let lastMid = data.clauses.last!.clause.mid
let correspondingCount = data.clauses.reduce(into: 0) {$0 += $1.clause.inputRange.count}
let composingCount: ComposingCount = data.clauses.reduce(into: .inputCount(0)) {
$0 = .composite($0, $1.clause.range.count)
}
return Candidate( return Candidate(
text: text, text: text,
value: value, value: value,
correspondingCount: correspondingCount, composingCount: composingCount,
lastMid: lastMid, lastMid: lastMid,
data: data.data data: data.data
) )

View File

@ -1,49 +1,180 @@
import Algorithms
import SwiftUtils
struct Lattice: Sequence { struct Lattice: Sequence {
typealias Element = [LatticeNode] typealias Element = [LatticeNode]
typealias Iterator = IndexingIterator<[[LatticeNode]]>
init(nodes: [[LatticeNode]] = []) { init() {
self.nodes = nodes self.inputIndexedNodes = []
self.surfaceIndexedNodes = []
} }
private var nodes: [[LatticeNode]] init(inputCount: Int, surfaceCount: Int, rawNodes: [[LatticeNode]]) {
self.inputIndexedNodes = .init(repeating: [], count: inputCount)
self.surfaceIndexedNodes = .init(repeating: [], count: surfaceCount)
func prefix(_ k: Int) -> Lattice { for nodes in rawNodes {
var lattice = Lattice(nodes: self.nodes.prefix(k).map {(nodes: [LatticeNode]) in guard let first = nodes.first else { continue }
nodes.filter {$0.inputRange.endIndex <= k} switch first.range.startIndex {
}) case .surface(let i):
while lattice.nodes.last?.isEmpty ?? false { self.surfaceIndexedNodes[i] = nodes
lattice.nodes.removeLast() case .input(let i):
self.inputIndexedNodes[i] = nodes
}
} }
return lattice
} }
func suffix(_ count: Int) -> Lattice { private init(inputIndexedNodes: [[LatticeNode]], surfaceIndexedNodes: [[LatticeNode]]) {
Lattice(nodes: self.nodes.suffix(count)) self.inputIndexedNodes = inputIndexedNodes
self.surfaceIndexedNodes = surfaceIndexedNodes
}
private var inputIndexedNodes: [[LatticeNode]]
private var surfaceIndexedNodes: [[LatticeNode]]
/// Returns the leading part of this lattice, limited to the first `inputCount`
/// input-indexed slots and the first `surfaceCount` surface-indexed slots.
///
/// Nodes whose end index exceeds the limit of the side their range is counted on
/// are removed from each retained slot.
///
/// Empty slots are trimmed from the *tail* of each array only. Slot `i` must keep
/// holding the nodes that start at index `i` (the initializer and the subscripts
/// address slots by that index), so leading or interior empty slots must stay in
/// place; removing them would shift every following slot to a lower index.
/// Trailing empty slots are safe to drop because `merge(_:)` appends any slots
/// that are missing at the end.
///
/// - Parameters:
///   - inputCount: Upper bound (inclusive for node end indices) on the input side.
///   - surfaceCount: Upper bound (inclusive for node end indices) on the surface side.
/// - Returns: A new lattice containing only the nodes inside the given limits.
func prefix(inputCount: Int, surfaceCount: Int) -> Lattice {
    // Keeps only nodes that end at or before the limit of their own side.
    let endsWithinLimit: (LatticeNode) -> Bool = { (node: LatticeNode) -> Bool in
        switch node.range.endIndex {
        case .input(let value):
            value <= inputCount
        case .surface(let value):
            value <= surfaceCount
        }
    }
    // Filters each slot, then drops empty slots from the end without touching
    // the positions of the remaining slots.
    let filteredAndTrimmed: (ArraySlice<[LatticeNode]>) -> [[LatticeNode]] = { slots in
        var result: [[LatticeNode]] = slots.map { (nodes: [LatticeNode]) in
            nodes.filter(endsWithinLimit)
        }
        while result.last?.isEmpty ?? false {
            result.removeLast()
        }
        return result
    }
    return Lattice(
        inputIndexedNodes: filteredAndTrimmed(self.inputIndexedNodes.prefix(inputCount)),
        surfaceIndexedNodes: filteredAndTrimmed(self.surfaceIndexedNodes.prefix(surfaceCount))
    )
}
func suffix(inputCount: Int, surfaceCount: Int) -> Lattice {
Lattice(
inputIndexedNodes: self.inputIndexedNodes.suffix(inputCount),
surfaceIndexedNodes: self.surfaceIndexedNodes.suffix(surfaceCount)
)
} }
mutating func merge(_ lattice: Lattice) { mutating func merge(_ lattice: Lattice) {
for (index, nodeArray) in lattice.nodes.enumerated() where index < self.nodes.endIndex { for (index, nodeArray) in lattice.inputIndexedNodes.enumerated() where index < self.inputIndexedNodes.endIndex {
self.nodes[index].append(contentsOf: nodeArray) self.inputIndexedNodes[index].append(contentsOf: nodeArray)
} }
if self.nodes.endIndex < lattice.nodes.endIndex { if self.inputIndexedNodes.endIndex < lattice.inputIndexedNodes.endIndex {
for nodeArray in lattice.nodes[self.nodes.endIndex...] { for nodeArray in lattice.inputIndexedNodes[self.inputIndexedNodes.endIndex...] {
self.nodes.append(nodeArray) self.inputIndexedNodes.append(nodeArray)
}
}
for (index, nodeArray) in lattice.surfaceIndexedNodes.enumerated() where index < self.surfaceIndexedNodes.endIndex {
self.surfaceIndexedNodes[index].append(contentsOf: nodeArray)
}
if self.surfaceIndexedNodes.endIndex < lattice.surfaceIndexedNodes.endIndex {
for nodeArray in lattice.surfaceIndexedNodes[self.surfaceIndexedNodes.endIndex...] {
self.surfaceIndexedNodes.append(nodeArray)
} }
} }
} }
subscript(inputIndex i: Int) -> [LatticeNode] { subscript(inputIndex i: Int) -> [LatticeNode] {
get { get {
self.nodes[i] self.inputIndexedNodes[i]
} }
} }
func makeIterator() -> IndexingIterator<[[LatticeNode]]> { subscript(index index: LatticeIndex) -> [LatticeNode] {
self.nodes.makeIterator() get {
switch index {
case .input(let i): self.inputIndexedNodes[i]
case .surface(let i): self.surfaceIndexedNodes[i]
}
}
}
/// Enumerates every slot of the lattice together with the index that addresses it.
///
/// All input-indexed slots come first, each paired with `.input(offset)`, followed
/// by all surface-indexed slots, each paired with `.surface(offset)`. Both halves
/// are mapped lazily.
func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> {
    typealias Entry = (index: LatticeIndex, nodes: [LatticeNode])
    let inputEntries = self.inputIndexedNodes.enumerated().lazy.map { pair -> Entry in
        (index: .input(pair.offset), nodes: pair.element)
    }
    let surfaceEntries = self.surfaceIndexedNodes.enumerated().lazy.map { pair -> Entry in
        (index: .surface(pair.offset), nodes: pair.element)
    }
    return inputEntries.chained(surfaceEntries)
}
func makeIterator() -> Chain2Sequence<[[LatticeNode]], [[LatticeNode]]>.Iterator {
self.inputIndexedNodes.chained(self.surfaceIndexedNodes).makeIterator()
} }
var isEmpty: Bool { var isEmpty: Bool {
self.nodes.isEmpty self.inputIndexedNodes.isEmpty && self.surfaceIndexedNodes.isEmpty
}
/// A position in the lattice, counted either on the surface side or on the input side.
enum LatticeIndex: Sendable, Equatable {
    case surface(Int)
    case input(Int)

    /// `true` when the wrapped offset is 0, whichever side it is counted on.
    var isZero: Bool {
        switch self {
        case .surface(let offset), .input(let offset):
            offset == 0
        }
    }
}
/// A span in the lattice described by a `from` and a `to` bound, counted either on
/// the surface side or on the input side.
enum LatticeRange: Sendable, Equatable {
    case surface(from: Int, to: Int)
    case input(from: Int, to: Int)

    /// The input-side range whose bounds are both 0.
    static var zero: Self {
        .input(from: 0, to: 0)
    }

    /// Length of the span (`to - from`), tagged with the side it is counted on.
    var count: ComposingCount {
        switch self {
        case let .surface(lower, upper):
            return .surfaceCount(upper - lower)
        case let .input(lower, upper):
            return .inputCount(upper - lower)
        }
    }

    /// The `from` bound, wrapped as an index of the same side.
    var startIndex: LatticeIndex {
        switch self {
        case let .surface(lower, _):
            return .surface(lower)
        case let .input(lower, _):
            return .input(lower)
        }
    }

    /// The `to` bound, wrapped as an index of the same side.
    var endIndex: LatticeIndex {
        switch self {
        case let .surface(_, upper):
            return .surface(upper)
        case let .input(_, upper):
            return .input(upper)
        }
    }

    /// Joins this range with `other` when `other` begins exactly where this one ends.
    ///
    /// - Parameter other: The range expected to follow this one.
    /// - Returns: A range from this range's `from` to `other`'s `to`, or `nil` when
    ///   the two ranges are on different sides or do not touch.
    func merged(with other: Self) -> Self? {
        switch (self, other) {
        case let (.surface(lower, leftEnd), .surface(rightStart, upper)) where leftEnd == rightStart:
            return .surface(from: lower, to: upper)
        case let (.input(lower, leftEnd), .input(rightStart, upper)) where leftEnd == rightStart:
            return .input(from: lower, to: upper)
        case (.surface, .surface), (.input, .input), (.surface, .input), (.input, .surface):
            // Same side but not adjacent, or ranges counted on different sides.
            return nil
        }
    }

    /// Returns this range with both bounds shifted by the offset that belongs to its side.
    ///
    /// - Parameters:
    ///   - inputOffset: Amount added to both bounds of an input-side range.
    ///   - surfaceOffset: Amount added to both bounds of a surface-side range.
    func offseted(inputOffset: Int, surfaceOffset: Int) -> Self {
        switch self {
        case let .surface(lower, upper):
            return .surface(from: lower + surfaceOffset, to: upper + surfaceOffset)
        case let .input(lower, upper):
            return .input(from: lower + inputOffset, to: upper + inputOffset)
        }
    }
}
} }

View File

@ -17,23 +17,23 @@ public final class LatticeNode {
/// `prevs` /// `prevs`
var values: [PValue] = [] var values: [PValue] = []
/// inputData.inputrange /// inputData.inputrange
var inputRange: Range<Int> var range: Lattice.LatticeRange
/// `EOS` /// `EOS`
static var EOSNode: LatticeNode { static var EOSNode: LatticeNode {
LatticeNode(data: DicdataElement.EOSData, inputRange: 0..<0) LatticeNode(data: DicdataElement.EOSData, range: .zero)
} }
init(data: DicdataElement, inputRange: Range<Int>) { init(data: DicdataElement, range: Lattice.LatticeRange) {
self.data = data self.data = data
self.values = [data.value()] self.values = [data.value()]
self.inputRange = inputRange self.range = range
} }
/// `LatticeNode``RegisteredNode` /// `LatticeNode``RegisteredNode`
/// `LatticeNode``RegisteredNode`1 /// `LatticeNode``RegisteredNode`1
func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode { func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode {
RegisteredNode(data: self.data, registered: self.prevs[index], totalValue: value, inputRange: self.inputRange) RegisteredNode(data: self.data, registered: self.prevs[index], totalValue: value, range: self.range)
} }
/// `CandidateData` /// `CandidateData`

View File

@ -36,7 +36,7 @@ public struct PostCompositionPredictionCandidate {
candidate.data.append(data) candidate.data.append(data)
} }
candidate.value = self.value candidate.value = self.value
candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count } candidate.composingCount = .surfaceCount(candidate.rubyCount)
candidate.lastMid = data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? candidate.lastMid candidate.lastMid = data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? candidate.lastMid
return candidate return candidate
case .replacement(let targetData, let replacementData): case .replacement(let targetData, let replacementData):
@ -45,7 +45,7 @@ public struct PostCompositionPredictionCandidate {
candidate.text = candidate.data.reduce(into: "") {$0 += $1.word} candidate.text = candidate.data.reduce(into: "") {$0 += $1.word}
candidate.value = self.value candidate.value = self.value
candidate.lastMid = candidate.data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid candidate.lastMid = candidate.data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count } candidate.composingCount = .surfaceCount(candidate.rubyCount)
return candidate return candidate
} }
} }

View File

@ -22,9 +22,14 @@ extension Kana2Kanji {
/// - note: /// - note:
/// ///
func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] { func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] {
debug("getPredictionCandidates", composingText, lastClause.inputRange, lastClause.text) debug("getPredictionCandidates", composingText, lastClause.range, lastClause.text)
let lastRuby = ComposingText.getConvertTarget(for: composingText.input[lastClause.inputRange]).toKatakana() let lastRuby = switch lastClause.range {
let lastRubyCount = lastClause.inputRange.count case let .input(left, right):
ComposingText.getConvertTarget(for: composingText.input[left..<right]).toKatakana()
case let .surface(left, right):
String(composingText.convertTarget.dropFirst(left).prefix(right - left))
}
let lastRubyCount = lastRuby.count
let datas: [DicdataElement] let datas: [DicdataElement]
do { do {
var _str = "" var _str = ""
@ -42,11 +47,11 @@ extension Kana2Kanji {
let osuserdict: [DicdataElement] = dicdataStore.getPrefixMatchDynamicUserDict(lastRuby) let osuserdict: [DicdataElement] = dicdataStore.getPrefixMatchDynamicUserDict(lastRuby)
let lastCandidate: Candidate = prepart.isEmpty ? Candidate(text: "", value: .zero, correspondingCount: 0, lastMid: MIDData.EOS.mid, data: []) : self.processClauseCandidate(prepart) let lastCandidate: Candidate = prepart.isEmpty ? Candidate(text: "", value: .zero, composingCount: .inputCount(0), lastMid: MIDData.EOS.mid, data: []) : self.processClauseCandidate(prepart)
let lastRcid: Int = lastCandidate.data.last?.rcid ?? CIDData.EOS.cid let lastRcid: Int = lastCandidate.data.last?.rcid ?? CIDData.EOS.cid
let nextLcid: Int = prepart.lastClause?.nextLcid ?? CIDData.EOS.cid let nextLcid: Int = prepart.lastClause?.nextLcid ?? CIDData.EOS.cid
let lastMid: Int = lastCandidate.lastMid let lastMid: Int = lastCandidate.lastMid
let correspoindingCount: Int = lastCandidate.correspondingCount + lastRubyCount let composingCount: ComposingCount = .composite(lastCandidate.composingCount, .surfaceCount(lastRubyCount))
let ignoreCCValue: PValue = self.dicdataStore.getCCValue(lastRcid, nextLcid) let ignoreCCValue: PValue = self.dicdataStore.getCCValue(lastRcid, nextLcid)
let inputStyle = composingText.input.last?.inputStyle ?? .direct let inputStyle = composingText.input.last?.inputStyle ?? .direct
@ -91,7 +96,7 @@ extension Kana2Kanji {
let candidate: Candidate = Candidate( let candidate: Candidate = Candidate(
text: lastCandidate.text + data.word, text: lastCandidate.text + data.word,
value: newValue, value: newValue,
correspondingCount: correspoindingCount, composingCount: composingCount,
lastMid: includeMMValueCalculation ? data.mid:lastMid, lastMid: includeMMValueCalculation ? data.mid:lastMid,
data: nodedata data: nodedata
) )

View File

@ -14,7 +14,7 @@ protocol RegisteredNodeProtocol {
var data: DicdataElement {get} var data: DicdataElement {get}
var prev: (any RegisteredNodeProtocol)? {get} var prev: (any RegisteredNodeProtocol)? {get}
var totalValue: PValue {get} var totalValue: PValue {get}
var inputRange: Range<Int> {get} var range: Lattice.LatticeRange {get}
} }
struct RegisteredNode: RegisteredNodeProtocol { struct RegisteredNode: RegisteredNodeProtocol {
@ -25,19 +25,19 @@ struct RegisteredNode: RegisteredNodeProtocol {
/// ///
let totalValue: PValue let totalValue: PValue
/// `composingText``input` /// `composingText``input`
let inputRange: Range<Int> let range: Lattice.LatticeRange
init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, inputRange: Range<Int>) { init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, range: Lattice.LatticeRange) {
self.data = data self.data = data
self.prev = registered self.prev = registered
self.totalValue = totalValue self.totalValue = totalValue
self.inputRange = inputRange self.range = range
} }
/// ///
/// - Returns: /// - Returns:
static func BOSNode() -> RegisteredNode { static func BOSNode() -> RegisteredNode {
RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, inputRange: 0 ..< 0) RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, range: .zero)
} }
/// ///
@ -47,7 +47,7 @@ struct RegisteredNode: RegisteredNodeProtocol {
data: DicdataElement(word: "", ruby: "", lcid: CIDData.BOS.cid, rcid: candidate.data.last?.rcid ?? CIDData.BOS.cid, mid: candidate.lastMid, value: 0), data: DicdataElement(word: "", ruby: "", lcid: CIDData.BOS.cid, rcid: candidate.data.last?.rcid ?? CIDData.BOS.cid, mid: candidate.lastMid, value: 0),
registered: nil, registered: nil,
totalValue: 0, totalValue: 0,
inputRange: 0 ..< 0 range: .zero
) )
} }
} }
@ -59,7 +59,7 @@ extension RegisteredNodeProtocol {
guard let prev else { guard let prev else {
let unit = ClauseDataUnit() let unit = ClauseDataUnit()
unit.mid = self.data.mid unit.mid = self.data.mid
unit.inputRange = self.inputRange unit.range = self.range
return CandidateData(clauses: [(clause: unit, value: .zero)], data: []) return CandidateData(clauses: [(clause: unit, value: .zero)], data: [])
} }
var lastcandidate = prev.getCandidateData() // registerd var lastcandidate = prev.getCandidateData() // registerd
@ -75,7 +75,11 @@ extension RegisteredNodeProtocol {
if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) { if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) {
// //
lastClause.text.append(self.data.word) lastClause.text.append(self.data.word)
lastClause.inputRange = lastClause.inputRange.startIndex ..< self.inputRange.endIndex if let newRange = lastClause.range.merged(with: self.range) {
lastClause.range = newRange
} else {
fatalError("このケースは想定していません。")
}
// //
if (lastClause.mid == 500 && self.data.mid != 500) || DicdataStore.includeMMValueCalculation(self.data) { if (lastClause.mid == 500 && self.data.mid != 500) || DicdataStore.includeMMValueCalculation(self.data) {
lastClause.mid = self.data.mid lastClause.mid = self.data.mid
@ -88,7 +92,7 @@ extension RegisteredNodeProtocol {
else { else {
let unit = ClauseDataUnit() let unit = ClauseDataUnit()
unit.text = self.data.word unit.text = self.data.word
unit.inputRange = self.inputRange unit.range = self.range
if DicdataStore.includeMMValueCalculation(self.data) { if DicdataStore.includeMMValueCalculation(self.data) {
unit.mid = self.data.mid unit.mid = self.data.mid
} }

View File

@ -65,7 +65,7 @@ extension Kana2Kanji {
var constraint = zenzaiCache?.getNewConstraint(for: inputData) ?? PrefixConstraint([]) var constraint = zenzaiCache?.getNewConstraint(for: inputData) ?? PrefixConstraint([])
debug("initial constraint", constraint) debug("initial constraint", constraint)
let eosNode = LatticeNode.EOSNode let eosNode = LatticeNode.EOSNode
var lattice: Lattice = Lattice(nodes: []) var lattice: Lattice = Lattice()
var constructedCandidates: [(RegisteredNode, Candidate)] = [] var constructedCandidates: [(RegisteredNode, Candidate)] = []
var insertedCandidates: [(RegisteredNode, Candidate)] = [] var insertedCandidates: [(RegisteredNode, Candidate)] = []
defer { defer {

View File

@ -17,28 +17,32 @@ final class ClauseDataUnit {
/// The text of the unit. /// The text of the unit.
var text: String = "" var text: String = ""
/// The range of the unit in input text. /// The range of the unit in input text.
var inputRange: Range<Int> = 0 ..< 0 var range: Lattice.LatticeRange = .zero
/// Appends `unit` to the end of this unit.
///
/// The text of `unit` is appended, the ranges of both units are combined with `merged(with:)`,
/// and `nextLcid` is taken from `unit`. `mid` is left untouched.
/// - Parameter unit: The unit to merge into this one.
/// - Note: Traps with `fatalError` when `merged(with:)` returns `nil` for the two ranges.
func merge(with unit: ClauseDataUnit) {
    text.append(unit.text)
    guard let mergedRange = range.merged(with: unit.range) else {
        fatalError("このケースは想定していません。")
    }
    range = mergedRange
    nextLcid = unit.nextLcid
}
} }
extension ClauseDataUnit: Equatable {
    /// Two units are equal when `mid`, `nextLcid`, `text` and `range` all match.
    static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool {
        guard lhs.mid == rhs.mid, lhs.nextLcid == rhs.nextLcid else {
            return false
        }
        return lhs.text == rhs.text && lhs.range == rhs.range
    }
}
#if DEBUG
extension ClauseDataUnit: CustomDebugStringConvertible {
    /// One-line summary listing `mid`, `nextLcid`, `text` and `range`; compiled in DEBUG builds only.
    var debugDescription: String {
        let summary = "ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), range: \(range))"
        return summary
    }
}
#endif
@ -67,14 +71,24 @@ public enum CompleteAction: Equatable, Sendable {
case moveCursor(Int) case moveCursor(Int)
} }
/// Describes how much of the front of a `ComposingText` a candidate covers.
/// `ComposingText.prefixComplete(composingCount:)` removes the described prefix.
public enum ComposingCount: Equatable, Sendable {
/// A number of leading elements of `composingText.input`.
case inputCount(Int)
/// A number of leading characters of `composingText.convertTarget`.
case surfaceCount(Int)
/// Two counts applied in order: the first one, then the second one against what remains.
indirect case composite(Self, Self)
}
/// ///
public struct Candidate: Sendable { public struct Candidate: Sendable {
/// ///
public var text: String public var text: String
/// ///
public var value: PValue public var value: PValue
/// composingText.input
public var correspondingCount: Int public var composingCount: ComposingCount
/// mid() /// mid()
public var lastMid: Int public var lastMid: Int
/// DicdataElement /// DicdataElement
@ -86,14 +100,18 @@ public struct Candidate: Sendable {
/// - note: /// - note:
public let inputable: Bool public let inputable: Bool
/// Sum of the character counts of `ruby` over every element of `data`, computed once in `init`.
public let rubyCount: Int

/// Creates a candidate from its parts.
/// - Parameters:
///   - text: Stored as `text`.
///   - value: Stored as `value`.
///   - composingCount: Stored as `composingCount`.
///   - lastMid: Stored as `lastMid`.
///   - data: Stored as `data`; also used to derive `rubyCount`.
///   - actions: Stored as `actions`. Defaults to an empty array.
///   - inputable: Stored as `inputable`. Defaults to `true`.
public init(text: String, value: PValue, composingCount: ComposingCount, lastMid: Int, data: [DicdataElement], actions: [CompleteAction] = [], inputable: Bool = true) {
    // Derived from the parameter; this is the same array that is stored in `self.data` below.
    self.rubyCount = data.reduce(0) { partialCount, element in
        partialCount + element.ruby.count
    }
    self.text = text
    self.value = value
    self.composingCount = composingCount
    self.lastMid = lastMid
    self.data = data
    self.actions = actions
    self.inputable = inputable
}
/// `action` /// `action`
/// - parameters: /// - parameters:
@ -138,7 +156,7 @@ public struct Candidate: Sendable {
/// prefixCandidate /// prefixCandidate
public static func makePrefixClauseCandidate(data: some Collection<DicdataElement>) -> Candidate { public static func makePrefixClauseCandidate(data: some Collection<DicdataElement>) -> Candidate {
var text = "" var text = ""
var correspondingCount = 0 var composingCount = 0
var lastRcid = CIDData.BOS.cid var lastRcid = CIDData.BOS.cid
var lastMid = 501 var lastMid = 501
var candidateData: [DicdataElement] = [] var candidateData: [DicdataElement] = []
@ -148,7 +166,7 @@ public struct Candidate: Sendable {
break break
} }
text.append(item.word) text.append(item.word)
correspondingCount += item.ruby.count composingCount += item.ruby.count
lastRcid = item.rcid lastRcid = item.rcid
// //
if item.mid != 500 && DicdataStore.includeMMValueCalculation(item) { if item.mid != 500 && DicdataStore.includeMMValueCalculation(item) {
@ -159,7 +177,7 @@ public struct Candidate: Sendable {
return Candidate( return Candidate(
text: text, text: text,
value: -5, value: -5,
correspondingCount: correspondingCount, composingCount: .surfaceCount(composingCount),
lastMid: lastMid, lastMid: lastMid,
data: candidateData data: candidateData
) )

View File

@ -168,7 +168,7 @@ import EfficientNGram
var textIndex = [String: Int]() var textIndex = [String: Int]()
for candidate in candidates where !candidate.text.isEmpty && !seenCandidates.contains(candidate.text) { for candidate in candidates where !candidate.text.isEmpty && !seenCandidates.contains(candidate.text) {
if let index = textIndex[candidate.text] { if let index = textIndex[candidate.text] {
if result[index].value < candidate.value || result[index].correspondingCount < candidate.correspondingCount { if result[index].value < candidate.value || result[index].rubyCount < candidate.rubyCount {
result[index] = candidate result[index] = candidate
} }
} else { } else {
@ -219,7 +219,7 @@ import EfficientNGram
let candidate: Candidate = Candidate( let candidate: Candidate = Candidate(
text: ruby, text: ruby,
value: penalty, value: penalty,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: data data: data
) )
@ -232,7 +232,7 @@ import EfficientNGram
let candidate: Candidate = Candidate( let candidate: Candidate = Candidate(
text: word, text: word,
value: value, value: value,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: data data: data
) )
@ -251,7 +251,7 @@ import EfficientNGram
let candidate: Candidate = Candidate( let candidate: Candidate = Candidate(
text: ruby, text: ruby,
value: penalty, value: penalty,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: data data: data
) )
@ -264,7 +264,7 @@ import EfficientNGram
let candidate: Candidate = Candidate( let candidate: Candidate = Candidate(
text: word, text: word,
value: value, value: value,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: data data: data
) )
@ -368,7 +368,7 @@ import EfficientNGram
private func getAdditionalCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] { private func getAdditionalCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
var candidates: [Candidate] = [] var candidates: [Candidate] = []
let string = inputData.convertTarget.toKatakana() let string = inputData.convertTarget.toKatakana()
let correspondingCount = inputData.input.count let composingCount: ComposingCount = .inputCount(inputData.input.count)
do { do {
// //
let value = -14 * getKatakanaScore(string) let value = -14 * getKatakanaScore(string)
@ -376,7 +376,7 @@ import EfficientNGram
let katakana = Candidate( let katakana = Candidate(
text: string, text: string,
value: value, value: value,
correspondingCount: correspondingCount, composingCount: composingCount,
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [data] data: [data]
) )
@ -390,7 +390,7 @@ import EfficientNGram
let hiragana = Candidate( let hiragana = Candidate(
text: hiraganaString, text: hiraganaString,
value: -14.5, value: -14.5,
correspondingCount: correspondingCount, composingCount: composingCount,
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [data] data: [data]
) )
@ -403,7 +403,7 @@ import EfficientNGram
let uppercasedLetter = Candidate( let uppercasedLetter = Candidate(
text: word, text: word,
value: -14.6, value: -14.6,
correspondingCount: correspondingCount, composingCount: composingCount,
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [data] data: [data]
) )
@ -416,7 +416,7 @@ import EfficientNGram
let fullWidthLetter = Candidate( let fullWidthLetter = Candidate(
text: word, text: word,
value: -14.7, value: -14.7,
correspondingCount: correspondingCount, composingCount: composingCount,
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [data] data: [data]
) )
@ -429,7 +429,7 @@ import EfficientNGram
let halfWidthKatakana = Candidate( let halfWidthKatakana = Candidate(
text: word, text: word,
value: -15, value: -15,
correspondingCount: correspondingCount, composingCount: composingCount,
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [data] data: [data]
) )
@ -472,7 +472,7 @@ import EfficientNGram
return Candidate( return Candidate(
text: first.clause.text, text: first.clause.text,
value: first.value, value: first.value,
correspondingCount: first.clause.inputRange.count, composingCount: first.clause.range.count,
lastMid: first.clause.mid, lastMid: first.clause.mid,
data: Array(candidateData.data[0...count]) data: Array(candidateData.data[0...count])
) )
@ -529,10 +529,10 @@ import EfficientNGram
var seenCandidate: Set<String> = full_candidate.mapSet {$0.text} var seenCandidate: Set<String> = full_candidate.mapSet {$0.text}
// 5 // 5
let clause_candidates = self.getUniqueCandidate(clauseCandidates, seenCandidates: seenCandidate).min(count: 5) { let clause_candidates = self.getUniqueCandidate(clauseCandidates, seenCandidates: seenCandidate).min(count: 5) {
if $0.correspondingCount == $1.correspondingCount { if $0.rubyCount == $1.rubyCount {
$0.value > $1.value $0.value > $1.value
} else { } else {
$0.correspondingCount > $1.correspondingCount $0.rubyCount > $1.rubyCount
} }
} }
seenCandidate.formUnion(clause_candidates.map {$0.text}) seenCandidate.formUnion(clause_candidates.map {$0.text})
@ -543,7 +543,7 @@ import EfficientNGram
Candidate( Candidate(
text: $0.data.word, text: $0.data.word,
value: $0.data.value(), value: $0.data.value(),
correspondingCount: $0.inputRange.count, composingCount: $0.range.count,
lastMid: $0.data.mid, lastMid: $0.data.mid,
data: [$0.data] data: [$0.data]
) )
@ -554,8 +554,8 @@ import EfficientNGram
// //
var word_candidates: [Candidate] = self.getUniqueCandidate(dicCandidates.chained(additionalCandidates), seenCandidates: seenCandidate) var word_candidates: [Candidate] = self.getUniqueCandidate(dicCandidates.chained(additionalCandidates), seenCandidates: seenCandidate)
.sorted { .sorted {
let count0 = $0.correspondingCount let count0 = $0.rubyCount
let count1 = $1.correspondingCount let count1 = $1.rubyCount
return count0 == count1 ? $0.value > $1.value : count0 > count1 return count0 == count1 ? $0.value > $1.value : count0 > count1
} }
seenCandidate.formUnion(word_candidates.map {$0.text}) seenCandidate.formUnion(word_candidates.map {$0.text})
@ -590,10 +590,10 @@ import EfficientNGram
} }
// 5 // 5
let firstClauseResults = self.getUniqueCandidate(clauseCandidates).min(count: 5) { let firstClauseResults = self.getUniqueCandidate(clauseCandidates).min(count: 5) {
if $0.correspondingCount == $1.correspondingCount { if $0.rubyCount == $1.rubyCount {
$0.value > $1.value $0.value > $1.value
} else { } else {
$0.correspondingCount > $1.correspondingCount $0.rubyCount > $1.rubyCount
} }
} }
return ConversionResult(mainResults: result, firstClauseResults: firstClauseResults) return ConversionResult(mainResults: result, firstClauseResults: firstClauseResults)
@ -662,7 +662,7 @@ import EfficientNGram
let diff = inputData.differenceSuffix(to: previousInputData) let diff = inputData.differenceSuffix(to: previousInputData)
debug("\(#function): 最後尾文字置換用の関数を呼びます、差分は\(diff)") debug("\(#function): 最後尾文字置換用の関数を呼びます、差分は\(diff)")
let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: (diff.deleted, diff.addedCount), previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection) let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: diff, previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection)
self.previousInputData = inputData self.previousInputData = inputData
return result return result
} }

View File

@ -21,7 +21,7 @@ extension KanaKanjiConverter {
return result.map {[Candidate( return result.map {[Candidate(
text: $0, text: $0,
value: -15, value: -15,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -15)] data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -15)]
)]} ?? [] )]} ?? []
@ -116,7 +116,7 @@ extension KanaKanjiConverter {
Candidate( Candidate(
text: $0, text: $0,
value: -18, value: -18,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -18)] data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -18)]
) )
@ -125,7 +125,7 @@ extension KanaKanjiConverter {
Candidate( Candidate(
text: $0, text: $0,
value: -19, value: -19,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -19)] data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -19)]
) )

View File

@ -38,7 +38,7 @@ extension KanaKanjiConverter {
let candidate = Candidate( let candidate = Candidate(
text: result, text: result,
value: -10, value: -10,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: result, ruby: ruby, cid: CIDData..cid, mid: MIDData..mid, value: -10)] data: [DicdataElement(word: result, ruby: ruby, cid: CIDData..cid, mid: MIDData..mid, value: -10)]
) )

View File

@ -46,7 +46,7 @@ extension KanaKanjiConverter {
Candidate( Candidate(
text: address, text: address,
value: baseValue - PValue(i), value: baseValue - PValue(i),
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: address, ruby: string, cid: .zero, mid: MIDData..mid, value: baseValue - PValue(i))] data: [DicdataElement(word: address, ruby: string, cid: .zero, mid: MIDData..mid, value: baseValue - PValue(i))]
) )

View File

@ -37,7 +37,7 @@ extension KanaKanjiConverter {
Candidate( Candidate(
text: $0, text: $0,
value: -15, value: -15,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -15)] data: [DicdataElement(word: $0, ruby: string, cid: CIDData..cid, mid: MIDData..mid, value: -15)]
) )

View File

@ -17,7 +17,7 @@ extension KanaKanjiConverter {
let candidate = Candidate( let candidate = Candidate(
text: timeExpression, text: timeExpression,
value: -10, value: -10,
correspondingCount: numberString.count, composingCount: .surfaceCount(numberString.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData..cid, mid: MIDData..mid, value: -10)] data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData..cid, mid: MIDData..mid, value: -10)]
) )
@ -31,7 +31,7 @@ extension KanaKanjiConverter {
let candidate = Candidate( let candidate = Candidate(
text: timeExpression, text: timeExpression,
value: -10, value: -10,
correspondingCount: numberString.count, composingCount: .surfaceCount(numberString.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData..cid, mid: MIDData..mid, value: -10)] data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData..cid, mid: MIDData..mid, value: -10)]
) )

View File

@ -22,7 +22,7 @@ extension KanaKanjiConverter {
Candidate( Candidate(
text: char, text: char,
value: value0, value: value0,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: char, ruby: string, cid: .zero, mid: MIDData..mid, value: value0)] data: [DicdataElement(word: char, ruby: string, cid: .zero, mid: MIDData..mid, value: value0)]
) )

View File

@ -20,7 +20,7 @@ extension KanaKanjiConverter {
return [Candidate( return [Candidate(
text: versionString, text: versionString,
value: -30, value: -30,
correspondingCount: inputData.input.count, composingCount: .inputCount(inputData.input.count),
lastMid: MIDData..mid, lastMid: MIDData..mid,
data: [DicdataElement(word: versionString, ruby: inputData.convertTarget.toKatakana(), cid: CIDData..cid, mid: MIDData..mid, value: -30)] data: [DicdataElement(word: versionString, ruby: inputData.convertTarget.toKatakana(), cid: CIDData..cid, mid: MIDData..mid, value: -30)]
)] )]

View File

@ -427,7 +427,7 @@ public final class DicdataStore {
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else { guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
return nil return nil
} }
let node = LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1) let node = LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
node.prevs.append(RegisteredNode.BOSNode()) node.prevs.append(RegisteredNode.BOSNode())
return node return node
} }
@ -437,7 +437,7 @@ public final class DicdataStore {
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else { guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
return nil return nil
} }
return LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1) return LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
} }
return result return result
} }

View File

@ -341,18 +341,37 @@ public struct ComposingText: Sendable {
/// Removes an already-committed prefix from the composing text and updates
/// `input`, `convertTarget` and `convertTargetCursorPosition` to match.
///
/// - Parameters:
///   - composingCount: How much to remove from the front.
///     `.inputCount(n)` removes the first `n` elements of `input` and rebuilds `convertTarget`;
///     `.surfaceCount(n)` removes the first `n` characters of `convertTarget` and the matching part of `input`;
///     `.composite(a, b)` applies `a` and then `b`.
///     Counts larger than the available length are clamped to that length.
public mutating func prefixComplete(composingCount: ComposingCount) {
    switch composingCount {
    case .inputCount(let correspondingCount):
        // Never remove more elements than `input` holds.
        let inputCount = min(correspondingCount, self.input.count)
        self.input.removeFirst(inputCount)
        // Rebuild convertTarget from what is left of the input.
        let newConvertTarget = Self.getConvertTarget(for: self.input)
        // Move the cursor left by the number of characters that disappeared.
        let cursorDelta = self.convertTarget.count - newConvertTarget.count
        self.convertTarget = newConvertTarget
        self.convertTargetCursorPosition -= cursorDelta
    case .surfaceCount(let correspondingCount):
        // Clamp like the `.inputCount` branch so the cursor is never moved by more
        // characters than are actually removed from `convertTarget`.
        let surfaceCount = min(correspondingCount, self.convertTarget.count)
        // Find where in `input` the removed surface prefix ends, then drop both sides.
        let prefix = self.convertTarget.prefix(surfaceCount)
        let index = self.forceGetInputCursorPosition(target: prefix)
        self.input = Array(self.input[index...])
        self.convertTarget = String(self.convertTarget.dropFirst(surfaceCount))
        self.convertTargetCursorPosition -= surfaceCount
    case .composite(let left, let right):
        self.prefixComplete(composingCount: left)
        self.prefixComplete(composingCount: right)
    }
    // A cursor that ends up at the very front is moved to the end of the remaining text.
    // For `.composite` the nested calls have already done this, so repeating it is a no-op.
    if self.convertTargetCursorPosition == 0 {
        self.convertTargetCursorPosition = self.convertTarget.count
    }
}
@ -580,17 +599,20 @@ extension ComposingText.ConvertTargetElement: Equatable {}
extension ComposingText { extension ComposingText {
/// Computes how this `ComposingText` differs from `previousData` once their longest
/// common prefix is set aside, for both `input` and `convertTarget`.
///
/// - Parameter previousData: The earlier state to compare against.
/// - Returns: `deletedInput` / `addedInput` are the number of elements after the common
///   `input` prefix in `previousData` and in `self` respectively;
///   `deletedSurface` / `addedSurface` are the same counts for the characters of `convertTarget`.
func differenceSuffix(to previousData: ComposingText) -> (deletedInput: Int, addedInput: Int, deletedSurface: Int, addedSurface: Int) {
    // Input side: everything after the shared prefix was deleted from the old state / added in the new one.
    let commonInput = self.input.commonPrefix(with: previousData.input)
    let deletedInput = previousData.input.count - commonInput.count
    let addedInput = self.input.count - commonInput.count

    // Surface side: same computation on `convertTarget`.
    let commonSurface = self.convertTarget.commonPrefix(with: previousData.convertTarget)
    let deletedSurface = previousData.convertTarget.count - commonSurface.count
    // Count only what follows the shared prefix. Measuring from the prefix's start index
    // would cover the entire `convertTarget`, not just the added part.
    let addedSurface = self.convertTarget.count - commonSurface.count
    return (deletedInput, addedInput, deletedSurface, addedSurface)
}
func inputHasSuffix(inputOf suffix: ComposingText) -> Bool { func inputHasSuffix(inputOf suffix: ComposingText) -> Bool {

View File

@ -14,19 +14,19 @@ final class ClauseDataUnitTests: XCTestCase {
do { do {
let unit1 = ClauseDataUnit() let unit1 = ClauseDataUnit()
unit1.text = "僕が" unit1.text = "僕が"
unit1.inputRange = 0 ..< 3 unit1.range = .input(from: 0, to: 3)
unit1.mid = 0 unit1.mid = 0
unit1.nextLcid = 0 unit1.nextLcid = 0
let unit2 = ClauseDataUnit() let unit2 = ClauseDataUnit()
unit2.text = "走る" unit2.text = "走る"
unit2.inputRange = 3 ..< 6 unit2.range = .input(from: 3, to: 6)
unit2.mid = 1 unit2.mid = 1
unit2.nextLcid = 1 unit2.nextLcid = 1
unit1.merge(with: unit2) unit1.merge(with: unit2)
XCTAssertEqual(unit1.text, "僕が走る") XCTAssertEqual(unit1.text, "僕が走る")
XCTAssertEqual(unit1.inputRange, 0 ..< 6) XCTAssertEqual(unit1.range, .input(from: 0, to: 6))
XCTAssertEqual(unit1.nextLcid, 1) XCTAssertEqual(unit1.nextLcid, 1)
XCTAssertEqual(unit1.mid, 0) XCTAssertEqual(unit1.mid, 0)
} }
@ -34,19 +34,19 @@ final class ClauseDataUnitTests: XCTestCase {
do { do {
let unit1 = ClauseDataUnit() let unit1 = ClauseDataUnit()
unit1.text = "君は" unit1.text = "君は"
unit1.inputRange = 0 ..< 3 unit1.range = .input(from: 0, to: 3)
unit1.mid = 0 unit1.mid = 0
unit1.nextLcid = 0 unit1.nextLcid = 0
let unit2 = ClauseDataUnit() let unit2 = ClauseDataUnit()
unit2.text = "笑った" unit2.text = "笑った"
unit2.inputRange = 3 ..< 7 unit2.range = .input(from: 3, to: 7)
unit2.mid = 3 unit2.mid = 3
unit2.nextLcid = 3 unit2.nextLcid = 3
unit1.merge(with: unit2) unit1.merge(with: unit2)
XCTAssertEqual(unit1.text, "君は笑った") XCTAssertEqual(unit1.text, "君は笑った")
XCTAssertEqual(unit1.inputRange, 0 ..< 7) XCTAssertEqual(unit1.range, .input(from: 0, to: 7))
XCTAssertEqual(unit1.nextLcid, 3) XCTAssertEqual(unit1.nextLcid, 3)
XCTAssertEqual(unit1.mid, 0) XCTAssertEqual(unit1.mid, 0)
} }

View File

@ -202,8 +202,8 @@ final class ComposingTextTests: XCTestCase {
var c2 = ComposingText() var c2 = ComposingText()
c2.insertAtCursorPosition("hasiru", inputStyle: .roman2kana) c2.insertAtCursorPosition("hasiru", inputStyle: .roman2kana)
XCTAssertEqual(c2.differenceSuffix(to: c1).deleted, 0) XCTAssertEqual(c2.differenceSuffix(to: c1).deletedInput, 0)
XCTAssertEqual(c2.differenceSuffix(to: c1).addedCount, 1) XCTAssertEqual(c2.differenceSuffix(to: c1).addedInput, 1)
} }
do { do {
var c1 = ComposingText() var c1 = ComposingText()
@ -212,8 +212,8 @@ final class ComposingTextTests: XCTestCase {
var c2 = ComposingText() var c2 = ComposingText()
c2.insertAtCursorPosition("tukatte", inputStyle: .roman2kana) c2.insertAtCursorPosition("tukatte", inputStyle: .roman2kana)
XCTAssertEqual(c2.differenceSuffix(to: c1).deleted, 0) XCTAssertEqual(c2.differenceSuffix(to: c1).deletedInput, 0)
XCTAssertEqual(c2.differenceSuffix(to: c1).addedCount, 1) XCTAssertEqual(c2.differenceSuffix(to: c1).addedInput, 1)
} }
} }
} }

View File

@ -16,7 +16,7 @@ final class CandidateTests: XCTestCase {
let candidate = Candidate( let candidate = Candidate(
text: text, text: text,
value: -40, value: -40,
correspondingCount: 4, composingCount: .inputCount(4),
lastMid: 5, lastMid: 5,
data: [DicdataElement(word: text, ruby: "サイコロ", cid: 0, mid: 5, value: -40)] data: [DicdataElement(word: text, ruby: "サイコロ", cid: 0, mid: 5, value: -40)]
) )
@ -27,7 +27,7 @@ final class CandidateTests: XCTestCase {
print(candidate2.text) print(candidate2.text)
XCTAssertTrue(Set((1...3).map(String.init)).contains(candidate2.text)) XCTAssertTrue(Set((1...3).map(String.init)).contains(candidate2.text))
XCTAssertEqual(candidate.value, candidate2.value) XCTAssertEqual(candidate.value, candidate2.value)
XCTAssertEqual(candidate.correspondingCount, candidate2.correspondingCount) XCTAssertEqual(candidate.composingCount, candidate2.composingCount)
XCTAssertEqual(candidate.lastMid, candidate2.lastMid) XCTAssertEqual(candidate.lastMid, candidate2.lastMid)
XCTAssertEqual(candidate.data, candidate2.data) XCTAssertEqual(candidate.data, candidate2.data)
XCTAssertEqual(candidate.actions, candidate2.actions) XCTAssertEqual(candidate.actions, candidate2.actions)
@ -38,7 +38,7 @@ final class CandidateTests: XCTestCase {
let candidate = Candidate( let candidate = Candidate(
text: text, text: text,
value: 0, value: 0,
correspondingCount: 0, composingCount: .inputCount(0),
lastMid: 0, lastMid: 0,
data: [DicdataElement(word: text, ruby: "", cid: 0, mid: 0, value: 0)] data: [DicdataElement(word: text, ruby: "", cid: 0, mid: 0, value: 0)]
) )

View File

@ -88,7 +88,7 @@ final class LearningMemoryTests: XCTestCase {
Candidate( Candidate(
text: element.word, text: element.word,
value: element.value(), value: element.value(),
correspondingCount: 3, composingCount: .inputCount(3),
lastMid: element.mid, lastMid: element.mid,
data: [element] data: [element]
) )
@ -128,7 +128,7 @@ final class LearningMemoryTests: XCTestCase {
Candidate( Candidate(
text: element.word, text: element.word,
value: element.value(), value: element.value(),
correspondingCount: 3, composingCount: .inputCount(3),
lastMid: element.mid, lastMid: element.mid,
data: [element] data: [element]
) )

View File

@ -12,16 +12,16 @@ import XCTest
final class RegisteredNodeTests: XCTestCase { final class RegisteredNodeTests: XCTestCase {
func testBOSNode() throws { func testBOSNode() throws {
let bos = RegisteredNode.BOSNode() let bos = RegisteredNode.BOSNode()
XCTAssertEqual(bos.inputRange, 0..<0) XCTAssertEqual(bos.range, Lattice.LatticeRange.zero)
XCTAssertNil(bos.prev) XCTAssertNil(bos.prev)
XCTAssertEqual(bos.totalValue, 0) XCTAssertEqual(bos.totalValue, 0)
XCTAssertEqual(bos.data.rcid, CIDData.BOS.cid) XCTAssertEqual(bos.data.rcid, CIDData.BOS.cid)
} }
func testFromLastCandidate() throws { func testFromLastCandidate() throws {
let candidate = Candidate(text: "我輩は猫", value: -20, correspondingCount: 7, lastMid: 100, data: [DicdataElement(word: "我輩は猫", ruby: "ワガハイハネコ", cid: CIDData..cid, mid: 100, value: -20)]) let candidate = Candidate(text: "我輩は猫", value: -20, composingCount: .inputCount(7), lastMid: 100, data: [DicdataElement(word: "我輩は猫", ruby: "ワガハイハネコ", cid: CIDData..cid, mid: 100, value: -20)])
let bos = RegisteredNode.fromLastCandidate(candidate) let bos = RegisteredNode.fromLastCandidate(candidate)
XCTAssertEqual(bos.inputRange, 0..<0) XCTAssertEqual(bos.range, Lattice.LatticeRange.zero)
XCTAssertNil(bos.prev) XCTAssertNil(bos.prev)
XCTAssertEqual(bos.totalValue, 0) XCTAssertEqual(bos.totalValue, 0)
XCTAssertEqual(bos.data.rcid, CIDData..cid) XCTAssertEqual(bos.data.rcid, CIDData..cid)
@ -34,37 +34,37 @@ final class RegisteredNodeTests: XCTestCase {
data: DicdataElement(word: "我輩", ruby: "ワガハイ", cid: CIDData..cid, mid: 1, value: -5), data: DicdataElement(word: "我輩", ruby: "ワガハイ", cid: CIDData..cid, mid: 1, value: -5),
registered: bos, registered: bos,
totalValue: -10, totalValue: -10,
inputRange: 0..<4 range: .input(from: 0, to: 4)
) )
let node2 = RegisteredNode( let node2 = RegisteredNode(
data: DicdataElement(word: "", ruby: "", cid: CIDData..cid, mid: 2, value: -2), data: DicdataElement(word: "", ruby: "", cid: CIDData..cid, mid: 2, value: -2),
registered: node1, registered: node1,
totalValue: -13, totalValue: -13,
inputRange: 4..<5 range: .input(from: 4, to: 5)
) )
let node3 = RegisteredNode( let node3 = RegisteredNode(
data: DicdataElement(word: "", ruby: "ネコ", cid: CIDData..cid, mid: 3, value: -4), data: DicdataElement(word: "", ruby: "ネコ", cid: CIDData..cid, mid: 3, value: -4),
registered: node2, registered: node2,
totalValue: -20, totalValue: -20,
inputRange: 5..<7 range: .input(from: 5, to: 7)
) )
let node4 = RegisteredNode( let node4 = RegisteredNode(
data: DicdataElement(word: "です", ruby: "デス", cid: CIDData..cid, mid: 4, value: -3), data: DicdataElement(word: "です", ruby: "デス", cid: CIDData..cid, mid: 4, value: -3),
registered: node3, registered: node3,
totalValue: -25, totalValue: -25,
inputRange: 7..<9 range: .input(from: 7, to: 9)
) )
let result = node4.getCandidateData() let result = node4.getCandidateData()
let clause1 = ClauseDataUnit() let clause1 = ClauseDataUnit()
clause1.text = "我輩は" clause1.text = "我輩は"
clause1.nextLcid = CIDData..cid clause1.nextLcid = CIDData..cid
clause1.inputRange = 0..<5 clause1.range = .input(from: 0, to: 5)
clause1.mid = 1 clause1.mid = 1
let clause2 = ClauseDataUnit() let clause2 = ClauseDataUnit()
clause2.text = "猫です" clause2.text = "猫です"
clause2.nextLcid = CIDData.EOS.cid clause2.nextLcid = CIDData.EOS.cid
clause2.inputRange = 5..<9 clause2.range = .input(from: 5, to: 9)
clause2.mid = 3 clause2.mid = 3
let expectedResult: CandidateData = CandidateData( let expectedResult: CandidateData = CandidateData(

View File

@ -290,7 +290,7 @@ final class DicdataStoreTests: XCTestCase {
sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana) sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana)
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false) let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"})) XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"}))
XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.inputRange, 0 ..< 11) XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.range, .input(from: 0, to: 11))
} }
// //