mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
feat: Latticeの操作をconvertTargetベースのindexとinputベースのindexの二重化
This commit is contained in:
@ -220,7 +220,7 @@ extension Subcommands {
|
||||
print("Submit \(candidate.text)")
|
||||
converter.setCompletedData(candidate)
|
||||
converter.updateLearningData(candidate)
|
||||
composingText.prefixComplete(correspondingCount: candidate.correspondingCount)
|
||||
composingText.prefixComplete(composingCount: candidate.composingCount)
|
||||
if composingText.isEmpty {
|
||||
composingText.stopComposition()
|
||||
converter.stopComposition()
|
||||
|
@ -28,11 +28,16 @@ extension Kana2Kanji {
|
||||
/// (4)ノードをアップデートした上で返却する。
|
||||
func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) {
|
||||
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)")
|
||||
let count: Int = inputData.input.count
|
||||
let inputCount: Int = inputData.input.count
|
||||
let surfaceCount = inputData.convertTarget.count
|
||||
let result: LatticeNode = LatticeNode.EOSNode
|
||||
let lattice: Lattice = Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)})
|
||||
let lattice: Lattice = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: needTypoCorrection)}
|
||||
)
|
||||
// 「i文字目から始まるnodes」に対して
|
||||
for (i, nodeArray) in lattice.enumerated() {
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
// それぞれのnodeに対して
|
||||
for node in nodeArray {
|
||||
if node.prevs.isEmpty {
|
||||
@ -43,7 +48,7 @@ extension Kana2Kanji {
|
||||
}
|
||||
// 生起確率を取得する。
|
||||
let wValue: PValue = node.data.value()
|
||||
if i == 0 {
|
||||
if i.isZero {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
|
||||
} else {
|
||||
@ -51,12 +56,12 @@ extension Kana2Kanji {
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex: Int = node.inputRange.endIndex
|
||||
let nextIndex = node.range.endIndex
|
||||
// 文字数がcountと等しい場合登録する
|
||||
if nextIndex == count {
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
} else {
|
||||
self.updateNextNodes(with: node, nextNodes: lattice[inputIndex: nextIndex], nBest: N_best)
|
||||
self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -20,11 +20,16 @@ extension Kana2Kanji {
|
||||
/// (4)ノードをアップデートした上で返却する。
|
||||
func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) {
|
||||
debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)")
|
||||
let count: Int = inputData.input.count
|
||||
let inputCount: Int = inputData.input.count
|
||||
let surfaceCount: Int = inputData.convertTarget.count
|
||||
let result: LatticeNode = LatticeNode.EOSNode
|
||||
let lattice: Lattice = Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)})
|
||||
let lattice: Lattice = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
rawNodes: (.zero ..< inputCount).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}
|
||||
)
|
||||
// 「i文字目から始まるnodes」に対して
|
||||
for (i, nodeArray) in lattice.enumerated() {
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
// それぞれのnodeに対して
|
||||
for node in nodeArray {
|
||||
if node.prevs.isEmpty {
|
||||
@ -32,7 +37,7 @@ extension Kana2Kanji {
|
||||
}
|
||||
// 生起確率を取得する。
|
||||
let wValue: PValue = node.data.value()
|
||||
if i == 0 {
|
||||
if i.isZero {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
|
||||
} else {
|
||||
@ -40,9 +45,9 @@ extension Kana2Kanji {
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex: Int = node.inputRange.endIndex
|
||||
let nextIndex = node.range.endIndex
|
||||
// 文字数がcountと等しい場合登録する
|
||||
if nextIndex == count {
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
for index in node.prevs.indices {
|
||||
let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
|
||||
// 学習データやユーザ辞書由来の場合は素通しする
|
||||
@ -61,7 +66,7 @@ extension Kana2Kanji {
|
||||
Array(($0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word).utf8)
|
||||
}
|
||||
// nodeの繋がる次にあり得る全てのnextnodeに対して
|
||||
for nextnode in lattice[inputIndex: nextIndex] {
|
||||
for nextnode in lattice[index: nextIndex] {
|
||||
// クラスの連続確率を計算する。
|
||||
let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
|
||||
// nodeの持っている全てのprevnodeに対して
|
||||
|
@ -14,7 +14,7 @@ extension Kana2Kanji {
|
||||
return Candidate(
|
||||
text: left.text + right.text,
|
||||
value: left.value + right.value,
|
||||
correspondingCount: left.correspondingCount + right.correspondingCount,
|
||||
composingCount: .composite(left.composingCount, right.composingCount),
|
||||
lastMid: right.lastMid,
|
||||
data: left.data + right.data
|
||||
)
|
||||
@ -26,7 +26,7 @@ extension Kana2Kanji {
|
||||
return Candidate(
|
||||
text: left.text + right.text,
|
||||
value: newValue,
|
||||
correspondingCount: left.correspondingCount + right.correspondingCount,
|
||||
composingCount: .composite(left.composingCount, right.composingCount),
|
||||
lastMid: right.lastMid,
|
||||
data: left.data + right.data
|
||||
)
|
||||
@ -57,7 +57,7 @@ extension Kana2Kanji {
|
||||
prefixCandidate.data = prefixCandidateData
|
||||
|
||||
prefixCandidate.text = prefixCandidateData.reduce(into: "") { $0 += $1.word }
|
||||
prefixCandidate.correspondingCount = prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count }
|
||||
prefixCandidate.composingCount = .surfaceCount(prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count })
|
||||
}
|
||||
|
||||
totalWord.insert(contentsOf: element.word, at: totalWord.startIndex)
|
||||
|
@ -17,29 +17,30 @@ extension Kana2Kanji {
|
||||
/// (2)次に、再度計算して良い候補を得る。
|
||||
func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) {
|
||||
debug("確定直後の変換、前は:", previousResult.inputData, "後は:", inputData)
|
||||
let count = inputData.input.count
|
||||
let inputCount = inputData.input.count
|
||||
let surfaceCount = inputData.convertTarget.count
|
||||
// TODO: 実際にはもっとチェックが必要。具体的には、input/convertTarget両方のsuffixが一致する必要がある
|
||||
let convertedInputCount = previousResult.inputData.input.count - inputCount
|
||||
let convertedSurfaceCount = previousResult.inputData.convertTarget.count - surfaceCount
|
||||
// (1)
|
||||
let start = RegisteredNode.fromLastCandidate(completedData)
|
||||
let lattice = previousResult.lattice.suffix(count)
|
||||
for (i, nodeArray) in lattice.enumerated() {
|
||||
if i == .zero {
|
||||
for node in nodeArray {
|
||||
node.prevs = [start]
|
||||
// inputRangeを確定した部分のカウント分ずらす
|
||||
node.inputRange = node.inputRange.startIndex - completedData.correspondingCount ..< node.inputRange.endIndex - completedData.correspondingCount
|
||||
}
|
||||
let lattice = previousResult.lattice.suffix(inputCount: inputCount, surfaceCount: surfaceCount)
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
let prevs: [RegisteredNode] = if i.isZero {
|
||||
[start]
|
||||
} else {
|
||||
for node in nodeArray {
|
||||
node.prevs = []
|
||||
// inputRangeを確定した部分のカウント分ずらす
|
||||
node.inputRange = node.inputRange.startIndex - completedData.correspondingCount ..< node.inputRange.endIndex - completedData.correspondingCount
|
||||
[]
|
||||
}
|
||||
for node in nodeArray {
|
||||
node.prevs = prevs
|
||||
// rangeを確定した部分のカウント分ずらす
|
||||
node.range = node.range.offseted(inputOffset: -convertedInputCount, surfaceOffset: -convertedSurfaceCount)
|
||||
}
|
||||
}
|
||||
// (2)
|
||||
let result = LatticeNode.EOSNode
|
||||
|
||||
for (i, nodeArray) in lattice.enumerated() {
|
||||
for (i, nodeArray) in lattice.indexedNodes() {
|
||||
for node in nodeArray {
|
||||
if node.prevs.isEmpty {
|
||||
continue
|
||||
@ -49,7 +50,7 @@ extension Kana2Kanji {
|
||||
}
|
||||
// 生起確率を取得する。
|
||||
let wValue = node.data.value()
|
||||
if i == 0 {
|
||||
if i.isZero {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
|
||||
} else {
|
||||
@ -57,11 +58,11 @@ extension Kana2Kanji {
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex = node.inputRange.endIndex
|
||||
if nextIndex != count {
|
||||
self.updateNextNodes(with: node, nextNodes: lattice[inputIndex: nextIndex], nBest: N_best)
|
||||
} else {
|
||||
let nextIndex = node.range.endIndex
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
} else {
|
||||
self.updateNextNodes(with: node, nextNodes: lattice[index: nextIndex], nBest: N_best)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
// Copyright © 2020 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Algorithms
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
@ -24,27 +25,43 @@ extension Kana2Kanji {
|
||||
///
|
||||
/// (5)ノードをアップデートした上で返却する。
|
||||
|
||||
func kana2lattice_changed(_ inputData: ComposingText, N_best: Int, counts: (deleted: Int, added: Int), previousResult: (inputData: ComposingText, lattice: Lattice), needTypoCorrection: Bool) -> (result: LatticeNode, lattice: Lattice) {
|
||||
func kana2lattice_changed(
|
||||
_ inputData: ComposingText,
|
||||
N_best: Int,
|
||||
counts: (deletedInput: Int, addedInput: Int, deletedSurface: Int, addedSurface: Int),
|
||||
previousResult: (inputData: ComposingText, lattice: Lattice),
|
||||
needTypoCorrection: Bool
|
||||
) -> (result: LatticeNode, lattice: Lattice) {
|
||||
// (0)
|
||||
let count = inputData.input.count
|
||||
let commonCount = previousResult.inputData.input.count - counts.deleted
|
||||
debug("kana2lattice_changed", inputData, counts, previousResult.inputData, count, commonCount)
|
||||
let inputCount = inputData.input.count
|
||||
let surfaceCount = inputData.convertTarget.count
|
||||
let commonInputCount = previousResult.inputData.input.count - counts.deletedInput
|
||||
let commonSurfaceCount = previousResult.inputData.convertTarget.count - counts.deletedSurface
|
||||
debug("kana2lattice_changed", inputData, counts, previousResult.inputData, inputCount, commonInputCount)
|
||||
|
||||
// (1)
|
||||
var lattice = previousResult.lattice.prefix(commonCount)
|
||||
var lattice = previousResult.lattice.prefix(inputCount: commonInputCount, surfaceCount: commonSurfaceCount)
|
||||
|
||||
let terminalNodes: Lattice
|
||||
if counts.added == 0 {
|
||||
terminalNodes = Lattice(nodes: lattice.map {
|
||||
if counts.addedInput == 0 {
|
||||
terminalNodes = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
rawNodes: lattice.map {
|
||||
$0.filter {
|
||||
$0.inputRange.endIndex == count
|
||||
// Keep only terminal nodes: an input-indexed node must end at `inputCount`,
// while a surface-indexed node must end at `surfaceCount` (not `inputCount`).
$0.range.endIndex == .input(inputCount) || $0.range.endIndex == .surface(surfaceCount)
|
||||
}
|
||||
})
|
||||
}
|
||||
)
|
||||
} else {
|
||||
// (2)
|
||||
let addedNodes: Lattice = Lattice(nodes: (0..<count).map {(i: Int) in
|
||||
self.dicdataStore.getLOUDSDataInRange(inputData: inputData, from: i, toIndexRange: max(commonCount, i) ..< count, needTypoCorrection: needTypoCorrection)
|
||||
})
|
||||
let addedNodes: Lattice = Lattice(
|
||||
inputCount: inputCount,
|
||||
surfaceCount: surfaceCount,
|
||||
rawNodes: (0..<inputCount).map {(i: Int) in
|
||||
self.dicdataStore.getLOUDSDataInRange(inputData: inputData, from: i, toIndexRange: max(commonInputCount, i) ..< inputCount, needTypoCorrection: needTypoCorrection)
|
||||
}
|
||||
)
|
||||
|
||||
// (3)
|
||||
for nodeArray in lattice {
|
||||
@ -56,8 +73,8 @@ extension Kana2Kanji {
|
||||
continue
|
||||
}
|
||||
// 変換した文字数
|
||||
let nextIndex = node.inputRange.endIndex
|
||||
self.updateNextNodes(with: node, nextNodes: addedNodes[inputIndex: nextIndex], nBest: N_best)
|
||||
let nextIndex = node.range.endIndex
|
||||
self.updateNextNodes(with: node, nextNodes: addedNodes[index: nextIndex], nBest: N_best)
|
||||
}
|
||||
}
|
||||
lattice.merge(addedNodes)
|
||||
@ -86,11 +103,11 @@ extension Kana2Kanji {
|
||||
// valuesを更新する
|
||||
node.values = node.prevs.map {$0.totalValue + wValue}
|
||||
}
|
||||
let nextIndex = node.inputRange.endIndex
|
||||
if count == nextIndex {
|
||||
let nextIndex = node.range.endIndex
|
||||
if nextIndex == .input(inputCount) || nextIndex == .surface(surfaceCount) {
|
||||
self.updateResultNode(with: node, resultNode: result)
|
||||
} else {
|
||||
self.updateNextNodes(with: node, nextNodes: terminalNodes[inputIndex: nextIndex], nBest: N_best)
|
||||
self.updateNextNodes(with: node, nextNodes: terminalNodes[index: nextIndex], nBest: N_best)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
// Copyright © 2022 ensan. All rights reserved.
|
||||
//
|
||||
|
||||
import Algorithms
|
||||
import Foundation
|
||||
import SwiftUtils
|
||||
|
||||
@ -26,12 +27,13 @@ extension Kana2Kanji {
|
||||
|
||||
func kana2lattice_no_change(N_best: Int, previousResult: (inputData: ComposingText, lattice: Lattice)) -> (result: LatticeNode, lattice: Lattice) {
|
||||
debug("キャッシュから復元、元の文字は:", previousResult.inputData.convertTarget)
|
||||
let count = previousResult.inputData.input.count
|
||||
let inputCount = previousResult.inputData.input.count
|
||||
let surfaceCount = previousResult.inputData.convertTarget.count
|
||||
// (1)
|
||||
let result = LatticeNode.EOSNode
|
||||
|
||||
for nodeArray in previousResult.lattice {
|
||||
for node in nodeArray where node.inputRange.endIndex == count {
|
||||
for node in nodeArray where node.range.endIndex == .input(inputCount) || node.range.endIndex == .surface(surfaceCount) {
|
||||
if node.prevs.isEmpty {
|
||||
continue
|
||||
}
|
||||
|
@ -34,11 +34,14 @@ struct Kana2Kanji {
|
||||
let text = data.clauses.map {$0.clause.text}.joined()
|
||||
let value = data.clauses.last!.value + mmValue.value
|
||||
let lastMid = data.clauses.last!.clause.mid
|
||||
let correspondingCount = data.clauses.reduce(into: 0) {$0 += $1.clause.inputRange.count}
|
||||
|
||||
let composingCount: ComposingCount = data.clauses.reduce(into: .inputCount(0)) {
|
||||
$0 = .composite($0, $1.clause.range.count)
|
||||
}
|
||||
return Candidate(
|
||||
text: text,
|
||||
value: value,
|
||||
correspondingCount: correspondingCount,
|
||||
composingCount: composingCount,
|
||||
lastMid: lastMid,
|
||||
data: data.data
|
||||
)
|
||||
|
@ -1,49 +1,180 @@
|
||||
import Algorithms
|
||||
import SwiftUtils
|
||||
|
||||
struct Lattice: Sequence {
|
||||
typealias Element = [LatticeNode]
|
||||
typealias Iterator = IndexingIterator<[[LatticeNode]]>
|
||||
|
||||
init(nodes: [[LatticeNode]] = []) {
|
||||
self.nodes = nodes
|
||||
init() {
|
||||
self.inputIndexedNodes = []
|
||||
self.surfaceIndexedNodes = []
|
||||
}
|
||||
|
||||
private var nodes: [[LatticeNode]]
|
||||
init(inputCount: Int, surfaceCount: Int, rawNodes: [[LatticeNode]]) {
|
||||
self.inputIndexedNodes = .init(repeating: [], count: inputCount)
|
||||
self.surfaceIndexedNodes = .init(repeating: [], count: surfaceCount)
|
||||
|
||||
func prefix(_ k: Int) -> Lattice {
|
||||
var lattice = Lattice(nodes: self.nodes.prefix(k).map {(nodes: [LatticeNode]) in
|
||||
nodes.filter {$0.inputRange.endIndex <= k}
|
||||
})
|
||||
while lattice.nodes.last?.isEmpty ?? false {
|
||||
lattice.nodes.removeLast()
|
||||
for nodes in rawNodes {
|
||||
guard let first = nodes.first else { continue }
|
||||
switch first.range.startIndex {
|
||||
case .surface(let i):
|
||||
self.surfaceIndexedNodes[i] = nodes
|
||||
case .input(let i):
|
||||
self.inputIndexedNodes[i] = nodes
|
||||
}
|
||||
}
|
||||
return lattice
|
||||
}
|
||||
|
||||
func suffix(_ count: Int) -> Lattice {
|
||||
Lattice(nodes: self.nodes.suffix(count))
|
||||
private init(inputIndexedNodes: [[LatticeNode]], surfaceIndexedNodes: [[LatticeNode]]) {
|
||||
self.inputIndexedNodes = inputIndexedNodes
|
||||
self.surfaceIndexedNodes = surfaceIndexedNodes
|
||||
}
|
||||
|
||||
private var inputIndexedNodes: [[LatticeNode]]
|
||||
private var surfaceIndexedNodes: [[LatticeNode]]
|
||||
|
||||
/// Returns the part of this lattice that lies within the first `inputCount` input-indexed
/// positions and the first `surfaceCount` surface-indexed positions.
///
/// A node is kept only when its end index does not exceed the limit of its own index kind.
/// Position `i` of each returned array holds the nodes taken from position `i` of `self`,
/// so position-based access (`merge(_:)`, the index subscripts) stays aligned.
///
/// - Parameters:
///   - inputCount: Number of leading input-indexed positions to keep; also the largest
///     allowed end index for input-based nodes.
///   - surfaceCount: Number of leading surface-indexed positions to keep; also the largest
///     allowed end index for surface-based nodes.
/// - Returns: A new `Lattice` holding the filtered nodes, with trailing empty positions removed.
func prefix(inputCount: Int, surfaceCount: Int) -> Lattice {
    // A node survives when it ends at or before the limit matching its index kind.
    func endsWithinLimit(_ node: LatticeNode) -> Bool {
        switch node.range.endIndex {
        case .input(let end):
            return end <= inputCount
        case .surface(let end):
            return end <= surfaceCount
        }
    }

    // Cuts `source` down to `limit` positions, filters each position, then trims empty tails.
    func trimmed(_ source: [[LatticeNode]], limit: Int) -> [[LatticeNode]] {
        var positions = source.prefix(limit).map { $0.filter(endsWithinLimit) }
        // Trim from the end only: removing leading empty positions would move every
        // later position to a smaller index and break position-based lookups.
        while positions.last?.isEmpty == true {
            positions.removeLast()
        }
        return positions
    }

    return Lattice(
        inputIndexedNodes: trimmed(self.inputIndexedNodes, limit: inputCount),
        surfaceIndexedNodes: trimmed(self.surfaceIndexedNodes, limit: surfaceCount)
    )
}
|
||||
|
||||
/// Returns a lattice built from the last `inputCount` input-indexed positions and the
/// last `surfaceCount` surface-indexed positions of this lattice.
///
/// - Parameters:
///   - inputCount: Maximum number of trailing input-indexed positions to keep.
///   - surfaceCount: Maximum number of trailing surface-indexed positions to keep.
/// - Returns: A new `Lattice` holding only those trailing positions.
func suffix(inputCount: Int, surfaceCount: Int) -> Lattice {
    let trailingInputNodes: [[LatticeNode]] = self.inputIndexedNodes.suffix(inputCount)
    let trailingSurfaceNodes: [[LatticeNode]] = self.surfaceIndexedNodes.suffix(surfaceCount)
    return Lattice(
        inputIndexedNodes: trailingInputNodes,
        surfaceIndexedNodes: trailingSurfaceNodes
    )
}
|
||||
|
||||
mutating func merge(_ lattice: Lattice) {
|
||||
for (index, nodeArray) in lattice.nodes.enumerated() where index < self.nodes.endIndex {
|
||||
self.nodes[index].append(contentsOf: nodeArray)
|
||||
for (index, nodeArray) in lattice.inputIndexedNodes.enumerated() where index < self.inputIndexedNodes.endIndex {
|
||||
self.inputIndexedNodes[index].append(contentsOf: nodeArray)
|
||||
}
|
||||
if self.nodes.endIndex < lattice.nodes.endIndex {
|
||||
for nodeArray in lattice.nodes[self.nodes.endIndex...] {
|
||||
self.nodes.append(nodeArray)
|
||||
if self.inputIndexedNodes.endIndex < lattice.inputIndexedNodes.endIndex {
|
||||
for nodeArray in lattice.inputIndexedNodes[self.inputIndexedNodes.endIndex...] {
|
||||
self.inputIndexedNodes.append(nodeArray)
|
||||
}
|
||||
}
|
||||
for (index, nodeArray) in lattice.surfaceIndexedNodes.enumerated() where index < self.surfaceIndexedNodes.endIndex {
|
||||
self.surfaceIndexedNodes[index].append(contentsOf: nodeArray)
|
||||
}
|
||||
if self.surfaceIndexedNodes.endIndex < lattice.surfaceIndexedNodes.endIndex {
|
||||
for nodeArray in lattice.surfaceIndexedNodes[self.surfaceIndexedNodes.endIndex...] {
|
||||
self.surfaceIndexedNodes.append(nodeArray)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
subscript(inputIndex i: Int) -> [LatticeNode] {
|
||||
get {
|
||||
self.nodes[i]
|
||||
self.inputIndexedNodes[i]
|
||||
}
|
||||
}
|
||||
|
||||
func makeIterator() -> IndexingIterator<[[LatticeNode]]> {
|
||||
self.nodes.makeIterator()
|
||||
/// Returns the nodes stored at `index`, reading from the input-indexed or the
/// surface-indexed storage depending on the case of `index`.
subscript(index index: LatticeIndex) -> [LatticeNode] {
    switch index {
    case .input(let position):
        return self.inputIndexedNodes[position]
    case .surface(let position):
        return self.surfaceIndexedNodes[position]
    }
}
|
||||
|
||||
func indexedNodes() -> some Sequence<(index: LatticeIndex, nodes: [LatticeNode])> {
|
||||
self.inputIndexedNodes.enumerated().lazy.map { (.input($0.offset), $0.element) }
|
||||
.chained(self.surfaceIndexedNodes.enumerated().lazy.map { (.surface($0.offset), $0.element) })
|
||||
}
|
||||
|
||||
func makeIterator() -> Chain2Sequence<[[LatticeNode]], [[LatticeNode]]>.Iterator {
|
||||
self.inputIndexedNodes.chained(self.surfaceIndexedNodes).makeIterator()
|
||||
}
|
||||
|
||||
var isEmpty: Bool {
|
||||
self.nodes.isEmpty
|
||||
self.inputIndexedNodes.isEmpty && self.surfaceIndexedNodes.isEmpty
|
||||
}
|
||||
|
||||
/// A position in the lattice, expressed either as a surface-based or an input-based offset.
enum LatticeIndex: Sendable, Equatable {
    /// A surface-based position.
    case surface(Int)
    /// An input-based position.
    case input(Int)

    /// `true` when the offset is `0`, whichever kind of index this is.
    var isZero: Bool {
        switch self {
        case .surface(let offset), .input(let offset):
            return offset == 0
        }
    }
}
|
||||
|
||||
/// A span covered by a lattice node, expressed either in surface-based or in input-based
/// positions. The span runs from `from` to `to` and covers `to - from` positions.
enum LatticeRange: Sendable, Equatable {
    /// A span measured in surface-based positions.
    case surface(from: Int, to: Int)
    /// A span measured in input-based positions.
    case input(from: Int, to: Int)

    /// The empty input-based range starting and ending at `0`.
    static var zero: Self {
        Self.input(from: 0, to: 0)
    }

    /// The length of the span, tagged with the kind of position it is measured in.
    var count: ComposingCount {
        switch self {
        case let .surface(from: lower, to: upper):
            return .surfaceCount(upper - lower)
        case let .input(from: lower, to: upper):
            return .inputCount(upper - lower)
        }
    }

    /// The first position of the span, keeping the same index kind.
    var startIndex: LatticeIndex {
        switch self {
        case let .surface(from: lower, to: _):
            return .surface(lower)
        case let .input(from: lower, to: _):
            return .input(lower)
        }
    }

    /// The end position of the span, keeping the same index kind.
    var endIndex: LatticeIndex {
        switch self {
        case let .surface(from: _, to: upper):
            return .surface(upper)
        case let .input(from: _, to: upper):
            return .input(upper)
        }
    }

    /// Joins this range with `other` when `other` begins exactly where this range ends.
    ///
    /// - Parameter other: The range expected to follow this one.
    /// - Returns: The combined range, or `nil` when the two ranges are of different kinds
    ///   or are not adjacent.
    func merged(with other: Self) -> Self? {
        switch (self, other) {
        case let (.surface(from: start, to: leftEnd), .surface(from: rightStart, to: end)) where leftEnd == rightStart:
            return .surface(from: start, to: end)
        case let (.input(from: start, to: leftEnd), .input(from: rightStart, to: end)) where leftEnd == rightStart:
            return .input(from: start, to: end)
        case (.surface, .surface), (.input, .input), (.surface, .input), (.input, .surface):
            // Same kind but not adjacent, or different kinds: there is no single range to return.
            return nil
        }
    }

    /// Returns this range shifted by the offset that matches its index kind.
    ///
    /// - Parameters:
    ///   - inputOffset: Amount added to both bounds of an input-based range.
    ///   - surfaceOffset: Amount added to both bounds of a surface-based range.
    /// - Returns: The shifted range, of the same kind as the receiver.
    func offseted(inputOffset: Int, surfaceOffset: Int) -> Self {
        switch self {
        case let .surface(from: lower, to: upper):
            return .surface(from: lower + surfaceOffset, to: upper + surfaceOffset)
        case let .input(from: lower, to: upper):
            return .input(from: lower + inputOffset, to: upper + inputOffset)
        }
    }
}
|
||||
}
|
||||
|
@ -17,23 +17,23 @@ public final class LatticeNode {
|
||||
/// `prevs`の各要素に対応するスコアのデータ
|
||||
var values: [PValue] = []
|
||||
/// inputData.input、もしくはinputData.convertTarget内のrange
|
||||
var inputRange: Range<Int>
|
||||
var range: Lattice.LatticeRange
|
||||
|
||||
/// `EOS`に対応するノード。
|
||||
static var EOSNode: LatticeNode {
|
||||
LatticeNode(data: DicdataElement.EOSData, inputRange: 0..<0)
|
||||
LatticeNode(data: DicdataElement.EOSData, range: .zero)
|
||||
}
|
||||
|
||||
init(data: DicdataElement, inputRange: Range<Int>) {
|
||||
init(data: DicdataElement, range: Lattice.LatticeRange) {
|
||||
self.data = data
|
||||
self.values = [data.value()]
|
||||
self.inputRange = inputRange
|
||||
self.range = range
|
||||
}
|
||||
|
||||
/// `LatticeNode`の持っている情報を反映した`RegisteredNode`を作成する
|
||||
/// `LatticeNode`は複数の過去のノードを持つことができるが、`RegisteredNode`は1つしか持たない。
|
||||
func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode {
|
||||
RegisteredNode(data: self.data, registered: self.prevs[index], totalValue: value, inputRange: self.inputRange)
|
||||
RegisteredNode(data: self.data, registered: self.prevs[index], totalValue: value, range: self.range)
|
||||
}
|
||||
|
||||
/// 再帰的にノードを遡り、`CandidateData`を構築する関数
|
||||
|
@ -36,7 +36,7 @@ public struct PostCompositionPredictionCandidate {
|
||||
candidate.data.append(data)
|
||||
}
|
||||
candidate.value = self.value
|
||||
candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count }
|
||||
candidate.composingCount = .surfaceCount(candidate.rubyCount)
|
||||
candidate.lastMid = data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? candidate.lastMid
|
||||
return candidate
|
||||
case .replacement(let targetData, let replacementData):
|
||||
@ -45,7 +45,7 @@ public struct PostCompositionPredictionCandidate {
|
||||
candidate.text = candidate.data.reduce(into: "") {$0 += $1.word}
|
||||
candidate.value = self.value
|
||||
candidate.lastMid = candidate.data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
|
||||
candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count }
|
||||
candidate.composingCount = .surfaceCount(candidate.rubyCount)
|
||||
return candidate
|
||||
}
|
||||
}
|
||||
|
@ -22,9 +22,14 @@ extension Kana2Kanji {
|
||||
/// - note:
|
||||
/// この関数の役割は意味連接の考慮にある。
|
||||
func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] {
|
||||
debug("getPredictionCandidates", composingText, lastClause.inputRange, lastClause.text)
|
||||
let lastRuby = ComposingText.getConvertTarget(for: composingText.input[lastClause.inputRange]).toKatakana()
|
||||
let lastRubyCount = lastClause.inputRange.count
|
||||
debug("getPredictionCandidates", composingText, lastClause.range, lastClause.text)
|
||||
// The ruby of the last clause. Both branches convert to katakana so that the
// lookups below receive the same form whichever index kind the clause range uses.
let lastRuby = switch lastClause.range {
case let .input(left, right):
    // Input-based range: derive the convert target from the input elements first.
    ComposingText.getConvertTarget(for: composingText.input[left..<right]).toKatakana()
case let .surface(left, right):
    // Surface-based range: slice `convertTarget` directly by character offsets.
    String(composingText.convertTarget.dropFirst(left).prefix(right - left)).toKatakana()
}
|
||||
let lastRubyCount = lastRuby.count
|
||||
let datas: [DicdataElement]
|
||||
do {
|
||||
var _str = ""
|
||||
@ -42,11 +47,11 @@ extension Kana2Kanji {
|
||||
|
||||
let osuserdict: [DicdataElement] = dicdataStore.getPrefixMatchDynamicUserDict(lastRuby)
|
||||
|
||||
let lastCandidate: Candidate = prepart.isEmpty ? Candidate(text: "", value: .zero, correspondingCount: 0, lastMid: MIDData.EOS.mid, data: []) : self.processClauseCandidate(prepart)
|
||||
let lastCandidate: Candidate = prepart.isEmpty ? Candidate(text: "", value: .zero, composingCount: .inputCount(0), lastMid: MIDData.EOS.mid, data: []) : self.processClauseCandidate(prepart)
|
||||
let lastRcid: Int = lastCandidate.data.last?.rcid ?? CIDData.EOS.cid
|
||||
let nextLcid: Int = prepart.lastClause?.nextLcid ?? CIDData.EOS.cid
|
||||
let lastMid: Int = lastCandidate.lastMid
|
||||
let correspoindingCount: Int = lastCandidate.correspondingCount + lastRubyCount
|
||||
let composingCount: ComposingCount = .composite(lastCandidate.composingCount, .surfaceCount(lastRubyCount))
|
||||
let ignoreCCValue: PValue = self.dicdataStore.getCCValue(lastRcid, nextLcid)
|
||||
|
||||
let inputStyle = composingText.input.last?.inputStyle ?? .direct
|
||||
@ -91,7 +96,7 @@ extension Kana2Kanji {
|
||||
let candidate: Candidate = Candidate(
|
||||
text: lastCandidate.text + data.word,
|
||||
value: newValue,
|
||||
correspondingCount: correspoindingCount,
|
||||
composingCount: composingCount,
|
||||
lastMid: includeMMValueCalculation ? data.mid:lastMid,
|
||||
data: nodedata
|
||||
)
|
||||
|
@ -14,7 +14,7 @@ protocol RegisteredNodeProtocol {
|
||||
var data: DicdataElement {get}
|
||||
var prev: (any RegisteredNodeProtocol)? {get}
|
||||
var totalValue: PValue {get}
|
||||
var inputRange: Range<Int> {get}
|
||||
var range: Lattice.LatticeRange {get}
|
||||
}
|
||||
|
||||
struct RegisteredNode: RegisteredNodeProtocol {
|
||||
@ -25,19 +25,19 @@ struct RegisteredNode: RegisteredNodeProtocol {
|
||||
/// 始点からこのノードまでのコスト
|
||||
let totalValue: PValue
|
||||
/// `composingText`の`input`、もしくは`convertTarget`で対応する範囲
|
||||
let inputRange: Range<Int>
|
||||
let range: Lattice.LatticeRange
|
||||
|
||||
init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, inputRange: Range<Int>) {
|
||||
init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, range: Lattice.LatticeRange) {
|
||||
self.data = data
|
||||
self.prev = registered
|
||||
self.totalValue = totalValue
|
||||
self.inputRange = inputRange
|
||||
self.range = range
|
||||
}
|
||||
|
||||
/// 始点ノードを生成する関数
|
||||
/// - Returns: 始点ノードのデータ
|
||||
static func BOSNode() -> RegisteredNode {
|
||||
RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, inputRange: 0 ..< 0)
|
||||
RegisteredNode(data: DicdataElement.BOSData, registered: nil, totalValue: 0, range: .zero)
|
||||
}
|
||||
|
||||
/// 入力中、確定した部分を考慮した始点ノードを生成する関数
|
||||
@ -47,7 +47,7 @@ struct RegisteredNode: RegisteredNodeProtocol {
|
||||
data: DicdataElement(word: "", ruby: "", lcid: CIDData.BOS.cid, rcid: candidate.data.last?.rcid ?? CIDData.BOS.cid, mid: candidate.lastMid, value: 0),
|
||||
registered: nil,
|
||||
totalValue: 0,
|
||||
inputRange: 0 ..< 0
|
||||
range: .zero
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -59,7 +59,7 @@ extension RegisteredNodeProtocol {
|
||||
guard let prev else {
|
||||
let unit = ClauseDataUnit()
|
||||
unit.mid = self.data.mid
|
||||
unit.inputRange = self.inputRange
|
||||
unit.range = self.range
|
||||
return CandidateData(clauses: [(clause: unit, value: .zero)], data: [])
|
||||
}
|
||||
var lastcandidate = prev.getCandidateData() // 自分に至るregisterdそれぞれのデータに処理
|
||||
@ -75,7 +75,11 @@ extension RegisteredNodeProtocol {
|
||||
if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) {
|
||||
// 文節ではないので、最後に追加する。
|
||||
lastClause.text.append(self.data.word)
|
||||
lastClause.inputRange = lastClause.inputRange.startIndex ..< self.inputRange.endIndex
|
||||
if let newRange = lastClause.range.merged(with: self.range) {
|
||||
lastClause.range = newRange
|
||||
} else {
|
||||
fatalError("このケースは想定していません。")
|
||||
}
|
||||
// 最初だった場合を想定している
|
||||
if (lastClause.mid == 500 && self.data.mid != 500) || DicdataStore.includeMMValueCalculation(self.data) {
|
||||
lastClause.mid = self.data.mid
|
||||
@ -88,7 +92,7 @@ extension RegisteredNodeProtocol {
|
||||
else {
|
||||
let unit = ClauseDataUnit()
|
||||
unit.text = self.data.word
|
||||
unit.inputRange = self.inputRange
|
||||
unit.range = self.range
|
||||
if DicdataStore.includeMMValueCalculation(self.data) {
|
||||
unit.mid = self.data.mid
|
||||
}
|
||||
|
@ -65,7 +65,7 @@ extension Kana2Kanji {
|
||||
var constraint = zenzaiCache?.getNewConstraint(for: inputData) ?? PrefixConstraint([])
|
||||
debug("initial constraint", constraint)
|
||||
let eosNode = LatticeNode.EOSNode
|
||||
var lattice: Lattice = Lattice(nodes: [])
|
||||
var lattice: Lattice = Lattice()
|
||||
var constructedCandidates: [(RegisteredNode, Candidate)] = []
|
||||
var insertedCandidates: [(RegisteredNode, Candidate)] = []
|
||||
defer {
|
||||
|
@ -17,28 +17,32 @@ final class ClauseDataUnit {
|
||||
/// The text of the unit.
|
||||
var text: String = ""
|
||||
/// The range of the unit, indexed either by the input or by the surface (`convertTarget`).
|
||||
var inputRange: Range<Int> = 0 ..< 0
|
||||
var range: Lattice.LatticeRange = .zero
|
||||
|
||||
/// Merge the given unit to this unit.
|
||||
/// - Parameter:
|
||||
/// - unit: The unit to merge.
|
||||
func merge(with unit: ClauseDataUnit) {
|
||||
self.text.append(unit.text)
|
||||
self.inputRange = self.inputRange.startIndex ..< unit.inputRange.endIndex
|
||||
if let newRange = self.range.merged(with: unit.range) {
|
||||
self.range = newRange
|
||||
} else {
|
||||
fatalError("このケースは想定していません。")
|
||||
}
|
||||
self.nextLcid = unit.nextLcid
|
||||
}
|
||||
}
|
||||
|
||||
extension ClauseDataUnit: Equatable {
|
||||
static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool {
|
||||
lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.inputRange == rhs.inputRange
|
||||
lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.range == rhs.range
|
||||
}
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
extension ClauseDataUnit: CustomDebugStringConvertible {
|
||||
var debugDescription: String {
|
||||
"ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), inputRange: \(inputRange))"
|
||||
"ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), range: \(range))"
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -67,14 +71,24 @@ public enum CompleteAction: Equatable, Sendable {
|
||||
case moveCursor(Int)
|
||||
}
|
||||
|
||||
/// Describes how much of a `ComposingText` a candidate corresponds to.
///
/// The amount can be counted against `ComposingText.input`, against
/// `ComposingText.convertTarget`, or as a sequence of such counts.
/// `ComposingText.prefixComplete(composingCount:)` consumes this value to
/// remove the confirmed prefix.
public enum ComposingCount: Equatable, Sendable {
|
||||
/// The number of corresponding elements in `composingText.input`.
|
||||
case inputCount(Int)
|
||||
/// The number of corresponding characters in `composingText.convertTarget`.
|
||||
case surfaceCount(Int)
|
||||
|
||||
/// A concatenation of two counts; `prefixComplete` applies the left one first, then the right one.
|
||||
indirect case composite(Self, Self)
|
||||
}
|
||||
|
||||
/// 変換候補のデータ
|
||||
public struct Candidate: Sendable {
|
||||
/// 入力となるテキスト
|
||||
public var text: String
|
||||
/// 評価値
|
||||
public var value: PValue
|
||||
/// composingText.inputにおいて対応する文字数。
|
||||
public var correspondingCount: Int
|
||||
|
||||
public var composingCount: ComposingCount
|
||||
/// 最後のmid(予測変換に利用)
|
||||
public var lastMid: Int
|
||||
/// DicdataElement列
|
||||
@ -86,14 +100,18 @@ public struct Candidate: Sendable {
|
||||
/// - note: 文字数表示のために追加したフラグ
|
||||
public let inputable: Bool
|
||||
|
||||
public init(text: String, value: PValue, correspondingCount: Int, lastMid: Int, data: [DicdataElement], actions: [CompleteAction] = [], inputable: Bool = true) {
|
||||
/// ルビ文字数
|
||||
public let rubyCount: Int
|
||||
|
||||
public init(text: String, value: PValue, composingCount: ComposingCount, lastMid: Int, data: [DicdataElement], actions: [CompleteAction] = [], inputable: Bool = true) {
|
||||
self.text = text
|
||||
self.value = value
|
||||
self.correspondingCount = correspondingCount
|
||||
self.composingCount = composingCount
|
||||
self.lastMid = lastMid
|
||||
self.data = data
|
||||
self.actions = actions
|
||||
self.inputable = inputable
|
||||
self.rubyCount = self.data.reduce(into: 0) { $0 += $1.ruby.count }
|
||||
}
|
||||
/// 後から`action`を追加した形を生成する関数
|
||||
/// - parameters:
|
||||
@ -138,7 +156,7 @@ public struct Candidate: Sendable {
|
||||
/// 入力を文としたとき、prefixになる文節に対応するCandidateを作る
|
||||
public static func makePrefixClauseCandidate(data: some Collection<DicdataElement>) -> Candidate {
|
||||
var text = ""
|
||||
var correspondingCount = 0
|
||||
var composingCount = 0
|
||||
var lastRcid = CIDData.BOS.cid
|
||||
var lastMid = 501
|
||||
var candidateData: [DicdataElement] = []
|
||||
@ -148,7 +166,7 @@ public struct Candidate: Sendable {
|
||||
break
|
||||
}
|
||||
text.append(item.word)
|
||||
correspondingCount += item.ruby.count
|
||||
composingCount += item.ruby.count
|
||||
lastRcid = item.rcid
|
||||
// 最初だった場合を想定している
|
||||
if item.mid != 500 && DicdataStore.includeMMValueCalculation(item) {
|
||||
@ -159,7 +177,7 @@ public struct Candidate: Sendable {
|
||||
return Candidate(
|
||||
text: text,
|
||||
value: -5,
|
||||
correspondingCount: correspondingCount,
|
||||
composingCount: .surfaceCount(composingCount),
|
||||
lastMid: lastMid,
|
||||
data: candidateData
|
||||
)
|
||||
|
@ -168,7 +168,7 @@ import EfficientNGram
|
||||
var textIndex = [String: Int]()
|
||||
for candidate in candidates where !candidate.text.isEmpty && !seenCandidates.contains(candidate.text) {
|
||||
if let index = textIndex[candidate.text] {
|
||||
if result[index].value < candidate.value || result[index].correspondingCount < candidate.correspondingCount {
|
||||
if result[index].value < candidate.value || result[index].rubyCount < candidate.rubyCount {
|
||||
result[index] = candidate
|
||||
}
|
||||
} else {
|
||||
@ -219,7 +219,7 @@ import EfficientNGram
|
||||
let candidate: Candidate = Candidate(
|
||||
text: ruby,
|
||||
value: penalty,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: data
|
||||
)
|
||||
@ -232,7 +232,7 @@ import EfficientNGram
|
||||
let candidate: Candidate = Candidate(
|
||||
text: word,
|
||||
value: value,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: data
|
||||
)
|
||||
@ -251,7 +251,7 @@ import EfficientNGram
|
||||
let candidate: Candidate = Candidate(
|
||||
text: ruby,
|
||||
value: penalty,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: data
|
||||
)
|
||||
@ -264,7 +264,7 @@ import EfficientNGram
|
||||
let candidate: Candidate = Candidate(
|
||||
text: word,
|
||||
value: value,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: data
|
||||
)
|
||||
@ -368,7 +368,7 @@ import EfficientNGram
|
||||
private func getAdditionalCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
|
||||
var candidates: [Candidate] = []
|
||||
let string = inputData.convertTarget.toKatakana()
|
||||
let correspondingCount = inputData.input.count
|
||||
let composingCount: ComposingCount = .inputCount(inputData.input.count)
|
||||
do {
|
||||
// カタカナ
|
||||
let value = -14 * getKatakanaScore(string)
|
||||
@ -376,7 +376,7 @@ import EfficientNGram
|
||||
let katakana = Candidate(
|
||||
text: string,
|
||||
value: value,
|
||||
correspondingCount: correspondingCount,
|
||||
composingCount: composingCount,
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [data]
|
||||
)
|
||||
@ -390,7 +390,7 @@ import EfficientNGram
|
||||
let hiragana = Candidate(
|
||||
text: hiraganaString,
|
||||
value: -14.5,
|
||||
correspondingCount: correspondingCount,
|
||||
composingCount: composingCount,
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [data]
|
||||
)
|
||||
@ -403,7 +403,7 @@ import EfficientNGram
|
||||
let uppercasedLetter = Candidate(
|
||||
text: word,
|
||||
value: -14.6,
|
||||
correspondingCount: correspondingCount,
|
||||
composingCount: composingCount,
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [data]
|
||||
)
|
||||
@ -416,7 +416,7 @@ import EfficientNGram
|
||||
let fullWidthLetter = Candidate(
|
||||
text: word,
|
||||
value: -14.7,
|
||||
correspondingCount: correspondingCount,
|
||||
composingCount: composingCount,
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [data]
|
||||
)
|
||||
@ -429,7 +429,7 @@ import EfficientNGram
|
||||
let halfWidthKatakana = Candidate(
|
||||
text: word,
|
||||
value: -15,
|
||||
correspondingCount: correspondingCount,
|
||||
composingCount: composingCount,
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [data]
|
||||
)
|
||||
@ -472,7 +472,7 @@ import EfficientNGram
|
||||
return Candidate(
|
||||
text: first.clause.text,
|
||||
value: first.value,
|
||||
correspondingCount: first.clause.inputRange.count,
|
||||
composingCount: first.clause.range.count,
|
||||
lastMid: first.clause.mid,
|
||||
data: Array(candidateData.data[0...count])
|
||||
)
|
||||
@ -529,10 +529,10 @@ import EfficientNGram
|
||||
var seenCandidate: Set<String> = full_candidate.mapSet {$0.text}
|
||||
// 文節のみ変換するパターン(上位5件)
|
||||
let clause_candidates = self.getUniqueCandidate(clauseCandidates, seenCandidates: seenCandidate).min(count: 5) {
|
||||
if $0.correspondingCount == $1.correspondingCount {
|
||||
if $0.rubyCount == $1.rubyCount {
|
||||
$0.value > $1.value
|
||||
} else {
|
||||
$0.correspondingCount > $1.correspondingCount
|
||||
$0.rubyCount > $1.rubyCount
|
||||
}
|
||||
}
|
||||
seenCandidate.formUnion(clause_candidates.map {$0.text})
|
||||
@ -543,7 +543,7 @@ import EfficientNGram
|
||||
Candidate(
|
||||
text: $0.data.word,
|
||||
value: $0.data.value(),
|
||||
correspondingCount: $0.inputRange.count,
|
||||
composingCount: $0.range.count,
|
||||
lastMid: $0.data.mid,
|
||||
data: [$0.data]
|
||||
)
|
||||
@ -554,8 +554,8 @@ import EfficientNGram
|
||||
// 文字列の長さごとに並べ、かつその中で評価の高いものから順に並べる。
|
||||
var word_candidates: [Candidate] = self.getUniqueCandidate(dicCandidates.chained(additionalCandidates), seenCandidates: seenCandidate)
|
||||
.sorted {
|
||||
let count0 = $0.correspondingCount
|
||||
let count1 = $1.correspondingCount
|
||||
let count0 = $0.rubyCount
|
||||
let count1 = $1.rubyCount
|
||||
return count0 == count1 ? $0.value > $1.value : count0 > count1
|
||||
}
|
||||
seenCandidate.formUnion(word_candidates.map {$0.text})
|
||||
@ -590,10 +590,10 @@ import EfficientNGram
|
||||
}
|
||||
// 文節のみ変換するパターン(上位5件)
|
||||
let firstClauseResults = self.getUniqueCandidate(clauseCandidates).min(count: 5) {
|
||||
if $0.correspondingCount == $1.correspondingCount {
|
||||
if $0.rubyCount == $1.rubyCount {
|
||||
$0.value > $1.value
|
||||
} else {
|
||||
$0.correspondingCount > $1.correspondingCount
|
||||
$0.rubyCount > $1.rubyCount
|
||||
}
|
||||
}
|
||||
return ConversionResult(mainResults: result, firstClauseResults: firstClauseResults)
|
||||
@ -662,7 +662,7 @@ import EfficientNGram
|
||||
let diff = inputData.differenceSuffix(to: previousInputData)
|
||||
|
||||
debug("\(#function): 最後尾文字置換用の関数を呼びます、差分は\(diff)")
|
||||
let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: (diff.deleted, diff.addedCount), previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection)
|
||||
let result = converter.kana2lattice_changed(inputData, N_best: N_best, counts: diff, previousResult: (inputData: previousInputData, lattice: self.lattice), needTypoCorrection: needTypoCorrection)
|
||||
self.previousInputData = inputData
|
||||
return result
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ extension KanaKanjiConverter {
|
||||
return result.map {[Candidate(
|
||||
text: $0,
|
||||
value: -15,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: $0, ruby: string, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)]
|
||||
)]} ?? []
|
||||
@ -116,7 +116,7 @@ extension KanaKanjiConverter {
|
||||
Candidate(
|
||||
text: $0,
|
||||
value: -18,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.年.mid,
|
||||
data: [DicdataElement(word: $0, ruby: string, cid: CIDData.一般名詞.cid, mid: MIDData.年.mid, value: -18)]
|
||||
)
|
||||
@ -125,7 +125,7 @@ extension KanaKanjiConverter {
|
||||
Candidate(
|
||||
text: $0,
|
||||
value: -19,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.年.mid,
|
||||
data: [DicdataElement(word: $0, ruby: string, cid: CIDData.一般名詞.cid, mid: MIDData.年.mid, value: -19)]
|
||||
)
|
||||
|
@ -38,7 +38,7 @@ extension KanaKanjiConverter {
|
||||
let candidate = Candidate(
|
||||
text: result,
|
||||
value: -10,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: result, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)]
|
||||
)
|
||||
|
@ -46,7 +46,7 @@ extension KanaKanjiConverter {
|
||||
Candidate(
|
||||
text: address,
|
||||
value: baseValue - PValue(i),
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: address, ruby: string, cid: .zero, mid: MIDData.一般.mid, value: baseValue - PValue(i))]
|
||||
)
|
||||
|
@ -37,7 +37,7 @@ extension KanaKanjiConverter {
|
||||
Candidate(
|
||||
text: $0,
|
||||
value: -15,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: $0, ruby: string, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -15)]
|
||||
)
|
||||
|
@ -17,7 +17,7 @@ extension KanaKanjiConverter {
|
||||
let candidate = Candidate(
|
||||
text: timeExpression,
|
||||
value: -10,
|
||||
correspondingCount: numberString.count,
|
||||
composingCount: .surfaceCount(numberString.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)]
|
||||
)
|
||||
@ -31,7 +31,7 @@ extension KanaKanjiConverter {
|
||||
let candidate = Candidate(
|
||||
text: timeExpression,
|
||||
value: -10,
|
||||
correspondingCount: numberString.count,
|
||||
composingCount: .surfaceCount(numberString.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: timeExpression, ruby: numberString, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)]
|
||||
)
|
||||
|
@ -22,7 +22,7 @@ extension KanaKanjiConverter {
|
||||
Candidate(
|
||||
text: char,
|
||||
value: value0,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: char, ruby: string, cid: .zero, mid: MIDData.一般.mid, value: value0)]
|
||||
)
|
||||
|
@ -20,7 +20,7 @@ extension KanaKanjiConverter {
|
||||
return [Candidate(
|
||||
text: versionString,
|
||||
value: -30,
|
||||
correspondingCount: inputData.input.count,
|
||||
composingCount: .inputCount(inputData.input.count),
|
||||
lastMid: MIDData.一般.mid,
|
||||
data: [DicdataElement(word: versionString, ruby: inputData.convertTarget.toKatakana(), cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -30)]
|
||||
)]
|
||||
|
@ -427,7 +427,7 @@ public final class DicdataStore {
|
||||
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
|
||||
return nil
|
||||
}
|
||||
let node = LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1)
|
||||
let node = LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
|
||||
node.prevs.append(RegisteredNode.BOSNode())
|
||||
return node
|
||||
}
|
||||
@ -437,7 +437,7 @@ public final class DicdataStore {
|
||||
guard let endIndex = stringToInfo[Array($0.ruby)]?.endIndex else {
|
||||
return nil
|
||||
}
|
||||
return LatticeNode(data: $0, inputRange: fromIndex ..< endIndex + 1)
|
||||
return LatticeNode(data: $0, range: .input(from: fromIndex, to: endIndex + 1))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
@ -341,7 +341,9 @@ public struct ComposingText: Sendable {
|
||||
/// 文頭の方を確定させる関数
|
||||
/// - parameters:
|
||||
/// - composingCount: 確定する範囲を表すカウント(`input`または`convertTarget`において対応する文字数)
|
||||
public mutating func prefixComplete(correspondingCount: Int) {
|
||||
public mutating func prefixComplete(composingCount: ComposingCount) {
|
||||
switch composingCount {
|
||||
case .inputCount(let correspondingCount):
|
||||
let correspondingCount = min(correspondingCount, self.input.count)
|
||||
self.input.removeFirst(correspondingCount)
|
||||
// convertTargetを更新する
|
||||
@ -354,6 +356,23 @@ public struct ComposingText: Sendable {
|
||||
if self.convertTargetCursorPosition == 0 {
|
||||
self.convertTargetCursorPosition = self.convertTarget.count
|
||||
}
|
||||
case .surfaceCount(let correspondingCount):
|
||||
// 先頭correspondingCountを削除する操作に相当する
|
||||
// カーソルを移動する
|
||||
let prefix = self.convertTarget.prefix(correspondingCount)
|
||||
let index = self.forceGetInputCursorPosition(target: prefix)
|
||||
self.input = Array(self.input[index...])
|
||||
self.convertTarget = String(self.convertTarget.dropFirst(correspondingCount))
|
||||
self.convertTargetCursorPosition -= correspondingCount
|
||||
// もしも左端にカーソルが位置していたら、文末に移動させる
|
||||
if self.convertTargetCursorPosition == 0 {
|
||||
self.convertTargetCursorPosition = self.convertTarget.count
|
||||
}
|
||||
|
||||
case .composite(let left, let right):
|
||||
self.prefixComplete(composingCount: left)
|
||||
self.prefixComplete(composingCount: right)
|
||||
}
|
||||
}
|
||||
|
||||
/// 現在のカーソル位置までの文字でComposingTextを作成し、返す
|
||||
@ -580,17 +599,20 @@ extension ComposingText.ConvertTargetElement: Equatable {}
|
||||
extension ComposingText {
|
||||
/// 2つの`ComposingText`のデータを比較し、差分を計算する。
|
||||
/// `convertTarget`との整合性をとるため、`convertTarget`に合わせた上で比較する
|
||||
func differenceSuffix(to previousData: ComposingText) -> (deleted: Int, addedCount: Int) {
|
||||
func differenceSuffix(to previousData: ComposingText) -> (deletedInput: Int, addedInput: Int, deletedSurface: Int, addedSurface: Int) {
|
||||
// k→か、sh→しゃ、のような場合、差分は全てx ... lastの範囲に現れるので、差分計算が問題なく動作する
|
||||
// かn → かんs、のような場合、「かんs、んs、s」のようなものは現れるが、「かん」が生成できない
|
||||
// 本質的にこれはポリシーの問題であり、「は|しゃ」の変換で「はし」が部分変換として現れないことと同根の問題である。
|
||||
// 解決のためには、inputの段階で「ん」をdirectで扱うべきである。
|
||||
|
||||
// 差分を計算する
|
||||
let common = self.input.commonPrefix(with: previousData.input)
|
||||
let deleted = previousData.input.count - common.count
|
||||
let added = self.input.dropFirst(common.count).count
|
||||
return (deleted, added)
|
||||
|
||||
let commonSurface = self.convertTarget.commonPrefix(with: previousData.convertTarget)
|
||||
let deletedSurface = previousData.convertTarget.count - commonSurface.count
|
||||
let addedSurface = self.convertTarget.suffix(from: commonSurface.startIndex).count
|
||||
return (deleted, added, deletedSurface, addedSurface)
|
||||
}
|
||||
|
||||
func inputHasSuffix(inputOf suffix: ComposingText) -> Bool {
|
||||
|
@ -14,19 +14,19 @@ final class ClauseDataUnitTests: XCTestCase {
|
||||
do {
|
||||
let unit1 = ClauseDataUnit()
|
||||
unit1.text = "僕が"
|
||||
unit1.inputRange = 0 ..< 3
|
||||
unit1.range = .input(from: 0, to: 3)
|
||||
unit1.mid = 0
|
||||
unit1.nextLcid = 0
|
||||
|
||||
let unit2 = ClauseDataUnit()
|
||||
unit2.text = "走る"
|
||||
unit2.inputRange = 3 ..< 6
|
||||
unit2.range = .input(from: 3, to: 6)
|
||||
unit2.mid = 1
|
||||
unit2.nextLcid = 1
|
||||
|
||||
unit1.merge(with: unit2)
|
||||
XCTAssertEqual(unit1.text, "僕が走る")
|
||||
XCTAssertEqual(unit1.inputRange, 0 ..< 6)
|
||||
XCTAssertEqual(unit1.range, .input(from: 0, to: 6))
|
||||
XCTAssertEqual(unit1.nextLcid, 1)
|
||||
XCTAssertEqual(unit1.mid, 0)
|
||||
}
|
||||
@ -34,19 +34,19 @@ final class ClauseDataUnitTests: XCTestCase {
|
||||
do {
|
||||
let unit1 = ClauseDataUnit()
|
||||
unit1.text = "君は"
|
||||
unit1.inputRange = 0 ..< 3
|
||||
unit1.range = .input(from: 0, to: 3)
|
||||
unit1.mid = 0
|
||||
unit1.nextLcid = 0
|
||||
|
||||
let unit2 = ClauseDataUnit()
|
||||
unit2.text = "笑った"
|
||||
unit2.inputRange = 3 ..< 7
|
||||
unit2.range = .input(from: 3, to: 7)
|
||||
unit2.mid = 3
|
||||
unit2.nextLcid = 3
|
||||
|
||||
unit1.merge(with: unit2)
|
||||
XCTAssertEqual(unit1.text, "君は笑った")
|
||||
XCTAssertEqual(unit1.inputRange, 0 ..< 7)
|
||||
XCTAssertEqual(unit1.range, .input(from: 0, to: 7))
|
||||
XCTAssertEqual(unit1.nextLcid, 3)
|
||||
XCTAssertEqual(unit1.mid, 0)
|
||||
}
|
||||
|
@ -202,8 +202,8 @@ final class ComposingTextTests: XCTestCase {
|
||||
var c2 = ComposingText()
|
||||
c2.insertAtCursorPosition("hasiru", inputStyle: .roman2kana)
|
||||
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).deleted, 0)
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).addedCount, 1)
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).deletedInput, 0)
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).addedInput, 1)
|
||||
}
|
||||
do {
|
||||
var c1 = ComposingText()
|
||||
@ -212,8 +212,8 @@ final class ComposingTextTests: XCTestCase {
|
||||
var c2 = ComposingText()
|
||||
c2.insertAtCursorPosition("tukatte", inputStyle: .roman2kana)
|
||||
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).deleted, 0)
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).addedCount, 1)
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).deletedInput, 0)
|
||||
XCTAssertEqual(c2.differenceSuffix(to: c1).addedInput, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ final class CandidateTests: XCTestCase {
|
||||
let candidate = Candidate(
|
||||
text: text,
|
||||
value: -40,
|
||||
correspondingCount: 4,
|
||||
composingCount: .inputCount(4),
|
||||
lastMid: 5,
|
||||
data: [DicdataElement(word: text, ruby: "サイコロ", cid: 0, mid: 5, value: -40)]
|
||||
)
|
||||
@ -27,7 +27,7 @@ final class CandidateTests: XCTestCase {
|
||||
print(candidate2.text)
|
||||
XCTAssertTrue(Set((1...3).map(String.init)).contains(candidate2.text))
|
||||
XCTAssertEqual(candidate.value, candidate2.value)
|
||||
XCTAssertEqual(candidate.correspondingCount, candidate2.correspondingCount)
|
||||
XCTAssertEqual(candidate.composingCount, candidate2.composingCount)
|
||||
XCTAssertEqual(candidate.lastMid, candidate2.lastMid)
|
||||
XCTAssertEqual(candidate.data, candidate2.data)
|
||||
XCTAssertEqual(candidate.actions, candidate2.actions)
|
||||
@ -38,7 +38,7 @@ final class CandidateTests: XCTestCase {
|
||||
let candidate = Candidate(
|
||||
text: text,
|
||||
value: 0,
|
||||
correspondingCount: 0,
|
||||
composingCount: .inputCount(0),
|
||||
lastMid: 0,
|
||||
data: [DicdataElement(word: text, ruby: "", cid: 0, mid: 0, value: 0)]
|
||||
)
|
||||
|
@ -88,7 +88,7 @@ final class LearningMemoryTests: XCTestCase {
|
||||
Candidate(
|
||||
text: element.word,
|
||||
value: element.value(),
|
||||
correspondingCount: 3,
|
||||
composingCount: .inputCount(3),
|
||||
lastMid: element.mid,
|
||||
data: [element]
|
||||
)
|
||||
@ -128,7 +128,7 @@ final class LearningMemoryTests: XCTestCase {
|
||||
Candidate(
|
||||
text: element.word,
|
||||
value: element.value(),
|
||||
correspondingCount: 3,
|
||||
composingCount: .inputCount(3),
|
||||
lastMid: element.mid,
|
||||
data: [element]
|
||||
)
|
||||
|
@ -12,16 +12,16 @@ import XCTest
|
||||
final class RegisteredNodeTests: XCTestCase {
|
||||
func testBOSNode() throws {
|
||||
let bos = RegisteredNode.BOSNode()
|
||||
XCTAssertEqual(bos.inputRange, 0..<0)
|
||||
XCTAssertEqual(bos.range, Lattice.LatticeRange.zero)
|
||||
XCTAssertNil(bos.prev)
|
||||
XCTAssertEqual(bos.totalValue, 0)
|
||||
XCTAssertEqual(bos.data.rcid, CIDData.BOS.cid)
|
||||
}
|
||||
|
||||
func testFromLastCandidate() throws {
|
||||
let candidate = Candidate(text: "我輩は猫", value: -20, correspondingCount: 7, lastMid: 100, data: [DicdataElement(word: "我輩は猫", ruby: "ワガハイハネコ", cid: CIDData.一般名詞.cid, mid: 100, value: -20)])
|
||||
let candidate = Candidate(text: "我輩は猫", value: -20, composingCount: .inputCount(7), lastMid: 100, data: [DicdataElement(word: "我輩は猫", ruby: "ワガハイハネコ", cid: CIDData.一般名詞.cid, mid: 100, value: -20)])
|
||||
let bos = RegisteredNode.fromLastCandidate(candidate)
|
||||
XCTAssertEqual(bos.inputRange, 0..<0)
|
||||
XCTAssertEqual(bos.range, Lattice.LatticeRange.zero)
|
||||
XCTAssertNil(bos.prev)
|
||||
XCTAssertEqual(bos.totalValue, 0)
|
||||
XCTAssertEqual(bos.data.rcid, CIDData.一般名詞.cid)
|
||||
@ -34,37 +34,37 @@ final class RegisteredNodeTests: XCTestCase {
|
||||
data: DicdataElement(word: "我輩", ruby: "ワガハイ", cid: CIDData.一般名詞.cid, mid: 1, value: -5),
|
||||
registered: bos,
|
||||
totalValue: -10,
|
||||
inputRange: 0..<4
|
||||
range: .input(from: 0, to: 4)
|
||||
)
|
||||
let node2 = RegisteredNode(
|
||||
data: DicdataElement(word: "は", ruby: "ハ", cid: CIDData.係助詞ハ.cid, mid: 2, value: -2),
|
||||
registered: node1,
|
||||
totalValue: -13,
|
||||
inputRange: 4..<5
|
||||
range: .input(from: 4, to: 5)
|
||||
)
|
||||
let node3 = RegisteredNode(
|
||||
data: DicdataElement(word: "猫", ruby: "ネコ", cid: CIDData.一般名詞.cid, mid: 3, value: -4),
|
||||
registered: node2,
|
||||
totalValue: -20,
|
||||
inputRange: 5..<7
|
||||
range: .input(from: 5, to: 7)
|
||||
)
|
||||
let node4 = RegisteredNode(
|
||||
data: DicdataElement(word: "です", ruby: "デス", cid: CIDData.助動詞デス基本形.cid, mid: 4, value: -3),
|
||||
registered: node3,
|
||||
totalValue: -25,
|
||||
inputRange: 7..<9
|
||||
range: .input(from: 7, to: 9)
|
||||
)
|
||||
let result = node4.getCandidateData()
|
||||
let clause1 = ClauseDataUnit()
|
||||
clause1.text = "我輩は"
|
||||
clause1.nextLcid = CIDData.一般名詞.cid
|
||||
clause1.inputRange = 0..<5
|
||||
clause1.range = .input(from: 0, to: 5)
|
||||
clause1.mid = 1
|
||||
|
||||
let clause2 = ClauseDataUnit()
|
||||
clause2.text = "猫です"
|
||||
clause2.nextLcid = CIDData.EOS.cid
|
||||
clause2.inputRange = 5..<9
|
||||
clause2.range = .input(from: 5, to: 9)
|
||||
clause2.mid = 3
|
||||
|
||||
let expectedResult: CandidateData = CandidateData(
|
||||
|
@ -290,7 +290,7 @@ final class DicdataStoreTests: XCTestCase {
|
||||
sequentialInput(&c, sequence: "tesutowaーdo", inputStyle: .roman2kana)
|
||||
let result = dicdataStore.getLOUDSDataInRange(inputData: c, from: 0, toIndexRange: c.input.endIndex - 1 ..< c.input.endIndex, needTypoCorrection: false)
|
||||
XCTAssertTrue(result.contains(where: {$0.data.word == "テストワード"}))
|
||||
XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.inputRange, 0 ..< 11)
|
||||
XCTAssertEqual(result.first(where: {$0.data.word == "テストワード"})?.range, .input(from: 0, to: 11))
|
||||
}
|
||||
|
||||
// 動的ユーザ辞書の単語が通常の辞書よりも優先されることのテスト
|
||||
|
Reference in New Issue
Block a user