Files
AzooKeyKanaKanjiConverter/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Core/FullInputProcessingWithPrefixConstraint.swift
2025-07-09 00:01:25 +09:00

104 lines
6.1 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import Foundation
import SwiftUtils
extension Kana2Kanji {
    /// Builds the conversion lattice for the whole input from scratch and runs the forward pass,
    /// discarding paths whose text contradicts `constraint`.
    ///
    /// - Parameters:
    ///   - inputData: The composing text to convert.
    ///   - N_best: Maximum number of predecessor paths kept on each lattice node.
    ///   - constraint: UTF-8 bytes the converted text has to agree with. When `constraint.hasEOS`
    ///     is `false` the converted text only has to start with these bytes; when it is `true`
    ///     the complete converted text has to be equal to them.
    /// - Returns: The EOS node whose `prevs` hold the accepted complete paths, and the lattice.
    ///
    /// ### Flow
    /// (0) Create one group of nodes per start index of the input (typo correction disabled).
    ///
    /// (1) For every node that has at least one registered predecessor, compute one value per
    ///     predecessor.
    ///
    /// (2) If the node does not reach the end of the input, register it as a predecessor of every
    ///     node starting where it ends, keeping at most `N_best` predecessors per node ordered by
    ///     descending `totalValue`.
    ///
    /// (3) If the node reaches the end of the input, register it on the EOS `result` node.
    ///
    /// (4) Return the EOS node together with the lattice.
    ///
    /// In (2) and (3) the constraint is only enforced for words whose metadata contains neither
    /// `.isLearned` nor `.isFromUserDictionary`; words carrying one of those flags are passed
    /// through unchecked.
    func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, lattice: Lattice) {
        debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)")
        let count: Int = inputData.input.count
        let result: LatticeNode = LatticeNode.EOSNode
        let lattice: Lattice = Lattice(nodes: (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)})
        // `i` is the start index shared by all nodes in `nodeArray`.
        for (i, nodeArray) in lattice.nodes.enumerated() {
            for node in nodeArray {
                // A node without predecessors is not reachable by any surviving path.
                if node.prevs.isEmpty {
                    continue
                }
                // Value of the word itself.
                let wValue: PValue = node.data.value()
                if i == 0 {
                    // Nodes at the start of the input: the connection value to the predecessor
                    // has not been added anywhere yet, so add it here.
                    node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
                } else {
                    // Later nodes: the connection value was already folded into the
                    // predecessor's totalValue when it was registered (see `newValue` below).
                    node.values = node.prevs.map {$0.totalValue + wValue}
                }
                // Index in the input where this node ends, i.e. where its successors start.
                let nextIndex: Int = node.inputRange.endIndex
                if nextIndex == count {
                    // The node reaches the end of the input: register complete paths on `result`.
                    let nodeIsConstrained: Bool = node.data.metadata.isDisjoint(with: [.isLearned, .isFromUserDictionary])
                    for index in node.prevs.indices {
                        let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
                        if nodeIsConstrained {
                            // NOTE(review): if `newnode.getCandidateData().data` already contains
                            // `node.data`, the trailing `+ node.data.word.utf8` counts the last
                            // word twice — confirm against RegisteredNode.getCandidateData().
                            let utf8Text = newnode.getCandidateData().data.reduce(into: []) { $0.append(contentsOf: $1.word.utf8)} + node.data.word.utf8
                            // Without EOS the constraint must be a prefix of the full text;
                            // with EOS the full text must be exactly the constraint.
                            let condition = (!constraint.hasEOS && utf8Text.hasPrefix(constraint.constraint)) || (constraint.hasEOS && utf8Text == constraint.constraint)
                            guard condition else {
                                continue
                            }
                        }
                        result.prevs.append(newnode)
                    }
                } else {
                    // UTF-8 text of each path ending in `node`, one entry per predecessor
                    // (same order as `node.prevs` / `node.values`).
                    let candidates: [[String.UTF8View.Element]] = node.getCandidateData().map {
                        Array(($0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word).utf8)
                    }
                    // Try to connect `node` to every node that starts where it ends.
                    for nextnode in lattice[inputIndex: nextIndex] {
                        // Connection value between `node` and `nextnode`.
                        let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                        // Both are invariant across the predecessors of `node`.
                        let nextIsConstrained: Bool = nextnode.data.metadata.isDisjoint(with: [.isLearned, .isFromUserDictionary])
                        let nextReachesEnd: Bool = nextnode.inputRange.endIndex == count
                        // One candidate path per predecessor of `node`.
                        for (index, value) in node.values.enumerated() {
                            // The text up to and including `nextnode` and the constraint must
                            // share a common prefix relation:
                            //   constraint AB, text ABC (OK)
                            //   constraint AB, text A   (OK)
                            //   constraint AB, text AC  (NG)
                            if nextIsConstrained {
                                let utf8Text = candidates[index] + nextnode.data.word.utf8
                                let condition: Bool
                                if constraint.hasEOS {
                                    // The text may never outgrow the constraint. A text as long
                                    // as the constraint is only acceptable when `nextnode` ends
                                    // the input; otherwise more text would still follow. The
                                    // end-of-input branch above re-checks exact equality.
                                    condition = constraint.constraint.hasPrefix(utf8Text) && (utf8Text.count < constraint.constraint.count || nextReachesEnd)
                                } else {
                                    condition = utf8Text.hasPrefix(constraint.constraint) || constraint.constraint.hasPrefix(utf8Text)
                                }
                                guard condition else {
                                    continue
                                }
                            }
                            let newValue: PValue = ccValue + value
                            // Insertion position that keeps `nextnode.prevs` sorted by descending
                            // totalValue: right after the last entry that is at least as good.
                            let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
                            // Already N_best entries that are at least as good: drop this path.
                            if lastindex == N_best {
                                continue
                            }
                            let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
                            // Make room by evicting the current worst entry.
                            if nextnode.prevs.count >= N_best {
                                nextnode.prevs.removeLast()
                            }
                            // Remove first, then insert, so the array never exceeds N_best
                            // elements (insert itself is O(N)).
                            nextnode.prevs.insert(newnode, at: lastindex)
                        }
                    }
                }
            }
        }
        return (result: result, lattice: lattice)
    }
}