Files
AzooKeyKanaKanjiConverter/Sources/KanaKanjiConverterModule/ConversionAlgorithms/Prediction.swift
2025-07-01 23:42:05 +09:00

109 lines
5.1 KiB
Swift

//
// Prediction.swift
// AzooKeyKanaKanjiConverter
//
// Created by ensan on 2020/12/09.
// Copyright © 2020 ensan. All rights reserved.
//
import Foundation
import SwiftUtils
//
extension Kana2Kanji {
    /// Builds prediction candidates by appending dictionary entries, looked up by prefix from the ruby
    /// of the last clause, to the candidate built from `prepart`.
    ///
    /// - Parameters:
    ///   - composingText: The text being composed. The input elements at `lastClause.inputRange`
    ///     are converted to katakana and used as the lookup key.
    ///   - prepart: Candidate data placed in front of every predicted word. Its clause texts decide
    ///     how many leading elements of `prepart.data` are carried into each candidate.
    ///   - lastClause: The clause to predict for. Only `inputRange` (and `text`, for debug output) is read here.
    ///   - N_best: Upper bound on the number of candidates returned.
    /// - Returns: At most `N_best` candidates, in descending order of `value`.
    /// - Note: If the words in `prepart.data` never concatenate to the joined clause text, all of
    ///   `prepart.data` is carried over instead of indexing past its end.
    func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] {
        debug("getPredictionCandidates", composingText, lastClause.inputRange, lastClause.text)
        let lastRuby = ComposingText.getConvertTarget(for: composingText.input[lastClause.inputRange]).toKatakana()
        let lastRubyCount = lastClause.inputRange.count

        // Take the leading elements of `prepart.data` whose words spell out the joined clause texts.
        let datas: [DicdataElement]
        do {
            let prestring: String = prepart.clauses.reduce(into: "") {$0.append(contentsOf: $1.clause.text)}
            var joined = ""
            var count: Int = .zero
            // Bounded by `prepart.data.count`: without the bound, a mismatch between the clause
            // texts and the data words would subscript past the end of the array and trap.
            while joined != prestring && count < prepart.data.count {
                joined += prepart.data[count].word
                count += 1
            }
            datas = Array(prepart.data.prefix(count))
        }

        let osuserdict: [DicdataElement] = dicdataStore.getPrefixMatchDynamicUserDict(lastRuby)
        // With an empty prepart, start from an empty candidate that ends at EOS.
        let lastCandidate: Candidate = prepart.isEmpty ? Candidate(text: "", value: .zero, correspondingCount: 0, lastMid: MIDData.EOS.mid, data: []) : self.processClauseCandidate(prepart)
        let lastRcid: Int = lastCandidate.data.last?.rcid ?? CIDData.EOS.cid
        let nextLcid: Int = prepart.lastClause?.nextLcid ?? CIDData.EOS.cid
        let lastMid: Int = lastCandidate.lastMid
        let correspondingCount: Int = lastCandidate.correspondingCount + lastRubyCount
        // Connection value between the prepart's last rcid and its recorded next lcid; it is
        // subtracted from every candidate's score below.
        // NOTE(review): presumably this value is already contained in `lastCandidate.value` — confirm.
        let ignoreCCValue: PValue = self.dicdataStore.getCCValue(lastRcid, nextLcid)

        let inputStyle = composingText.input.last?.inputStyle ?? .direct
        let dicdata: [DicdataElement]
        switch inputStyle {
        case .direct:
            dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: lastRuby)
        case .roman2kana:
            // Trailing run of characters for which `onlyRomanAlphabet` holds.
            let roman = lastRuby.suffix(while: {String($0).onlyRomanAlphabet})
            if !roman.isEmpty {
                let ruby: Substring = lastRuby.dropLast(roman.count)
                if ruby.isEmpty {
                    // Nothing but roman letters: no lookup is performed.
                    dicdata = []
                    break
                }
                // Look up every key formed by the non-roman part followed by a registered continuation of the roman tail.
                let possibleNexts: [Substring] = DicdataStore.possibleNexts[String(roman), default: []].map {ruby + $0}
                debug("getPredictionCandidates", lastRuby, ruby, roman, possibleNexts, prepart, lastRubyCount)
                dicdata = possibleNexts.flatMap { self.dicdataStore.getPredictionLOUDSDicdata(key: $0) }
            } else {
                debug("getPredicitonCandidates", lastRuby, roman)
                dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: lastRuby)
            }
        }

        var result: [Candidate] = []
        result.reserveCapacity(N_best &+ 1)
        for data in (dicdata + osuserdict) {
            let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
            let mmValue: PValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(lastMid, data.mid) : .zero
            let ccValue: PValue = self.dicdataStore.getCCValue(lastRcid, data.lcid)
            // Costs 3.0 per character by which the entry's ruby exceeds the typed ruby.
            let penalty: PValue = -PValue(data.ruby.count &- lastRuby.count) * 3.0
            let wValue: PValue = data.value()
            let newValue: PValue = lastCandidate.value + mmValue + ccValue + wValue + penalty - ignoreCCValue
            // Insertion index: just after the last kept candidate whose value is at least `newValue`.
            let lastindex: Int = (result.lastIndex(where: {$0.value >= newValue}) ?? -1) + 1
            if lastindex >= N_best {
                // Would land outside the best `N_best`; skip it.
                continue
            }
            var nodedata: [DicdataElement] = datas
            nodedata.append(data)
            let candidate: Candidate = Candidate(
                text: lastCandidate.text + data.word,
                value: newValue,
                correspondingCount: correspondingCount,
                // Entries excluded from the MM calculation keep the previous mid.
                lastMid: includeMMValueCalculation ? data.mid : lastMid,
                data: nodedata
            )
            // Drop the current worst candidate first so the list never exceeds `N_best`.
            if result.count >= N_best {
                result.removeLast()
            }
            // Remove before inserting, since the insertion itself is O(N).
            result.insert(candidate, at: lastindex)
        }
        return result
    }
}