mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 02:58:27 +00:00
Uniquify candidates
This commit is contained in:
@@ -116,6 +116,27 @@ import SwiftUtils
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
/// Removes duplicate prediction candidates, using the candidate text as the key.
///
/// When several candidates share the same `text`, only the one with the highest `value` is kept,
/// and it stays at the position where that text first appeared. On a tie the earlier candidate wins.
/// - Parameters:
///   - candidates: The candidates to deduplicate.
///   - seenCandidates: Texts that must be dropped entirely. Defaults to the empty set.
/// - Returns:
///   `candidates` without empty texts, without texts contained in `seenCandidates`,
///   and with at most one entry per text.
private func getUniquePredictionCandidate(_ candidates: some Sequence<PredictionCandidate>, seenCandidates: Set<String> = []) -> [PredictionCandidate] {
    var result = [PredictionCandidate]()
    // Maps a text to the index of its entry in `result`, so that a duplicate is found
    // without rescanning `result` for every candidate.
    var indexByText = [String: Int]()
    for candidate in candidates where !candidate.text.isEmpty && !seenCandidates.contains(candidate.text) {
        if let index = indexByText[candidate.text] {
            // The text is already present: keep whichever candidate has the higher value.
            if result[index].value < candidate.value {
                result[index] = candidate
            }
        } else {
            indexByText[candidate.text] = result.count
            result.append(candidate)
        }
    }
    return result
}
|
||||
|
||||
|
||||
/// 外国語への予測変換候補を生成する関数
|
||||
/// - Parameters:
|
||||
/// - inputData: 変換対象のデータ。
|
||||
@@ -596,10 +617,14 @@ import SwiftUtils
|
||||
|
||||
/// 変換確定後の予測変換候補を要求する関数
|
||||
public func requestPredictionCandidates(leftSideCandidate: Candidate, options: ConvertRequestOptions) -> [PredictionCandidate] {
|
||||
var seenCandidates: Set<String> = []
|
||||
// ゼロヒント予測変換に基づく候補を列挙
|
||||
let zeroHintResults = self.converter.getZeroHintPredictionCandidates(preparts: [leftSideCandidate], N_best: 10)
|
||||
let zeroHintResults = self.getUniquePredictionCandidate(self.converter.getZeroHintPredictionCandidates(preparts: [leftSideCandidate], N_best: 15))
|
||||
seenCandidates.formUnion(zeroHintResults.map{$0.text})
|
||||
// 予測変換に基づく候補を列挙
|
||||
let predictionResults = self.converter.getPredictionCandidates(prepart: leftSideCandidate, N_best: 10)
|
||||
let predictionResults = self.getUniquePredictionCandidate(self.converter.getPredictionCandidates(prepart: leftSideCandidate, N_best: 15), seenCandidates: seenCandidates)
|
||||
seenCandidates.formUnion(predictionResults.map{$0.text})
|
||||
|
||||
// 学習・ユーザ辞書に基づく候補を列挙
|
||||
// TODO: implement
|
||||
// 絵文字、記号類を列挙
|
||||
|
||||
@@ -43,120 +43,4 @@ struct Kana2Kanji {
|
||||
data: data.data
|
||||
)
|
||||
}
|
||||
|
||||
/// Concatenates two candidates into a single candidate.
///
/// The merged value is the sum of both values plus the `getCCValue` term for the seam between them
/// and, when `DicdataStore.includeMMValueCalculation` accepts the first element of `right`,
/// the `getMMValue` term as well.
/// - Parameters:
///   - left: The candidate that comes first.
///   - right: The candidate appended after `left`.
/// - Returns:
///   A candidate whose text, data and corresponding count are those of `left` followed by `right`,
///   and whose `lastMid` is taken from `right`.
public func mergeCandidates(_ left: Candidate, _ right: Candidate) -> Candidate {
    guard let leftLast = left.data.last, let rightFirst = right.data.first else {
        // At least one side carries no data, so there is no seam to charge for.
        return Candidate(
            text: left.text + right.text,
            value: left.value + right.value,
            correspondingCount: left.correspondingCount + right.correspondingCount,
            lastMid: right.lastMid,
            data: left.data + right.data
        )
    }
    // The seam uses the `rcid` of the preceding element and the `lcid` of the following one,
    // the same pairing used by the other `getCCValue` call sites in this type.
    let ccValue = self.dicdataStore.getCCValue(leftLast.rcid, rightFirst.lcid)
    let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(rightFirst)
    let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(left.lastMid, rightFirst.mid) : .zero
    let newValue = left.value + mmValue + ccValue + right.value
    return Candidate(
        text: left.text + right.text,
        value: newValue,
        correspondingCount: left.correspondingCount + right.correspondingCount,
        lastMid: right.lastMid,
        data: left.data + right.data
    )
}
|
||||
|
||||
/// Builds prediction candidates that replace the tail of an already converted candidate.
///
/// Up to the last three elements of `prepart.data` are peeled off one at a time. After each step the
/// peeled-off tail is looked up with `getPredictionLOUDSDicdata`, and every entry whose word starts with
/// the tail's word becomes a `.replacement` candidate whose text is the part of the word after the tail.
/// - Parameters:
///   - prepart: The candidate that precedes the prediction.
///   - N_best: The maximum number of candidates to return.
/// - Returns:
///   At most `N_best` candidates, ordered from the highest value to the lowest.
func getPredictionCandidates(prepart: Candidate, N_best: Int) -> [PredictionCandidate] {
    var ranked: [PredictionCandidate] = []
    // `head` is what remains of `prepart` once the tail has been peeled off.
    var head = prepart
    head.actions = []
    var headData = prepart.data
    // Word, ruby and elements of the peeled-off tail, kept in their original order.
    var tailWord = ""
    var tailRuby = ""
    var tailData: [DicdataElement] = []

    for _ in 0 ..< min(prepart.data.count, 3) {
        guard let element = headData.popLast() else {
            break
        }
        // Take the terms contributed by `element` back out of `head`.
        head.value -= element.value()
        head.value -= self.dicdataStore.getCCValue(headData.last?.rcid ?? CIDData.BOS.cid, element.lcid)
        if DicdataStore.includeMMValueCalculation(element) {
            let previousMid = headData.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
            head.lastMid = previousMid
            head.value -= self.dicdataStore.getMMValue(previousMid, element.mid)
        }
        head.data = headData
        head.text = headData.map(\.word).joined()
        head.correspondingCount = headData.reduce(0) { $0 + $1.ruby.count }

        // Grow the tail at its front.
        tailWord = element.word + tailWord
        tailRuby = element.ruby + tailRuby
        tailData = [element] + tailData

        let previousRcid = headData.last?.rcid ?? CIDData.BOS.cid
        for data in self.dicdataStore.getPredictionLOUDSDicdata(key: tailRuby) where data.word.hasPrefix(tailWord) {
            let ccValue = self.dicdataStore.getCCValue(previousRcid, data.lcid)
            let mmValue = DicdataStore.includeMMValueCalculation(data) ? self.dicdataStore.getMMValue(head.lastMid, data.mid) : .zero
            let newValue = head.value + mmValue + ccValue + data.value()
            // The new entry goes right after the last entry whose value is at least as high,
            // which keeps `ranked` ordered from the highest value to the lowest.
            let insertionIndex = ranked.lastIndex(where: { $0.value >= newValue }).map { $0 + 1 } ?? 0
            // All `N_best` slots are already taken by entries that rank at least as high.
            guard insertionIndex != N_best else {
                continue
            }
            // Make room by dropping the lowest-ranked entry when the list is full.
            if ranked.count >= N_best {
                ranked.removeLast()
            }
            // Cut off the prefix shared with the tail.
            let suffix = String(data.word.dropFirst(tailWord.count))
            ranked.insert(.replacement(.init(text: suffix, targetData: tailData, replacementData: [data], value: newValue)), at: insertionIndex)
        }
    }
    return ranked
}
|
||||
|
||||
/// Looks for plausible candidates when there is no input at all.
/// - parameters:
///   - preparts: A collection of `Candidate`s, such as previously committed candidates.
///   - N_best: The number of candidates to obtain.
/// - returns:
///   The zero-hint prediction results.
/// - note:
///   (Translated from the original note.) Because this often returns silly candidates such as
///   「食べちゃ-てる」 or 「食べちゃ-いる」, everything except learning-based results is disabled.
func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [PredictionCandidate] {
    var ranked: [PredictionCandidate] = []
    for candidate in preparts {
        // A candidate without data has no last element to continue from.
        guard let last = candidate.data.last else {
            continue
        }
        for data in self.dicdataStore.getZeroHintPredictionDicdata(lastRcid: last.rcid) {
            let ccValue = self.dicdataStore.getCCValue(last.rcid, data.lcid)
            let mmValue = DicdataStore.includeMMValueCalculation(data) ? self.dicdataStore.getMMValue(candidate.lastMid, data.mid) : .zero
            let newValue = candidate.value + mmValue + ccValue + data.value()

            // The new entry goes right after the last entry whose value is at least as high,
            // which keeps `ranked` ordered from the highest value to the lowest.
            let insertionIndex = ranked.lastIndex(where: { $0.value >= newValue }).map { $0 + 1 } ?? 0
            // All `N_best` slots are already taken by entries that rank at least as high.
            guard insertionIndex != N_best else {
                continue
            }
            // Make room by dropping the lowest-ranked entry when the list is full.
            if ranked.count >= N_best {
                ranked.removeLast()
            }
            ranked.insert(.additional(.init(text: data.word, data: [data], value: newValue)), at: insertionIndex)
        }
    }
    return ranked
}
|
||||
}
|
||||
|
||||
127
Sources/KanaKanjiConverterModule/Kana2Kanji/prediction.swift
Normal file
127
Sources/KanaKanjiConverterModule/Kana2Kanji/prediction.swift
Normal file
@@ -0,0 +1,127 @@
|
||||
//
|
||||
// prediction.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2023/09/19.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension Kana2Kanji {
    /// Concatenates two candidates into a single candidate.
    ///
    /// The merged value is the sum of both values plus the `getCCValue` term for the seam between them
    /// and, when `DicdataStore.includeMMValueCalculation` accepts the first element of `right`,
    /// the `getMMValue` term as well.
    /// - Parameters:
    ///   - left: The candidate that comes first.
    ///   - right: The candidate appended after `left`.
    /// - Returns:
    ///   A candidate whose text, data and corresponding count are those of `left` followed by `right`,
    ///   and whose `lastMid` is taken from `right`.
    func mergeCandidates(_ left: Candidate, _ right: Candidate) -> Candidate {
        guard let leftLast = left.data.last, let rightFirst = right.data.first else {
            // At least one side carries no data, so there is no seam to charge for.
            return Candidate(
                text: left.text + right.text,
                value: left.value + right.value,
                correspondingCount: left.correspondingCount + right.correspondingCount,
                lastMid: right.lastMid,
                data: left.data + right.data
            )
        }
        // The seam uses the `rcid` of the preceding element and the `lcid` of the following one,
        // the same pairing used by the two prediction functions below.
        let ccValue = self.dicdataStore.getCCValue(leftLast.rcid, rightFirst.lcid)
        let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(rightFirst)
        let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(left.lastMid, rightFirst.mid) : .zero
        let newValue = left.value + mmValue + ccValue + right.value
        return Candidate(
            text: left.text + right.text,
            value: newValue,
            correspondingCount: left.correspondingCount + right.correspondingCount,
            lastMid: right.lastMid,
            data: left.data + right.data
        )
    }

    /// Builds prediction candidates that replace the tail of an already converted candidate.
    ///
    /// Up to the last three elements of `prepart.data` are peeled off one at a time. After each step the
    /// peeled-off tail is looked up with `getPredictionLOUDSDicdata`, and every entry whose word starts with
    /// the tail's word becomes a `.replacement` candidate whose text is the part of the word after the tail.
    /// - Parameters:
    ///   - prepart: The candidate that precedes the prediction.
    ///   - N_best: The maximum number of candidates to return.
    /// - Returns:
    ///   At most `N_best` candidates, ordered from the highest value to the lowest.
    func getPredictionCandidates(prepart: Candidate, N_best: Int) -> [PredictionCandidate] {
        var result: [PredictionCandidate] = []
        var count = 1
        var prefixCandidate = prepart
        prefixCandidate.actions = []
        var prefixCandidateData = prepart.data
        // Word, ruby and elements of the peeled-off tail, kept in their original order.
        var totalWord = ""
        var totalRuby = ""
        var totalData: [DicdataElement] = []
        while count <= min(prepart.data.count, 3), let element = prefixCandidateData.popLast() {
            defer {
                count += 1
            }
            // Update prefixCandidate: take the terms contributed by `element` back out of it.
            do {
                prefixCandidate.value -= element.value()
                prefixCandidate.value -= self.dicdataStore.getCCValue(prefixCandidateData.last?.rcid ?? CIDData.BOS.cid, element.lcid)
                if DicdataStore.includeMMValueCalculation(element) {
                    let previousMid = prefixCandidateData.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
                    prefixCandidate.lastMid = previousMid
                    prefixCandidate.value -= self.dicdataStore.getMMValue(previousMid, element.mid)
                }
                prefixCandidate.data = prefixCandidateData

                prefixCandidate.text = prefixCandidateData.reduce(into: "") { $0 += $1.word }
                prefixCandidate.correspondingCount = prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count }
            }

            // Grow the tail at its front.
            totalWord.insert(contentsOf: element.word, at: totalWord.startIndex)
            totalRuby.insert(contentsOf: element.ruby, at: totalRuby.startIndex)
            totalData.insert(element, at: 0)
            let dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: totalRuby).filter { $0.word.hasPrefix(totalWord) }

            for data in dicdata {
                let ccValue = self.dicdataStore.getCCValue(prefixCandidateData.last?.rcid ?? CIDData.BOS.cid, data.lcid)
                let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
                let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(prefixCandidate.lastMid, data.mid) : .zero
                let wValue = data.value()
                let newValue = prefixCandidate.value + mmValue + ccValue + wValue
                // Find the insertion index: right after the last entry whose value is at least as high.
                let lastindex: Int = (result.lastIndex(where: { $0.value >= newValue }) ?? -1) + 1
                // All `N_best` slots are already taken by entries that rank at least as high.
                if lastindex == N_best {
                    continue
                }
                // Drop the lowest-ranked entry when the list is full.
                if result.count >= N_best {
                    result.removeLast()
                }
                // Cut off the common prefix.
                let text = String(data.word.dropFirst(totalWord.count))
                result.insert(.replacement(.init(text: text, targetData: totalData, replacementData: [data], value: newValue)), at: lastindex)
            }
        }
        return result
    }

    /// Looks for plausible candidates when there is no input at all.
    /// - parameters:
    ///   - preparts: A collection of `Candidate`s, such as previously committed candidates.
    ///   - N_best: The number of candidates to obtain.
    /// - returns:
    ///   The zero-hint prediction results.
    /// - note:
    ///   (Translated from the original note.) Because this often returns silly candidates such as
    ///   「食べちゃ-てる」 or 「食べちゃ-いる」, everything except learning-based results is disabled.
    func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [PredictionCandidate] {
        var result: [PredictionCandidate] = []
        for candidate in preparts {
            if let last = candidate.data.last {
                let dicdata = self.dicdataStore.getZeroHintPredictionDicdata(lastRcid: last.rcid)
                for data in dicdata {
                    let ccValue = self.dicdataStore.getCCValue(last.rcid, data.lcid)
                    let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
                    let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(candidate.lastMid, data.mid) : .zero
                    let wValue = data.value()
                    let newValue = candidate.value + mmValue + ccValue + wValue

                    // Find the insertion index: right after the last entry whose value is at least as high.
                    let lastindex: Int = (result.lastIndex(where: { $0.value >= newValue }) ?? -1) + 1
                    // All `N_best` slots are already taken by entries that rank at least as high.
                    if lastindex == N_best {
                        continue
                    }
                    // Drop the lowest-ranked entry when the list is full.
                    if result.count >= N_best {
                        result.removeLast()
                    }
                    result.insert(.additional(.init(text: data.word, data: [data], value: newValue)), at: lastindex)
                }
            }
        }
        return result
    }

}
|
||||
@@ -7,16 +7,16 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
public enum PredictionCandidate: Sendable {
|
||||
public enum PredictionCandidate: Sendable, Hashable {
|
||||
case additional(AdditionalPredictionCandidate)
|
||||
case replacement(ReplacementPredictionCandidate)
|
||||
|
||||
public struct AdditionalPredictionCandidate: Sendable {
|
||||
public struct AdditionalPredictionCandidate: Sendable, Hashable {
|
||||
public var text: String
|
||||
public var data: [DicdataElement]
|
||||
public var value: PValue
|
||||
}
|
||||
public struct ReplacementPredictionCandidate: Sendable {
|
||||
public struct ReplacementPredictionCandidate: Sendable, Hashable {
|
||||
/// 予測変換として表示するデータ
|
||||
public var text: String
|
||||
/// 置換対象のデータ
|
||||
|
||||
Reference in New Issue
Block a user