mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 02:58:27 +00:00
WIP: implementing zero query prediction
This commit is contained in:
@@ -414,15 +414,12 @@ import SwiftUtils
|
||||
|
||||
// 文全体変換5件と予測変換3件を混ぜてベスト8を出す
|
||||
let best8 = getUniqueCandidate(sentence_candidates.chained(prediction_candidates)).sorted {$0.value > $1.value}
|
||||
// ゼロヒント予測変換
|
||||
let zeroHintPrediction_candidates = converter.getZeroHintPredictionCandidates(preparts: best8, N_best: 3)
|
||||
// その他のトップレベル変換(先頭に表示されうる変換候補)
|
||||
let toplevel_additional_candidate = self.getTopLevelAdditionalCandidate(inputData, options: options)
|
||||
// best8、foreign_candidates、zeroHintPrediction_candidates、toplevel_additional_candidateを混ぜて上位5件を取得する
|
||||
let full_candidate = getUniqueCandidate(
|
||||
best8
|
||||
.chained(foreign_candidates)
|
||||
.chained(zeroHintPrediction_candidates)
|
||||
.chained(toplevel_additional_candidate)
|
||||
).min(count: 5, sortedBy: {$0.value > $1.value})
|
||||
// 重複のない変換候補を作成するための集合
|
||||
@@ -570,6 +567,11 @@ import SwiftUtils
|
||||
|
||||
}
|
||||
|
||||
/// Merges two consecutive `Candidate`s into a single `Candidate`.
///
/// This is a thin public entry point; the actual merge is performed by
/// `converter.mergeCandidates(_:_:)`.
/// - Parameters:
///   - left: The candidate that comes first.
///   - right: The candidate that immediately follows `left`.
/// - Returns: The candidate produced by the underlying converter.
public func mergeCandidates(_ left: Candidate, _ right: Candidate) -> Candidate {
    return self.converter.mergeCandidates(left, right)
}
|
||||
|
||||
/// 外部から呼ばれる変換候補を要求する関数。
|
||||
/// - Parameters:
|
||||
/// - inputData: 変換対象のInputData。
|
||||
@@ -591,4 +593,17 @@ import SwiftUtils
|
||||
|
||||
return self.processResult(inputData: inputData, result: result, options: options)
|
||||
}
|
||||
|
||||
/// Requests prediction candidates to offer after a conversion has been committed.
///
/// - Parameters:
///   - leftSideCandidate: The candidate passed to the zero-hint prediction as its
///     only preceding part.
///   - options: Conversion request options. Not read by the current implementation.
/// - Returns: The zero-hint prediction candidates (requested with `N_best: 10`).
public func requestPredictionCandidates(leftSideCandidate: Candidate, options: ConvertRequestOptions) -> [Candidate] {
    // Enumerate candidates based on zero-hint prediction.
    let precedingParts = [leftSideCandidate]
    let zeroHintCandidates = self.converter.getZeroHintPredictionCandidates(
        preparts: precedingParts,
        N_best: 10
    )

    // Enumerate candidates based on predictive conversion.
    // TODO: implement

    // Enumerate candidates based on learning data and the user dictionary.
    // TODO: implement

    // Enumerate emoji and symbols.
    // TODO: implement

    return zeroHintCandidates
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,6 @@ public final class DicdataStore {
|
||||
private var importedLoudses: Set<String> = []
|
||||
private var charsID: [Character: UInt8] = [:]
|
||||
private var learningManager = LearningManager()
|
||||
private var zeroHintPredictionDicdata: [DicdataElement]?
|
||||
|
||||
private var osUserDict: [DicdataElement] = []
|
||||
|
||||
@@ -361,20 +360,15 @@ public final class DicdataStore {
|
||||
}
|
||||
}
|
||||
|
||||
func getZeroHintPredictionDicdata() -> [DicdataElement] {
|
||||
if let dicdata = self.zeroHintPredictionDicdata {
|
||||
return dicdata
|
||||
}
|
||||
func getZeroHintPredictionDicdata(lastRcid: Int) -> [DicdataElement] {
|
||||
do {
|
||||
let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_null.csv", isDirectory: false), encoding: String.Encoding.utf8)
|
||||
let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/pc_\(lastRcid).csv", isDirectory: false), encoding: .utf8)
|
||||
let csvLines = csvString.split(separator: "\n")
|
||||
let csvData = csvLines.map {$0.split(separator: ",", omittingEmptySubsequences: false)}
|
||||
let dicdata: [DicdataElement] = csvData.map {self.parseLoudstxt2FormattedEntry(from: $0)}
|
||||
self.zeroHintPredictionDicdata = dicdata
|
||||
return dicdata
|
||||
} catch {
|
||||
debug(error)
|
||||
self.zeroHintPredictionDicdata = []
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,6 +44,29 @@ struct Kana2Kanji {
|
||||
)
|
||||
}
|
||||
|
||||
/// Merges two consecutive `Candidate`s into a single `Candidate`.
///
/// The merged candidate concatenates the text and data of both sides, sums their
/// `correspondingCount`, and takes `lastMid` from `right`.
///
/// When both sides carry at least one `DicdataElement`, the merged value also
/// includes the costs at the junction between them:
/// - the CC value between the last element of `left` and the first element of `right`
/// - the MM value between `left.lastMid` and the first element of `right`, only when
///   `DicdataStore.includeMMValueCalculation` accepts that element
///
/// When either side has no data, the value is simply `left.value + right.value`.
/// - Parameters:
///   - left: The candidate that comes first.
///   - right: The candidate that immediately follows `left`.
/// - Returns: The merged candidate.
public func mergeCandidates(_ left: Candidate, _ right: Candidate) -> Candidate {
    let mergedValue: PValue
    if let leftLast = left.data.last, let rightFirst = right.data.first {
        // The junction cost pairs the preceding element's right-side id with the
        // following element's left-side id, matching the other `getCCValue` call
        // sites in this file (`last.rcid, data.lcid`). Using `leftLast.lcid` here
        // would look up the wrong pair.
        let ccValue = self.dicdataStore.getCCValue(leftLast.rcid, rightFirst.lcid)
        let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(rightFirst)
        let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(left.lastMid, rightFirst.mid) : .zero
        mergedValue = left.value + mmValue + ccValue + right.value
    } else {
        // No junction elements are available, so no junction cost can be computed.
        mergedValue = left.value + right.value
    }
    return Candidate(
        text: left.text + right.text,
        value: mergedValue,
        correspondingCount: left.correspondingCount + right.correspondingCount,
        lastMid: right.lastMid,
        data: left.data + right.data
    )
}
|
||||
|
||||
/// 入力がない状態から、妥当な候補を探す
|
||||
/// - parameters:
|
||||
/// - preparts: Candidate列。以前確定した候補など
|
||||
@@ -53,54 +76,37 @@ struct Kana2Kanji {
|
||||
/// - note:
|
||||
/// 「食べちゃ-てる」「食べちゃ-いる」などの間抜けな候補を返すことが多いため、学習によるもの以外を無効化している。
|
||||
func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [Candidate] {
|
||||
// let dicdata = self.dicdataStore.getZeroHintPredictionDicdata()
|
||||
var result: [Candidate] = []
|
||||
/*
|
||||
result.reserveCapacity(N_best + 1)
|
||||
preparts.forEach{candidate in
|
||||
dicdata.forEach{data in
|
||||
let ccValue = self.dicdataStore.getCCValue(candidate.rcid, data.lcid)
|
||||
let isInposition = DicdataStore.isInposition(data)
|
||||
let mmValue = isInposition ? self.dicdataStore.getMMValue(candidate.lastMid, data.mid):0.0
|
||||
let wValue = data.value()
|
||||
let newValue = candidate.value + mmValue + ccValue + wValue
|
||||
//追加すべきindexを取得する
|
||||
let lastindex = (result.lastIndex(where: {$0.value >= newValue}) ?? -1) + 1
|
||||
if lastindex >= N_best{
|
||||
return
|
||||
}
|
||||
var nodedata = candidate.data
|
||||
nodedata.append(data)
|
||||
|
||||
let candidate = Candidate(text: candidate.text + data.string, value: newValue, correspondingCount: candidate.correspondingCount, rcid: data.rcid, lastMid: isInposition ? data.mid:candidate.lastMid, data: nodedata)
|
||||
result.insert(candidate, at: lastindex)
|
||||
//カウントがオーバーしている場合は除去する
|
||||
if result.count == N_best &+ 1{
|
||||
result.removeLast()
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
for candidate in preparts {
|
||||
if let last = candidate.data.last {
|
||||
let nexts = [(DicdataElement, Int)]()
|
||||
for (data, count) in nexts where count > 1 {
|
||||
let dicdata = self.dicdataStore.getZeroHintPredictionDicdata(lastRcid: last.rcid)
|
||||
for data in dicdata {
|
||||
let ccValue = self.dicdataStore.getCCValue(last.rcid, data.lcid)
|
||||
let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
|
||||
let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(candidate.lastMid, data.mid):.zero
|
||||
let wValue = data.value()
|
||||
let bonus = PValue(count * 1)
|
||||
let newValue = candidate.value + mmValue + ccValue + wValue + bonus
|
||||
let newValue = candidate.value + mmValue + ccValue + wValue
|
||||
|
||||
// 追加すべきindexを取得する
|
||||
let lastindex: Int = (result.lastIndex(where: {$0.value >= newValue}) ?? -1) + 1
|
||||
if lastindex == N_best {
|
||||
continue
|
||||
}
|
||||
// データを作成する
|
||||
var nodedata = candidate.data
|
||||
nodedata.append(data)
|
||||
let candidate = Candidate(
|
||||
text: candidate.text + data.word,
|
||||
value: newValue,
|
||||
correspondingCount: candidate.correspondingCount,
|
||||
lastMid: includeMMValueCalculation ? data.mid:candidate.lastMid,
|
||||
data: nodedata
|
||||
text: data.word,
|
||||
value: data.value(),
|
||||
correspondingCount: data.ruby.count,
|
||||
lastMid: data.mid,
|
||||
data: [data]
|
||||
)
|
||||
result.append(candidate)
|
||||
// カウントがオーバーしている場合は除去する
|
||||
if result.count >= N_best {
|
||||
result.removeLast()
|
||||
}
|
||||
result.insert(candidate, at: lastindex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user