WIP: implementing zero query prediction

This commit is contained in:
ensan-hcl
2023-09-19 01:04:22 +09:00
parent 49d489172c
commit f163350c0b
3 changed files with 63 additions and 48 deletions

View File

@@ -414,15 +414,12 @@ import SwiftUtils
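// Combine the sentence and prediction candidates, pass them through getUniqueCandidate, and sort by descending value (presumably 5 + 3 = 8 candidates, hence "best8" — confirm)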
let best8 = getUniqueCandidate(sentence_candidates.chained(prediction_candidates)).sorted {$0.value > $1.value}
//
let zeroHintPrediction_candidates = converter.getZeroHintPredictionCandidates(preparts: best8, N_best: 3)
//
let toplevel_additional_candidate = self.getTopLevelAdditionalCandidate(inputData, options: options)
// Merge best8, foreign_candidates, zeroHintPrediction_candidates, and toplevel_additional_candidate, then keep the 5 candidates with the highest value
let full_candidate = getUniqueCandidate(
best8
.chained(foreign_candidates)
.chained(zeroHintPrediction_candidates)
.chained(toplevel_additional_candidate)
).min(count: 5, sortedBy: {$0.value > $1.value})
//
@@ -570,6 +567,11 @@ import SwiftUtils
}
/// Merges two `Candidate` values into a single `Candidate`.
///
/// This is a thin forwarding wrapper around `converter.mergeCandidates(_:_:)`.
/// - Parameters:
///   - left: The candidate forwarded as the first argument.
///   - right: The candidate forwarded as the second argument.
/// - Returns: The candidate produced by the converter.
public func mergeCandidates(_ left: Candidate, _ right: Candidate) -> Candidate {
    let merged = converter.mergeCandidates(left, right)
    return merged
}
///
/// - Parameters:
/// - inputData: InputData
@@ -591,4 +593,17 @@ import SwiftUtils
return self.processResult(inputData: inputData, result: result, options: options)
}
/// Requests prediction candidates that follow an already chosen candidate.
///
/// Work in progress: the only step currently implemented is the zero-hint
/// prediction lookup; the remaining steps are still TODO placeholders.
/// - Parameters:
///   - leftSideCandidate: The candidate handed to the converter as the sole element of `preparts`.
///   - options: Conversion options. Not read by the current implementation.
/// - Returns: The converter's zero-hint prediction results, requested with `N_best: 10`.
public func requestPredictionCandidates(leftSideCandidate: Candidate, options: ConvertRequestOptions) -> [Candidate] {
    // Zero-hint prediction seeded with the single left-side candidate.
    let seeds = [leftSideCandidate]
    let zeroHintResults = converter.getZeroHintPredictionCandidates(preparts: seeds, N_best: 10)
    // TODO: implement
    // TODO: implement
    // TODO: implement
    return zeroHintResults
}
}

View File

@@ -29,7 +29,6 @@ public final class DicdataStore {
private var importedLoudses: Set<String> = []
private var charsID: [Character: UInt8] = [:]
private var learningManager = LearningManager()
private var zeroHintPredictionDicdata: [DicdataElement]?
private var osUserDict: [DicdataElement] = []
@@ -361,20 +360,15 @@ public final class DicdataStore {
}
}
func getZeroHintPredictionDicdata() -> [DicdataElement] {
if let dicdata = self.zeroHintPredictionDicdata {
return dicdata
}
func getZeroHintPredictionDicdata(lastRcid: Int) -> [DicdataElement] {
do {
let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_null.csv", isDirectory: false), encoding: String.Encoding.utf8)
let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/pc_\(lastRcid).csv", isDirectory: false), encoding: .utf8)
let csvLines = csvString.split(separator: "\n")
let csvData = csvLines.map {$0.split(separator: ",", omittingEmptySubsequences: false)}
let dicdata: [DicdataElement] = csvData.map {self.parseLoudstxt2FormattedEntry(from: $0)}
self.zeroHintPredictionDicdata = dicdata
return dicdata
} catch {
debug(error)
self.zeroHintPredictionDicdata = []
return []
}
}

View File

@@ -44,6 +44,29 @@ struct Kana2Kanji {
)
}
/// Joins two candidates into one candidate covering both.
///
/// Text, `data`, and `correspondingCount` are concatenated/summed, and `lastMid` is taken
/// from `right`. When both sides carry dictionary data, the merged value additionally
/// includes the values looked up for the seam between the last element of `left` and the
/// first element of `right`; otherwise the value is simply `left.value + right.value`.
/// - Parameters:
///   - left: The candidate that comes first in the merged result.
///   - right: The candidate appended after `left`.
/// - Returns: The merged candidate.
public func mergeCandidates(_ left: Candidate, _ right: Candidate) -> Candidate {
    // Accumulate in the order left, MM value, CC value, right.
    var mergedValue = left.value
    if let leftLast = left.data.last, let rightFirst = right.data.first {
        // The seam lookup pairs the preceding element's rcid with the following element's
        // lcid, matching how getCCValue is called elsewhere in this file
        // (presumably right-context id → left-context id — confirm against getCCValue).
        let ccValue = self.dicdataStore.getCCValue(leftLast.rcid, rightFirst.lcid)
        // The MM value is only added for elements that opt into it.
        if DicdataStore.includeMMValueCalculation(rightFirst) {
            mergedValue += self.dicdataStore.getMMValue(left.lastMid, rightFirst.mid)
        }
        mergedValue += ccValue
    }
    mergedValue += right.value

    return Candidate(
        text: left.text + right.text,
        value: mergedValue,
        correspondingCount: left.correspondingCount + right.correspondingCount,
        lastMid: right.lastMid,
        data: left.data + right.data
    )
}
///
/// - parameters:
/// - preparts: Candidate
@@ -53,54 +76,37 @@ struct Kana2Kanji {
/// - note:
/// --
func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [Candidate] {
// let dicdata = self.dicdataStore.getZeroHintPredictionDicdata()
var result: [Candidate] = []
/*
result.reserveCapacity(N_best + 1)
preparts.forEach{candidate in
dicdata.forEach{data in
let ccValue = self.dicdataStore.getCCValue(candidate.rcid, data.lcid)
let isInposition = DicdataStore.isInposition(data)
let mmValue = isInposition ? self.dicdataStore.getMMValue(candidate.lastMid, data.mid):0.0
let wValue = data.value()
let newValue = candidate.value + mmValue + ccValue + wValue
//index
let lastindex = (result.lastIndex(where: {$0.value >= newValue}) ?? -1) + 1
if lastindex >= N_best{
return
}
var nodedata = candidate.data
nodedata.append(data)
let candidate = Candidate(text: candidate.text + data.string, value: newValue, correspondingCount: candidate.correspondingCount, rcid: data.rcid, lastMid: isInposition ? data.mid:candidate.lastMid, data: nodedata)
result.insert(candidate, at: lastindex)
//
if result.count == N_best &+ 1{
result.removeLast()
}
}
}
*/
for candidate in preparts {
if let last = candidate.data.last {
let nexts = [(DicdataElement, Int)]()
for (data, count) in nexts where count > 1 {
let dicdata = self.dicdataStore.getZeroHintPredictionDicdata(lastRcid: last.rcid)
for data in dicdata {
let ccValue = self.dicdataStore.getCCValue(last.rcid, data.lcid)
let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(candidate.lastMid, data.mid):.zero
let wValue = data.value()
let bonus = PValue(count * 1)
let newValue = candidate.value + mmValue + ccValue + wValue + bonus
let newValue = candidate.value + mmValue + ccValue + wValue
// index
let lastindex: Int = (result.lastIndex(where: {$0.value >= newValue}) ?? -1) + 1
if lastindex == N_best {
continue
}
//
var nodedata = candidate.data
nodedata.append(data)
let candidate = Candidate(
text: candidate.text + data.word,
value: newValue,
correspondingCount: candidate.correspondingCount,
lastMid: includeMMValueCalculation ? data.mid:candidate.lastMid,
data: nodedata
text: data.word,
value: data.value(),
correspondingCount: data.ruby.count,
lastMid: data.mid,
data: [data]
)
result.append(candidate)
//
if result.count >= N_best {
result.removeLast()
}
result.insert(candidate, at: lastindex)
}
}
}