add prediction candidate

This commit is contained in:
ensan-hcl
2023-09-19 22:06:48 +09:00
parent f163350c0b
commit abd896f649
4 changed files with 126 additions and 19 deletions

View File

@@ -72,13 +72,13 @@ public struct Candidate: Sendable {
///
public var text: String
///
public let value: PValue
public var value: PValue
/// composingText.input
public var correspondingCount: Int
/// mid()
public let lastMid: Int
public var lastMid: Int
/// DicdataElement
public let data: [DicdataElement]
public var data: [DicdataElement]
/// `action`
/// - note:
public var actions: [CompleteAction]

View File

@@ -595,15 +595,15 @@ import SwiftUtils
}
/// Returns prediction candidates for text that follows or extends `leftSideCandidate`.
///
/// Combines the results of `getZeroHintPredictionCandidates` and `getPredictionCandidates`
/// (each requested with `N_best: 10`) and reduces them with `max(count: 10, sortedBy:)`,
/// comparing candidates by `value`.
/// - Parameters:
///   - leftSideCandidate: The candidate handed to both converter queries.
///   - options: Not referenced by the statements visible here.
/// - Returns: The combined prediction candidates.
/// - Note: NOTE(review): if `max(count:sortedBy:)` is the swift-algorithms method, the result is in
///   ascending `value` order (highest value last) — confirm callers expect that ordering.
public func requestPredictionCandidates(leftSideCandidate: Candidate, options: ConvertRequestOptions) -> [Candidate] {
public func requestPredictionCandidates(leftSideCandidate: Candidate, options: ConvertRequestOptions) -> [PredictionCandidate] {
// Zero-hint predictions for the left-side candidate.
let zeroHintResults = self.converter.getZeroHintPredictionCandidates(preparts: [leftSideCandidate], N_best: 10)
//
// TODO: implement
// Predictions obtained by extending the trailing elements of the left-side candidate.
let predictionResults = self.converter.getPredictionCandidates(prepart: leftSideCandidate, N_best: 10)
//
// TODO: implement
//
// TODO: implement
return zeroHintResults
// Merge both sources and let `max(count:sortedBy:)` pick 10 entries by `value`.
return zeroHintResults.chained(predictionResults).max(count: 10, sortedBy: {$0.value < $1.value})
}
}

View File

@@ -67,6 +67,63 @@ struct Kana2Kanji {
)
}
/// Proposes prediction candidates that extend the trailing words of an already selected candidate.
///
/// Up to three elements are removed from the end of `prepart.data`, one per iteration. After each
/// removal the removed elements' words and rubies are concatenated, `getPredictionLOUDSDicdata(key:)`
/// is queried with the concatenated ruby, and every returned entry whose word strictly extends the
/// concatenated word is offered as a `.replacement` for the removed elements.
///
/// - Parameters:
///   - prepart: The candidate whose tail should be extended.
///   - N_best: Maximum number of candidates to keep. A value of zero or less yields an empty array.
/// - Returns: At most `N_best` candidates in descending order of `value`.
func getPredictionCandidates(prepart: Candidate, N_best: Int) -> [PredictionCandidate] {
    // With a non-positive limit nothing can be kept; returning early also prevents
    // `removeLast()` from trapping on an empty array when `N_best` is negative.
    guard N_best > 0 else {
        return []
    }
    var result: [PredictionCandidate] = []
    // `prefixCandidate` tracks what is left of `prepart` once the trailing elements are removed.
    var prefixCandidate = prepart
    prefixCandidate.actions = []
    var prefixCandidateData = prepart.data
    // Concatenation of the removed elements, kept in their original order.
    var totalWord = ""
    var totalRuby = ""
    var totalData: [DicdataElement] = []
    var count = 1
    while count <= min(prepart.data.count, 3), let element = prefixCandidateData.popLast() {
        defer {
            count += 1
        }
        // rcid of the element that now ends the prefix, or BOS when the prefix is empty.
        // It does not change for the rest of this iteration.
        let precedingRcid = prefixCandidateData.last?.rcid ?? CIDData.BOS.cid
        // Update prefixCandidate: subtract the removed element's own value and its CC / MM values
        // from the running value, then refresh the fields derived from the remaining data.
        do {
            prefixCandidate.value -= element.value()
            prefixCandidate.value -= self.dicdataStore.getCCValue(precedingRcid, element.lcid)
            if DicdataStore.includeMMValueCalculation(element) {
                let previousMid = prefixCandidateData.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
                prefixCandidate.lastMid = previousMid
                prefixCandidate.value -= self.dicdataStore.getMMValue(previousMid, element.mid)
            }
            prefixCandidate.data = prefixCandidateData
            prefixCandidate.text = prefixCandidateData.reduce(into: "") { $0 += $1.word }
            prefixCandidate.correspondingCount = prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count }
        }
        // Prepend, because elements are popped from the back of the array.
        totalWord.insert(contentsOf: element.word, at: totalWord.startIndex)
        totalRuby.insert(contentsOf: element.ruby, at: totalRuby.startIndex)
        totalData.insert(element, at: 0)
        let dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: totalRuby).filter { $0.word.hasPrefix(totalWord) }
        for data in dicdata {
            // Cut off the common prefix; what remains is the text this prediction would add.
            let text = String(data.word.dropFirst(totalWord.count))
            // An entry identical to the removed words adds nothing, so it is not a useful prediction.
            if text.isEmpty {
                continue
            }
            let ccValue = self.dicdataStore.getCCValue(precedingRcid, data.lcid)
            let mmValue = DicdataStore.includeMMValueCalculation(data) ? self.dicdataStore.getMMValue(prefixCandidate.lastMid, data.mid) : .zero
            let wValue = data.value()
            let newValue = prefixCandidate.value + mmValue + ccValue + wValue
            // Index that keeps `result` sorted by descending value
            // (after the last entry whose value is not smaller than `newValue`).
            let lastindex: Int = (result.lastIndex(where: { $0.value >= newValue }) ?? -1) + 1
            // The list is full and this entry would rank below everything already kept.
            if lastindex >= N_best {
                continue
            }
            // Make room by dropping the current lowest-valued entry.
            if result.count >= N_best {
                result.removeLast()
            }
            result.insert(.replacement(.init(text: text, targetData: totalData, replacementData: [data], value: newValue)), at: lastindex)
        }
    }
    return result
}
///
/// - parameters:
/// - preparts: Candidate
@@ -75,8 +132,8 @@ struct Kana2Kanji {
///
/// - note:
/// --
func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [Candidate] {
var result: [Candidate] = []
func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [PredictionCandidate] {
var result: [PredictionCandidate] = []
for candidate in preparts {
if let last = candidate.data.last {
let dicdata = self.dicdataStore.getZeroHintPredictionDicdata(lastRcid: last.rcid)
@@ -92,21 +149,11 @@ struct Kana2Kanji {
if lastindex == N_best {
continue
}
//
var nodedata = candidate.data
nodedata.append(data)
let candidate = Candidate(
text: data.word,
value: data.value(),
correspondingCount: data.ruby.count,
lastMid: data.mid,
data: [data]
)
//
if result.count >= N_best {
result.removeLast()
}
result.insert(candidate, at: lastindex)
result.insert(.additional(.init(text: data.word, data: [data], value: newValue)), at: lastindex)
}
}
}

View File

@@ -0,0 +1,60 @@
//
// PredictionCandidate.swift
//
//
// Created by miwa on 2023/09/19.
//
import Foundation
/// A prediction that can be applied to an already selected `Candidate`.
public enum PredictionCandidate: Sendable {
    /// Dictionary data to append after the existing candidate.
    case additional(AdditionalPredictionCandidate)
    /// Dictionary data that takes the place of the candidate's trailing elements.
    case replacement(ReplacementPredictionCandidate)

    /// Payload of `.additional`.
    public struct AdditionalPredictionCandidate: Sendable {
        /// Text associated with the prediction. `join(to:)` does not read it; the joined text is built from `data`.
        public var text: String
        /// Elements appended to the candidate's `data` when joined.
        public var data: [DicdataElement]
        /// Value assigned to the candidate when joined.
        public var value: PValue
    }

    /// Payload of `.replacement`.
    public struct ReplacementPredictionCandidate: Sendable {
        /// Text associated with the prediction. `join(to:)` does not read it; the joined text is rebuilt from the resulting `data`.
        public var text: String
        /// Elements being replaced; on join, `targetData.count` trailing elements are removed from the candidate.
        public var targetData: [DicdataElement]
        /// Elements appended in place of the removed ones.
        public var replacementData: [DicdataElement]
        /// Value assigned to the candidate when joined.
        public var value: PValue
    }

    /// The value carried by the payload, whichever case this is.
    public var value: PValue {
        switch self {
        case .additional(let addition):
            return addition.value
        case .replacement(let replacement):
            return replacement.value
        }
    }

    /// Applies this prediction to `candidate` and returns the updated candidate.
    ///
    /// - Parameter candidate: The candidate to extend (`.additional`) or whose tail is swapped (`.replacement`).
    /// - Returns: The candidate with `text`, `data`, `value`, `lastMid` and `correspondingCount` updated.
    public func join(to candidate: consuming Candidate) -> Candidate {
        switch self {
        case .additional(let addition):
            // Extend the existing text and data with every added element, in order.
            candidate.text = addition.data.reduce(into: candidate.text) { $0 += $1.word }
            candidate.data.append(contentsOf: addition.data)
            // Keep the previous lastMid unless one of the added elements takes part in the MM calculation.
            if let mid = addition.data.last(where: DicdataStore.includeMMValueCalculation)?.mid {
                candidate.lastMid = mid
            }
        case .replacement(let replacement):
            // Swap the trailing target elements for the replacement elements.
            candidate.data.removeLast(replacement.targetData.count)
            candidate.data.append(contentsOf: replacement.replacementData)
            // Text and lastMid are rebuilt from scratch out of the resulting data.
            candidate.text = candidate.data.reduce(into: "") { $0 += $1.word }
            candidate.lastMid = candidate.data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
        }
        // Both cases take over the prediction's value and recount the ruby characters of the final data.
        candidate.value = self.value
        candidate.correspondingCount = candidate.data.reduce(0) { $0 + $1.ruby.count }
        return candidate
    }
}