[cli] 完全一致変換のみを出力するモードを追加 (#88)

* cli: add option to report score

* cli: 完全一致変換をサポート
This commit is contained in:
Miwa / Ensan
2024-05-04 11:39:20 +09:00
committed by GitHub
parent 3cf83bc05b
commit c211bc36cd
3 changed files with 55 additions and 4 deletions

View File

@@ -15,6 +15,9 @@ extension Subcommands {
@Flag(name: [.customLong("disable_prediction")], help: "Disable producing prediction candidates.")
var disablePrediction = false
@Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).")
var onlyWholeConversion = false
@Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.")
var reportScore = false
@@ -25,19 +28,31 @@ extension Subcommands {
var composingText = ComposingText()
composingText.insertAtCursorPosition(input, inputStyle: .direct)
let result = converter.requestCandidates(composingText, options: requestOptions())
for candidate in result.mainResults.prefix(self.displayTopN) {
let mainResults = result.mainResults.filter {
!self.onlyWholeConversion || $0.data.reduce(into: "", {$0.append(contentsOf: $1.ruby)}) == input.toKatakana()
}
for candidate in mainResults.prefix(self.displayTopN) {
if self.reportScore {
print("\(candidate.text) \(bold: "score:") \(candidate.value)")
} else {
print(candidate.text)
}
}
if self.onlyWholeConversion {
// entropy
let expValues = mainResults.map { exp(Double($0.value)) }
let sumOfExpValues = expValues.reduce(into: 0, +=)
// Normalize the exponentiated scores so they sum to 1 (a probability distribution)
let probs = expValues.map { $0 / sumOfExpValues }
let entropy = -probs.reduce(into: 0) { $0 += $1 * log($1) }
print("\(bold: "Entropy:") \(entropy)")
}
}
func requestOptions() -> ConvertRequestOptions {
.withDefaultDictionary(
N_best: configNBest,
requireJapanesePrediction: !disablePrediction,
var option: ConvertRequestOptions = .withDefaultDictionary(
N_best: self.onlyWholeConversion ? max(self.configNBest, self.displayTopN) : self.configNBest,
requireJapanesePrediction: !self.onlyWholeConversion && !self.disablePrediction,
requireEnglishPrediction: false,
keyboardLanguage: .ja_JP,
typographyLetterCandidate: false,
@@ -52,6 +67,10 @@ extension Subcommands {
sharedContainerURL: URL(fileURLWithPath: ""),
metadata: .init(appVersionString: "anco")
)
if self.onlyWholeConversion {
option.requestQuery = .
}
return option
}
}
}

View File

@@ -49,6 +49,26 @@ public struct ConvertRequestOptions: Sendable {
self.dictionaryResourceURL = dictionaryResourceURL
}
/// Creates request options with every field supplied explicitly, including the request query.
///
/// Each argument is stored in the property of the same name.
///
/// - Parameter requestQuery: The kind of result the caller wants from the converter.
package init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, textReplacer: TextReplacer = TextReplacer(), metadata: ConvertRequestOptions.Metadata, requestQuery: RequestQuery) {
    self.N_best = N_best
    self.requireJapanesePrediction = requireJapanesePrediction
    self.requireEnglishPrediction = requireEnglishPrediction
    self.keyboardLanguage = keyboardLanguage
    self.typographyLetterCandidate = typographyLetterCandidate
    self.unicodeCandidate = unicodeCandidate
    self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
    self.fullWidthRomanCandidate = fullWidthRomanCandidate
    self.halfWidthKanaCandidate = halfWidthKanaCandidate
    self.learningType = learningType
    self.maxMemoryCount = maxMemoryCount
    self.shouldResetMemory = shouldResetMemory
    self.memoryDirectoryURL = memoryDirectoryURL
    self.sharedContainerURL = sharedContainerURL
    self.metadata = metadata
    self.textReplacer = textReplacer
    self.dictionaryResourceURL = dictionaryResourceURL
    // Store the caller's query. Without this assignment the property keeps its
    // `.default` initial value and the required argument is silently discarded.
    self.requestQuery = requestQuery
}
public var N_best: Int
public var requireJapanesePrediction: Bool
public var requireEnglishPrediction: Bool
@@ -71,6 +91,9 @@ public struct ConvertRequestOptions: Sendable {
//
public var metadata: Metadata
// MARK: API
package var requestQuery: RequestQuery = .default
static var `default`: Self {
Self(
N_best: 10,
@@ -103,4 +126,9 @@ public struct ConvertRequestOptions: Sendable {
}
var appVersionString: String
}
/// Selects what kind of result a conversion request should produce.
package enum RequestQuery: Sendable {
/// The value `ConvertRequestOptions.requestQuery` is initialized with.
case `default`
// NOTE(review): this case's identifier is missing in this view (it looks like non-ASCII text
// was lost in extraction). Presumably it is the whole-conversion query that the converter
// matches to return only whole-sentence candidates — restore the name from the repository.
case
}
}

View File

@@ -428,6 +428,10 @@ import SwiftUtils
let sums: [(CandidateData, Candidate)] = clauseResult.map {($0, converter.processClauseCandidate($0))}
// Whole-sentence candidates, deduplicated (the 5 best by value are kept below)
let whole_sentence_unique_candidates = self.getUniqueCandidate(sums.map {$0.1})
if case . = options.requestQuery {
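// Whole-conversion query: return all unique whole-sentence candidates sorted by descending value, with no first-clause results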
return ConversionResult(mainResults: whole_sentence_unique_candidates.sorted(by: {$0.value > $1.value}), firstClauseResults: [])
}
let sentence_candidates = whole_sentence_unique_candidates.min(count: 5, sortedBy: {$0.value > $1.value})
// Prediction candidates: at most 3, best value first, only when Japanese prediction is requested
let prediction_candidates: [Candidate] = options.requireJapanesePrediction ? Array(self.getUniqueCandidate(self.getPredictionCandidate(sums, composingText: inputData, options: options)).min(count: 3, sortedBy: {$0.value > $1.value})) : []