mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 11:08:33 +00:00
[Experimental] Zenzai (#92)
* experimental rinna integration * Update impl * update * Bump swift-actions/setup-swift from 1 to 2 Bumps [swift-actions/setup-swift](https://github.com/swift-actions/setup-swift) from 1 to 2. - [Release notes](https://github.com/swift-actions/setup-swift/releases) - [Commits](https://github.com/swift-actions/setup-swift/compare/v1...v2) --- updated-dependencies: - dependency-name: swift-actions/setup-swift dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com> * change test * change impl * take gpt2 weight as option * don't use async * support status check * enhance error * avoid percent encode * update * GPT-2 based kana-kanji conversion is now perfectly working * fix a bug * Rename gpt2/llama -> zenz * cleanup * internal apiを綺麗にした * cleanup experimental commands * update * partially support incremental input using cache * fix names * fix bug * support roman2kana * cleanup * fix minor bugs * improve logic * fix minor bug * fix minor bug * fix minor bug * optimize * optimize performance * Optimize cache hit * cli: add anco session command * fix cache hit bugs * improve session commands * maybe this will work better for incremental input environment * speed up zenzai by using n_best alternatives * update zenz context * adding no_typo api * add inference limit * fix bug * reset install_cli * make package buildable -- but llama.cpp features just do not work at this point because metal is not preprocessed * add proper availability checks * change macOS minimum version * fix several problems * code cleanup * enable ubuntu build * fix build error * fix ubuntu build * fix borrowing * update install_cli.sh --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
102
Sources/CliTool/Subcommands/SessionCommand.swift
Normal file
102
Sources/CliTool/Subcommands/SessionCommand.swift
Normal file
@@ -0,0 +1,102 @@
|
||||
import KanaKanjiConverterModuleWithDefaultDictionary
|
||||
import ArgumentParser
|
||||
import Foundation
|
||||
|
||||
extension Subcommands {
    /// Interactive `session` subcommand for incremental input.
    ///
    /// Each line read from standard input is appended to a single `ComposingText`, after which
    /// conversion candidates for the whole composed text are requested and printed.
    ///
    /// Special lines:
    /// - `:q` ends the session.
    /// - `:d` deletes one character before the cursor, then converts again.
    /// - `:c` stops the current composition on both the text and the converter.
    ///
    /// Reaching end-of-input on standard input also ends the session.
    struct Session: AsyncParsableCommand {
        // NOTE(review): this argument is declared but `run()` never reads it; the session takes
        // all of its text from standard input. Kept so the command-line interface is unchanged.
        @Argument(help: "ひらがなで表記された入力")
        var input: String = ""

        @Option(name: [.customLong("config_n_best")], help: "The parameter n (n best parameter) for internal viterbi search.")
        var configNBest: Int = 10

        @Option(name: [.customShort("n"), .customLong("top_n")], help: "Display top n candidates.")
        var displayTopN: Int = 1

        @Option(name: [.customLong("zenz")], help: "gguf format model weight for zenz.")
        var zenzWeightPath: String = ""

        @Flag(name: [.customLong("disable_prediction")], help: "Disable producing prediction candidates.")
        var disablePrediction = false

        @Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).")
        var onlyWholeConversion = false

        @Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.")
        var reportScore = false

        @Flag(name: [.customLong("roman2kana")], help: "Use roman2kana input.")
        var roman2kana = false

        @Option(name: [.customLong("config_zenzai_inference_limit")], help: "inference limit for zenzai.")
        var configZenzaiInferenceLimit: Int = .max

        // Immutable: the configuration is never reassigned, and a `let` still satisfies the
        // read-only `configuration` requirement of the command protocol.
        static let configuration = CommandConfiguration(commandName: "session", abstract: "Start session for incremental input.")

        /// Runs the read–convert–print loop until `:q` is entered or standard input is exhausted.
        @MainActor mutating func run() async {
            let converter = KanaKanjiConverter()
            var composingText = ComposingText()
            let inputStyle: InputStyle = self.roman2kana ? .roman2kana : .direct
            // Copied into locals so the closures below do not need to capture `self`.
            let wholeConversionOnly = self.onlyWholeConversion
            let showScore = self.reportScore
            let topN = self.displayTopN

            while true {
                print()
                print("\(bold: "== type :q to end session, type :d to delete character, type :c to stop composition, type any other text to input ==")")
                // `readLine` returns nil at end-of-input (closed pipe, Ctrl-D). Treating that as an
                // empty line would spin forever, so end the session instead.
                guard let line = readLine(strippingNewline: true) else {
                    return
                }
                switch line {
                case ":q":
                    return
                case ":d":
                    composingText.deleteBackwardFromCursorPosition(count: 1)
                case ":c":
                    composingText.stopComposition()
                    converter.stopComposition()
                    print("composition is stopped")
                    continue
                default:
                    composingText.insertAtCursorPosition(line, inputStyle: inputStyle)
                }

                print(composingText.convertTarget)
                let start = Date()
                let result = converter.requestCandidates(composingText, options: requestOptions())

                // A whole conversion must read back as the entire composed text. Comparing against
                // only the most recent line would reject every candidate once the session holds
                // more than one input, after `:d`, or when the line was typed as roman2kana.
                let expectedRuby = composingText.convertTarget.toKatakana()
                let mainResults = result.mainResults.filter { candidate in
                    guard wholeConversionOnly else {
                        return true
                    }
                    let ruby = candidate.data.reduce(into: "") { $0.append(contentsOf: $1.ruby) }
                    return ruby == expectedRuby
                }

                for candidate in mainResults.prefix(topN) {
                    if showScore {
                        print("\(candidate.text) \(bold: "score:") \(candidate.value)")
                    } else {
                        print(candidate.text)
                    }
                }

                if wholeConversionOnly {
                    // Report the entropy of the candidate scores.
                    let entropy = Self.entropy(ofScores: mainResults.map { Double($0.value) })
                    print("\(bold: "Entropy:") \(entropy)")
                }
                print("\(bold: "Time:") \(-start.timeIntervalSinceNow)")
            }
        }

        /// Entropy (natural logarithm) of the distribution obtained by exponentiating `scores`
        /// and normalizing them to sum to one.
        ///
        /// - Parameter scores: Candidate scores; they are shifted by their mean before `exp`.
        /// - Returns: The entropy, or `0` when `scores` is empty.
        private static func entropy(ofScores scores: [Double]) -> Double {
            guard !scores.isEmpty else {
                return 0
            }
            let mean = scores.reduce(0, +) / Double(scores.count)
            let expValues = scores.map { exp($0 - mean) }
            let sumOfExpValues = expValues.reduce(0, +)
            // Normalize to probabilities and accumulate p * log(p).
            let negativeEntropy = expValues.reduce(into: 0.0) { partial, expValue in
                let probability = expValue / sumOfExpValues
                // Skip zero-probability terms: 0 * log(0) would evaluate to NaN.
                if probability > 0 {
                    partial += probability * log(probability)
                }
            }
            return -negativeEntropy
        }

        /// Builds the conversion options from the parsed command-line flags.
        ///
        /// When `onlyWholeConversion` is set, the n-best size is raised to at least `displayTopN`,
        /// Japanese prediction is turned off, and the request query is set to `.完全一致`.
        /// Zenzai is enabled only when a weight path was supplied via `--zenz`.
        func requestOptions() -> ConvertRequestOptions {
            var option: ConvertRequestOptions = .withDefaultDictionary(
                N_best: self.onlyWholeConversion ? max(self.configNBest, self.displayTopN) : self.configNBest,
                requireJapanesePrediction: !self.onlyWholeConversion && !self.disablePrediction,
                requireEnglishPrediction: false,
                keyboardLanguage: .ja_JP,
                typographyLetterCandidate: false,
                unicodeCandidate: true,
                englishCandidateInRoman2KanaInput: true,
                fullWidthRomanCandidate: false,
                halfWidthKanaCandidate: false,
                learningType: .nothing,
                maxMemoryCount: 0,
                shouldResetMemory: false,
                memoryDirectoryURL: URL(fileURLWithPath: ""),
                sharedContainerURL: URL(fileURLWithPath: ""),
                // `URL(string:)` can return nil for paths it cannot parse (e.g. containing spaces
                // on some Foundation versions); fall back to a file URL instead of crashing.
                // NOTE(review): assumes the consumer reads the weight via the URL's path — confirm.
                zenzaiMode: self.zenzWeightPath.isEmpty
                    ? .off
                    : .on(
                        weight: URL(string: self.zenzWeightPath) ?? URL(fileURLWithPath: self.zenzWeightPath),
                        inferenceLimit: self.configZenzaiInferenceLimit
                    ),
                metadata: .init(versionString: "anco for debugging")
            )
            if self.onlyWholeConversion {
                option.requestQuery = .完全一致
            }
            return option
        }
    }
}
|
||||
Reference in New Issue
Block a user