Files
AzooKeyKanaKanjiConverter/Sources/CliTool/Subcommands/SessionCommand.swift
Miwa 55ffe3c708 [Experimental] Zenzai (#92)
* experimental rinna integration

* Update impl

* update

* Bump swift-actions/setup-swift from 1 to 2

Bumps [swift-actions/setup-swift](https://github.com/swift-actions/setup-swift) from 1 to 2.
- [Release notes](https://github.com/swift-actions/setup-swift/releases)
- [Commits](https://github.com/swift-actions/setup-swift/compare/v1...v2)

---
updated-dependencies:
- dependency-name: swift-actions/setup-swift
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* change test

* change impl

* take gpt2 weight as option

* don't use async

* support status check

* enhance error

* avoid percent encode

* update

* GPT-2 based kana-kanji conversion is now perfectly workinggit statusgit status

* fix a bug

* Rename gpt2/llama -> zenz

* cleanup

* internal apiを綺麗にした

* cleanup experimental commands

* update

* partially support incremental input using cache

* fix names

* fix bug

* support roman2kana

* cleanup

* fix minor bugs

* improve logic

* fix minor bug

* fix minor bug

* fix minor bug

* optimize

* optimize performance

* Optimize cache hit

* cli: add anco session command

* fix cache hit bugs

* improve session commands

* maybe this will work better for incremental input environment

* speed up zenzai by using n_best alternatives

* update zenz context

* adding no_typo api

* add inference limit

* fix bug

* reset install_cli

* make package buildable -- but llama.cpp features just do not work at this point because metal is not preprocessed

* add proper availability checks

* change macOS minimum version

* fix several problems

* code cleanup

* enable ubuntu build

* fix build error

* fix ubuntu build

* fix borrowing

* update install_cli.sh

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-05-15 01:36:45 +09:00

103 lines
5.2 KiB
Swift

import KanaKanjiConverterModuleWithDefaultDictionary
import ArgumentParser
import Foundation
extension Subcommands {
struct Session: AsyncParsableCommand {
@Argument(help: "ひらがなで表記された入力")
var input: String = ""
@Option(name: [.customLong("config_n_best")], help: "The parameter n (n best parameter) for internal viterbi search.")
var configNBest: Int = 10
@Option(name: [.customShort("n"), .customLong("top_n")], help: "Display top n candidates.")
var displayTopN: Int = 1
@Option(name: [.customLong("zenz")], help: "gguf format model weight for zenz.")
var zenzWeightPath: String = ""
@Flag(name: [.customLong("disable_prediction")], help: "Disable producing prediction candidates.")
var disablePrediction = false
@Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).")
var onlyWholeConversion = false
@Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.")
var reportScore = false
@Flag(name: [.customLong("roman2kana")], help: "Use roman2kana input.")
var roman2kana = false
@Option(name: [.customLong("config_zenzai_inference_limit")], help: "inference limit for zenzai.")
var configZenzaiInferenceLimit: Int = .max
static var configuration = CommandConfiguration(commandName: "session", abstract: "Start session for incremental input.")
@MainActor mutating func run() async {
let converter = KanaKanjiConverter()
var composingText = ComposingText()
let inputStyle: InputStyle = self.roman2kana ? .roman2kana : .direct
while true {
print()
print("\(bold: "== type :q to end session, type :d to delete character, type :c to stop composition, type any other text to input ==")")
let input = readLine(strippingNewline: true) ?? ""
switch input {
case ":q": return
case ":d":
composingText.deleteBackwardFromCursorPosition(count: 1)
case ":c":
composingText.stopComposition()
converter.stopComposition()
print("composition is stopped")
continue
default:
composingText.insertAtCursorPosition(input, inputStyle: inputStyle)
}
print(composingText.convertTarget)
let start = Date()
let result = converter.requestCandidates(composingText, options: requestOptions())
let mainResults = result.mainResults.filter {
!self.onlyWholeConversion || $0.data.reduce(into: "", {$0.append(contentsOf: $1.ruby)}) == input.toKatakana()
}
for candidate in mainResults.prefix(self.displayTopN) {
if self.reportScore {
print("\(candidate.text) \(bold: "score:") \(candidate.value)")
} else {
print(candidate.text)
}
}
if self.onlyWholeConversion {
// entropy
let mean = mainResults.reduce(into: 0) { $0 += Double($1.value) } / Double(mainResults.count)
let expValues = mainResults.map { exp(Double($0.value) - mean) }
let sumOfExpValues = expValues.reduce(into: 0, +=)
//
let probs = mainResults.map { exp(Double($0.value) - mean) / sumOfExpValues }
let entropy = -probs.reduce(into: 0) { $0 += $1 * log($1) }
print("\(bold: "Entropy:") \(entropy)")
}
print("\(bold: "Time:") \(-start.timeIntervalSinceNow)")
}
}
func requestOptions() -> ConvertRequestOptions {
var option: ConvertRequestOptions = .withDefaultDictionary(
N_best: self.onlyWholeConversion ? max(self.configNBest, self.displayTopN) : self.configNBest,
requireJapanesePrediction: !self.onlyWholeConversion && !self.disablePrediction,
requireEnglishPrediction: false,
keyboardLanguage: .ja_JP,
typographyLetterCandidate: false,
unicodeCandidate: true,
englishCandidateInRoman2KanaInput: true,
fullWidthRomanCandidate: false,
halfWidthKanaCandidate: false,
learningType: .nothing,
maxMemoryCount: 0,
shouldResetMemory: false,
memoryDirectoryURL: URL(fileURLWithPath: ""),
sharedContainerURL: URL(fileURLWithPath: ""),
zenzaiMode: self.zenzWeightPath.isEmpty ? .off : .on(weight: URL(string: self.zenzWeightPath)!, inferenceLimit: self.configZenzaiInferenceLimit),
metadata: .init(versionString: "anco for debugging")
)
if self.onlyWholeConversion {
option.requestQuery = .
}
return option
}
}
}