[Experimental] Zenzai (#92)

* experimental rinna integration

* Update impl

* update

* Bump swift-actions/setup-swift from 1 to 2

Bumps [swift-actions/setup-swift](https://github.com/swift-actions/setup-swift) from 1 to 2.
- [Release notes](https://github.com/swift-actions/setup-swift/releases)
- [Commits](https://github.com/swift-actions/setup-swift/compare/v1...v2)

---
updated-dependencies:
- dependency-name: swift-actions/setup-swift
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* change test

* change impl

* take gpt2 weight as option

* don't use async

* support status check

* enhance error

* avoid percent encode

* update

* GPT-2 based kana-kanji conversion is now perfectly working

* fix a bug

* Rename gpt2/llama -> zenz

* cleanup

* internal apiを綺麗にした

* cleanup experimental commands

* update

* partially support incremental input using cache

* fix names

* fix bug

* support roman2kana

* cleanup

* fix minor bugs

* improve logic

* fix minor bug

* fix minor bug

* fix minor bug

* optimize

* optimize performance

* Optimize cache hit

* cli: add anco session command

* fix cache hit bugs

* improve session commands

* maybe this will work better for incremental input environment

* speed up zenzai by using n_best alternatives

* update zenz context

* adding no_typo api

* add inference limit

* fix bug

* reset install_cli

* make package buildable -- but llama.cpp features just do not work at this point because metal is not preprocessed

* add proper availability checks

* change macOS minimum version

* fix several problems

* code cleanup

* enable ubuntu build

* fix build error

* fix ubuntu build

* fix borrowing

* update install_cli.sh

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
Miwa
2024-05-15 01:36:45 +09:00
committed by GitHub
parent c4aa3eee76
commit 55ffe3c708
20 changed files with 942 additions and 108 deletions

View File

@ -19,8 +19,8 @@ final class ConverterTests: XCTestCase {
func requestOptions() -> ConvertRequestOptions {
.withDefaultDictionary(
N_best: 5,
requireJapanesePrediction: true,
N_best: 10,
requireJapanesePrediction: false,
requireEnglishPrediction: false,
keyboardLanguage: .ja_JP,
typographyLetterCandidate: false,
@ -38,46 +38,41 @@ final class ConverterTests: XCTestCase {
}
func testFullConversion() async throws {
await MainActor.run {
do {
let converter = KanaKanjiConverter()
var c = ComposingText()
c.insertAtCursorPosition("あずーきーはしんじだいのきーぼーどあぷりです", inputStyle: .direct)
let results = converter.requestCandidates(c, options: requestOptions())
XCTAssertEqual(results.mainResults.first?.text, "azooKeyは新時代のキーボードアプリです")
}
do {
let converter = KanaKanjiConverter()
var c = ComposingText()
c.insertAtCursorPosition("ようしょうきからてにすすいえいやきゅうしょうりんじけんぽうなどさまざまなすぽーつをけいけんしながらそだちしょうがっこうじだいはろさんぜるすきんこうにたいざいしておりごるふやてにすをならっていた", inputStyle: .direct)
let results = converter.requestCandidates(c, options: requestOptions())
do {
let converter = await KanaKanjiConverter()
var c = ComposingText()
c.insertAtCursorPosition("あずーきーはしんじだいのきーぼーどあぷりです", inputStyle: .direct)
let results = await converter.requestCandidates(c, options: requestOptions())
XCTAssertEqual(results.mainResults.first?.text, "azooKeyは新時代のキーボードアプリです")
}
do {
let converter = await KanaKanjiConverter()
var c = ComposingText()
c.insertAtCursorPosition("ようしょうきからてにすすいえいやきゅうしょうりんじけんぽうなどさまざまなすぽーつをけいけんしながらそだちしょうがっこうじだいはろさんぜるすきんこうにたいざいしておりごるふやてにすをならっていた", inputStyle: .direct)
let results = await converter.requestCandidates(c, options: requestOptions())
XCTAssertEqual(results.mainResults.first?.text, "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた")
}
}
// 1
// memo:
func testGradualConversion() async throws {
let converter = await KanaKanjiConverter()
var c = ComposingText()
let text = "ようしょうきからてにすすいえいやきゅうしょうりんじけんぽうなどさまざまなすぽーつをけいけんしながらそだちしょうがっこうじだいはろさんぜるすきんこうにたいざいしておりごるふやてにすをならっていた"
for char in text {
c.insertAtCursorPosition(String(char), inputStyle: .direct)
let results = await converter.requestCandidates(c, options: requestOptions())
if c.input.count == text.count {
XCTAssertEqual(results.mainResults.first?.text, "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた")
}
}
}
// 1
// memo:
func testGradualConversion() async throws {
await MainActor.run {
let converter = KanaKanjiConverter()
var c = ComposingText()
let text = "ようしょうきからてにすすいえいやきゅうしょうりんじけんぽうなどさまざまなすぽーつをけいけんしながらそだちしょうがっこうじだいはろさんぜるすきんこうにたいざいしておりごるふやてにすをならっていた"
for char in text {
c.insertAtCursorPosition(String(char), inputStyle: .direct)
let results = converter.requestCandidates(c, options: requestOptions())
if c.input.count == text.count {
XCTAssertEqual(results.mainResults.first?.text, "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた")
}
}
}
}
// 1
// memo:
func testRoman2KanaGradualConversion() async throws {
await MainActor.run {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
let text = "youshoukikaratenisusuieiyakyuushourinjikenpounadosamazamanasupoーtuwokeikennsinagarasodatishougakkouzidaiharosanzerusukinkounitaizaisiteorigoruhuyatenisuwonaratteita"
//
@ -87,19 +82,17 @@ final class ConverterTests: XCTestCase {
]
for char in text {
c.insertAtCursorPosition(String(char), inputStyle: .roman2kana)
let results = converter.requestCandidates(c, options: requestOptions())
let results = await converter.requestCandidates(c, options: requestOptions())
if c.input.count == text.count {
XCTAssertTrue(possibles.contains(results.mainResults.first!.text))
}
}
}
}
// 2,3
// memo:
func testSemiGradualConversion() async throws {
await MainActor.run {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
let text = "ようしょうきからてにすすいえいやきゅうしょうりんじけんぽうなどさまざまなすぽーつをけいけんしながらそだちしょうがっこうじだいはろさんぜるすきんこうにたいざいしておりごるふやてにすをならっていた"
var leftIndex = text.startIndex
@ -110,44 +103,40 @@ final class ConverterTests: XCTestCase {
let rightIndex = text.index(leftIndex, offsetBy: count, limitedBy: text.endIndex) ?? text.endIndex
let prefix = String(text[leftIndex ..< rightIndex])
c.insertAtCursorPosition(prefix, inputStyle: .direct)
let results = converter.requestCandidates(c, options: requestOptions())
let results = await converter.requestCandidates(c, options: requestOptions())
leftIndex = rightIndex
if rightIndex == text.endIndex {
XCTAssertEqual(results.mainResults.first?.text, "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた")
}
}
}
}
// 1
// memo: deleted_last_n
func testGradualConversionWithDelete() async throws {
await MainActor.run {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
let text = Array("ようしょうきからてにすすいえいやきゅうしょうりんじけんぽうなどさまざまなすぽーつをけいけんしながらそだちしょうがっこうじだいはろさんぜるすきんこうにたいざいしておりごるふやてにすをならっていた")
let deleteIndices = [1, 4, 8, 10, 15, 18, 20, 21, 23, 25, 26, 28, 29, 33, 34, 37, 39, 40, 42, 44, 45, 49, 51, 54, 58, 60, 62, 64, 67, 69, 70, 75, 80]
for (i, char) in text.enumerated() {
c.insertAtCursorPosition(String(char), inputStyle: .direct)
let results = converter.requestCandidates(c, options: requestOptions())
let results = await converter.requestCandidates(c, options: requestOptions())
if deleteIndices.contains(i) {
let count = i % 3 + 1
c.deleteBackwardFromCursorPosition(count: count)
_ = converter.requestCandidates(c, options: requestOptions())
_ = await converter.requestCandidates(c, options: requestOptions())
c.insertAtCursorPosition(String(text[i - count + 1 ... i]), inputStyle: .direct)
_ = converter.requestCandidates(c, options: requestOptions())
_ = await converter.requestCandidates(c, options: requestOptions())
}
if c.input.count == text.count {
XCTAssertEqual(results.mainResults.first?.text, "幼少期からテニス水泳野球少林寺拳法など様々なスポーツを経験しながら育ち小学校時代はロサンゼルス近郊に滞在しておりゴルフやテニスを習っていた")
}
}
}
}
//
func testMustCases() async throws {
await MainActor.run {
//
do {
let cases: [(input: String, expect: String)] = [
@ -162,19 +151,19 @@ final class ConverterTests: XCTestCase {
var options = requestOptions()
options.requireJapanesePrediction = false
for (input, expect) in cases {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
sequentialInput(&c, sequence: input, inputStyle: .direct)
let results = converter.requestCandidates(c, options: options)
let results = await converter.requestCandidates(c, options: options)
XCTAssertEqual(results.mainResults.first?.text, expect)
}
// gradual input
for (input, expect) in cases {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
for char in input {
c.insertAtCursorPosition(String(char), inputStyle: .direct)
let results = converter.requestCandidates(c, options: options)
let results = await converter.requestCandidates(c, options: options)
if c.input.count == input.count {
XCTAssertEqual(results.mainResults.first?.text, expect)
}
@ -193,33 +182,31 @@ final class ConverterTests: XCTestCase {
var options = requestOptions()
options.requireJapanesePrediction = false
for (input, expect) in cases {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
sequentialInput(&c, sequence: input, inputStyle: .roman2kana)
let results = converter.requestCandidates(c, options: options)
let results = await converter.requestCandidates(c, options: options)
XCTAssertEqual(results.mainResults.first?.text, expect)
}
// gradual input
for (input, expect) in cases {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
for char in input {
c.insertAtCursorPosition(String(char), inputStyle: .roman2kana)
let results = converter.requestCandidates(c, options: options)
let results = await converter.requestCandidates(c, options: options)
if c.input.count == input.count {
XCTAssertEqual(results.mainResults.first?.text, expect)
}
}
}
}
}
}
//
//
func testAccuracy() async throws {
await MainActor.run {
let cases: [(input: String, expect: [String])] = [
("3がつ8にち", ["3月8日"]),
("いっていのわりあい", ["一定の割合"]),
@ -275,10 +262,10 @@ final class ConverterTests: XCTestCase {
var score: Double = 0
for (input, expect) in cases {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
c.insertAtCursorPosition(input, inputStyle: .direct)
let results = converter.requestCandidates(c, options: requestOptions())
let results = await converter.requestCandidates(c, options: requestOptions())
if expect.contains(results.mainResults[0].text) {
score += 1
@ -291,14 +278,12 @@ final class ConverterTests: XCTestCase {
let accuracy = score / Double(cases.count)
print("\(#function) Result: accuracy \(accuracy), score \(score), count \(cases.count)")
XCTAssertGreaterThan(accuracy, 0.7) // 0.7 < accuracy
}
}
//
//
//
func testVerbalAccuracy() async throws {
await MainActor.run {
let cases: [(input: String, expect: [String])] = [
("うわああああ、まじか", ["うわああああ、マジか", "うわああああ、まじか"]),
("は?", ["は?"]),
@ -326,10 +311,10 @@ final class ConverterTests: XCTestCase {
var score: Double = 0
for (input, expect) in cases {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
c.insertAtCursorPosition(input, inputStyle: .direct)
let results = converter.requestCandidates(c, options: requestOptions())
let results = await converter.requestCandidates(c, options: requestOptions())
if expect.contains(results.mainResults[0].text) {
score += 1
@ -342,12 +327,10 @@ final class ConverterTests: XCTestCase {
let accuracy = score / Double(cases.count)
print("\(#function) Result: accuracy \(accuracy), score \(score), count \(cases.count)")
XCTAssertGreaterThan(accuracy, 0.7) // 0.7 < accuracy
}
}
/// MID
func testMeaningBasedConversionAccuracy() async throws {
await MainActor.run {
let cases: [(input: String, expect: String)] = [
("しょうぼう、しょうか、ほのお", "消防、消火、炎"),
("いえき、しょうか、こうそ", "胃液、消化、酵素"),
@ -627,12 +610,12 @@ final class ConverterTests: XCTestCase {
var score: Double = 0
for (input, expect) in cases {
let converter = KanaKanjiConverter()
let converter = await KanaKanjiConverter()
var c = ComposingText()
c.insertAtCursorPosition(input, inputStyle: .direct)
var options = requestOptions()
options.requireJapanesePrediction = false
let results = converter.requestCandidates(c, options: options)
let results = await converter.requestCandidates(c, options: options)
if results.mainResults[0].text == expect {
score += 1
@ -645,10 +628,9 @@ final class ConverterTests: XCTestCase {
let accuracy = score / Double(cases.count)
print("\(#function) Result: accuracy \(accuracy), score \(score), count \(cases.count)")
XCTAssertGreaterThan(accuracy, 0.7) // 0.7 < accuracy
}
}
#if os(macOS) || os(iOS) || os(watchOS) || os(tvOS) || os(visionOS)
#if os(macOS) || os(iOS) || os(watchOS) || os(tvOS) || os(visionOS)
func testMozcEvaluationData() async throws {
// URL
let urlString = "https://raw.githubusercontent.com/google/mozc/master/src/data/dictionary_oss/evaluation.tsv"
@ -729,7 +711,7 @@ final class ConverterTests: XCTestCase {
XCTAssertTrue(mozcScore < azooKeyScore)
}
}
#endif
#endif
enum MozcCommand: Equatable {
/// `arg`