mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
Merge branch 'develop' into codex/modify-learningmanager.init-and-add-error-handling
This commit is contained in:
61
Docs/cli.md
61
Docs/cli.md
@ -5,13 +5,19 @@
|
||||
`anco`を利用するには、最初にinstallが必要です。`/usr/local/bin/`に`anco`が追加されます。
|
||||
|
||||
```bash
|
||||
sudo sh install_cli.sh
|
||||
./install_cli.sh
|
||||
```
|
||||
|
||||
Zenzaiを利用する場合は、`--zenzai`オプションを付けてください。
|
||||
|
||||
```bash
|
||||
sudo sh install_cli.sh --zenzai
|
||||
./install_cli.sh --zenzai
|
||||
```
|
||||
|
||||
デフォルトでは、ほとんどの情報は出力されません。デバッグモードで実行するには`--debug`オプションを付けてください。
|
||||
|
||||
```bash
|
||||
./install_cli.sh --debug
|
||||
```
|
||||
|
||||
例えば以下のように利用できます。
|
||||
@ -55,7 +61,7 @@ $ anco evaluate ./evaluation.tsv --config_n_best 1
|
||||
|
||||
出力はJSONフォーマットです。出力内容の安定が必要な場合`--stable`を指定することで比較的安定した出力を得られます。ただしスコアやエントロピーは辞書バージョンに依存します。
|
||||
|
||||
## 対話的実行
|
||||
## 対話的実行API
|
||||
|
||||
少しずつ入力を進めるような実用的な場面を模した環境として`anco session`コマンドが用意されています。
|
||||
|
||||
@ -67,6 +73,55 @@ $ anco session --roman2kana -n 10 --disable_prediction
|
||||
|
||||
キーを入力してEnterを押すと変換候補が表示されます。`:`で始まる特殊コマンドを利用することで、削除、確定、文脈の設定などの諸操作を行うことが出来ます。
|
||||
|
||||
### リプレイ
|
||||
|
||||
`--replay`を用いると、セッションの中での一連の動作を再現することができます。
|
||||
|
||||
```bash
|
||||
anco session --roman2kana -n 10 --disable_prediction --replay history.txt
|
||||
```
|
||||
|
||||
`history.txt`は例えば以下のような内容が含まれます。
|
||||
|
||||
```
|
||||
a
|
||||
i
|
||||
u
|
||||
e
|
||||
e
|
||||
:del
|
||||
o
|
||||
:0
|
||||
```
|
||||
|
||||
現在実行中のセッションから`history.txt`を作成するには`:dump history.txt`と入力します。
|
||||
|
||||
### 学習機能のデバッグ
|
||||
学習機能のデバッグのため、セッションコマンドには複数の機能が用意されています。`--enable_memory`の状態では、デフォルトで学習が有効になり、一時ディレクトリに学習データが蓄積されます。
|
||||
|
||||
```bash
|
||||
$ anco session --roman2kana -n 10 --disable_prediction --enable_memory
|
||||
```
|
||||
|
||||
セーブを実施するには以下のように`:save`を入力します。
|
||||
|
||||
```txt
|
||||
rime
|
||||
:h
|
||||
:n
|
||||
:14
|
||||
:4
|
||||
:save
|
||||
```
|
||||
|
||||
すでに存在する学習データをread onlyで読み込むこともできます。
|
||||
|
||||
```bash
|
||||
$ anco session --roman2kana -n 10 --disable_prediction --readonly_memory ./memory
|
||||
```
|
||||
|
||||
この場合、`:save`コマンドは何も行いません。
|
||||
|
||||
## 辞書リーダ
|
||||
|
||||
`anco dict`コマンドを利用して辞書データを解析することが出来ます。
|
||||
|
@ -17,6 +17,8 @@ extension Subcommands {
|
||||
var disablePrediction = false
|
||||
@Flag(name: [.customLong("enable_memory")], help: "Enable memory.")
|
||||
var enableLearning = false
|
||||
@Option(name: [.customLong("readonly_memory")], help: "Enable readonly memory.")
|
||||
var readOnlyMemoryPath: String?
|
||||
@Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).")
|
||||
var onlyWholeConversion = false
|
||||
@Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.")
|
||||
@ -73,7 +75,19 @@ extension Subcommands {
|
||||
if !self.zenzWeightPath.isEmpty && (!self.zenzV1 && !self.zenzV2 && !self.zenzV3) {
|
||||
print("zenz version is not specified. By default, zenz-v3 will be used.")
|
||||
}
|
||||
let memoryDirectory = if self.enableLearning {
|
||||
let learningType: LearningType = if self.readOnlyMemoryPath != nil {
|
||||
// 読み取りのみ
|
||||
.onlyOutput
|
||||
} else if self.enableLearning {
|
||||
// 読み書き
|
||||
.inputAndOutput
|
||||
} else {
|
||||
// 読み書きなし
|
||||
.nothing
|
||||
}
|
||||
let memoryDirectory = if let readOnlyMemoryPath {
|
||||
URL(fileURLWithPath: readOnlyMemoryPath)
|
||||
} else if self.enableLearning {
|
||||
if let dir = self.getTemporaryDirectory() {
|
||||
dir
|
||||
} else {
|
||||
@ -82,8 +96,12 @@ extension Subcommands {
|
||||
} else {
|
||||
URL(fileURLWithPath: "")
|
||||
}
|
||||
print("Working with \(learningType) mode. Memory path is \(memoryDirectory).")
|
||||
|
||||
let converter = KanaKanjiConverter()
|
||||
converter.sendToDicdataStore(
|
||||
.setRequestOptions(requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: nil))
|
||||
)
|
||||
var composingText = ComposingText()
|
||||
let inputStyle: InputStyle = self.roman2kana ? .roman2kana : .direct
|
||||
var lastCandidates: [Candidate] = []
|
||||
@ -142,14 +160,18 @@ extension Subcommands {
|
||||
composingText.stopComposition()
|
||||
converter.stopComposition()
|
||||
converter.sendToDicdataStore(.closeKeyboard)
|
||||
if learningType.needUpdateMemory {
|
||||
print("saved")
|
||||
} else {
|
||||
print("anything should not be saved because the learning type is not for update memory")
|
||||
}
|
||||
continue
|
||||
case ":p", ":pred":
|
||||
// 次の文字の予測を取得する
|
||||
let results = converter.predictNextCharacter(
|
||||
leftSideContext: leftSideContext,
|
||||
count: 10,
|
||||
options: requestOptions(memoryDirectory: memoryDirectory, leftSideContext: leftSideContext)
|
||||
options: requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: leftSideContext)
|
||||
)
|
||||
if let firstCandidate = results.first {
|
||||
leftSideContext.append(firstCandidate.character)
|
||||
@ -212,7 +234,7 @@ extension Subcommands {
|
||||
}
|
||||
print(composingText.convertTarget)
|
||||
let start = Date()
|
||||
let result = converter.requestCandidates(composingText, options: requestOptions(memoryDirectory: memoryDirectory, leftSideContext: leftSideContext))
|
||||
let result = converter.requestCandidates(composingText, options: requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: leftSideContext))
|
||||
let mainResults = result.mainResults.filter {
|
||||
!self.onlyWholeConversion || $0.data.reduce(into: "", {$0.append(contentsOf: $1.ruby)}) == input.toKatakana()
|
||||
}
|
||||
@ -239,7 +261,7 @@ extension Subcommands {
|
||||
}
|
||||
}
|
||||
|
||||
func requestOptions(memoryDirectory: URL, leftSideContext: String) -> ConvertRequestOptions {
|
||||
func requestOptions(learningType: LearningType, memoryDirectory: URL, leftSideContext: String?) -> ConvertRequestOptions {
|
||||
let zenzaiVersionDependentMode: ConvertRequestOptions.ZenzaiVersionDependentMode = if self.zenzV1 {
|
||||
.v1
|
||||
} else if self.zenzV2 {
|
||||
@ -271,8 +293,7 @@ extension Subcommands {
|
||||
englishCandidateInRoman2KanaInput: true,
|
||||
fullWidthRomanCandidate: false,
|
||||
halfWidthKanaCandidate: false,
|
||||
learningType: enableLearning ? .inputAndOutput : .nothing,
|
||||
maxMemoryCount: 0,
|
||||
learningType: learningType,
|
||||
shouldResetMemory: false,
|
||||
memoryDirectoryURL: memoryDirectory,
|
||||
sharedContainerURL: URL(fileURLWithPath: ""),
|
||||
|
@ -0,0 +1,46 @@
|
||||
import Foundation
|
||||
|
||||
extension KanaKanjiConverter {
    /// Builds a candidate that rewrites a plain ASCII number with thousands separators.
    ///
    /// The convert target is accepted only when it consists of an optional leading `-`,
    /// ASCII digits, and optionally one `.` followed by more ASCII digits
    /// (for example `49000`, `-13932`, `2129.49`). A comma is inserted every three digits
    /// of the integer part, counting from the right; the sign and the fractional part are
    /// carried over unchanged.
    ///
    /// - Parameter inputData: The composing text whose `convertTarget` is examined.
    /// - Returns: A single-element array holding the formatted candidate, or an empty array
    ///   when the text is not such a number or when its integer part has three digits or
    ///   fewer (no separator would be inserted).
    func commaSeparatedNumberCandidates(_ inputData: ComposingText) -> [Candidate] {
        var text = inputData.convertTarget
        guard !text.isEmpty else { return [] }

        // Strip the sign so only digits and the decimal point remain to be validated.
        let negative = text.first == "-"
        if negative {
            text.removeFirst()
        }

        // Empty subsequences are kept on purpose: inputs such as "1234.", ".5" or a lone "-"
        // then produce an empty part and are rejected by the check below.
        let parts = text.split(separator: ".", omittingEmptySubsequences: false)
        guard parts.count <= 2,
              parts.allSatisfy({ !$0.isEmpty && $0.allSatisfy({ $0.isNumber && $0.isASCII }) }) else {
            return []
        }
        let integerPart = parts[0]
        guard integerPart.count > 3 else { return [] }

        // Walk the digits from the least significant end, emitting a comma before every
        // third digit, then flip the buffer back into reading order.
        var formatted = ""
        for (i, ch) in integerPart.reversed().enumerated() {
            if i > 0 && i % 3 == 0 {
                formatted.append(",")
            }
            formatted.append(ch)
        }
        var result = (negative ? "-" : "") + String(formatted.reversed())
        if parts.count == 2 {
            result += "." + parts[1]
        }

        // The ruby is derived from the untouched convert target (sign included).
        let ruby = inputData.convertTarget.toKatakana()
        let candidate = Candidate(
            text: result,
            value: -10,
            correspondingCount: inputData.input.count,
            lastMid: MIDData.一般.mid,
            data: [DicdataElement(word: result, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)]
        )
        return [candidate]
    }
}
|
@ -83,6 +83,7 @@ public struct ConvertRequestOptions: Sendable {
|
||||
specialCandidateProviders.append(.timeExpression)
|
||||
specialCandidateProviders.append(.calendar)
|
||||
specialCandidateProviders.append(.version)
|
||||
specialCandidateProviders.append(.commaSeparatedNumber)
|
||||
|
||||
self.N_best = N_best
|
||||
self.requireJapanesePrediction = requireJapanesePrediction
|
||||
|
@ -23,7 +23,8 @@ import EfficientNGram
|
||||
EmailAddressSpecialCandidateProvider(),
|
||||
UnicodeSpecialCandidateProvider(),
|
||||
VersionSpecialCandidateProvider(),
|
||||
TimeExpressionSpecialCandidateProvider()
|
||||
TimeExpressionSpecialCandidateProvider(),
|
||||
CommaSeparatedNumberSpecialCandidateProvider()
|
||||
]
|
||||
@MainActor private var checker = SpellChecker()
|
||||
private var checkerInitialized: [KeyboardLanguage: Bool] = [.none: true, .ja_JP: true]
|
||||
|
@ -45,6 +45,13 @@ public struct TimeExpressionSpecialCandidateProvider: SpecialCandidateProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/// Special-candidate provider backed by `KanaKanjiConverter.commaSeparatedNumberCandidates(_:)`.
public struct CommaSeparatedNumberSpecialCandidateProvider: SpecialCandidateProvider {
    public init() {}

    /// Returns whatever the converter's comma-separated-number routine yields for `inputData`.
    /// The request options are accepted for protocol conformance but are not consulted.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
        let candidates = converter.commaSeparatedNumberCandidates(inputData)
        return candidates
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == CalendarSpecialCandidateProvider {
    /// Shorthand that creates a fresh `CalendarSpecialCandidateProvider`, usable as `.calendar`.
    static var calendar: Self {
        return .init()
    }
}
|
||||
@ -68,3 +75,7 @@ public extension SpecialCandidateProvider where Self == VersionSpecialCandidateP
|
||||
public extension SpecialCandidateProvider where Self == TimeExpressionSpecialCandidateProvider {
    /// Shorthand that creates a fresh `TimeExpressionSpecialCandidateProvider`, usable as `.timeExpression`.
    static var timeExpression: Self {
        return .init()
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == CommaSeparatedNumberSpecialCandidateProvider {
    /// Shorthand that creates a fresh `CommaSeparatedNumberSpecialCandidateProvider`, usable as `.commaSeparatedNumber`.
    static var commaSeparatedNumber: Self {
        return .init()
    }
}
|
||||
|
@ -67,7 +67,9 @@ public final class DicdataStore {
|
||||
self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount)
|
||||
}
|
||||
}
|
||||
self.reloadUser()
|
||||
_ = self.loadLOUDS(query: "user")
|
||||
self.reloadMemory()
|
||||
_ = self.loadLOUDS(query: "memory")
|
||||
|
||||
if requestOptions.preloadDictionary {
|
||||
|
@ -858,6 +858,7 @@ final class LearningManager {
|
||||
|
||||
func save() {
|
||||
if !options.learningType.needUpdateMemory {
|
||||
debug(#function, "options.learningType=\(options.learningType)", "skip memory update")
|
||||
return
|
||||
}
|
||||
do {
|
||||
|
@ -24,7 +24,7 @@ public enum LearningType: Int, CaseIterable, Sendable {
|
||||
case onlyOutput
|
||||
case nothing
|
||||
|
||||
var needUpdateMemory: Bool {
|
||||
package var needUpdateMemory: Bool {
|
||||
self == .inputAndOutput
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,41 @@
|
||||
import XCTest
|
||||
@testable import KanaKanjiConverterModule
|
||||
|
||||
/// Exercises `KanaKanjiConverter.commaSeparatedNumberCandidates(_:)` with directly typed input.
final class CommaSeparatedNumberTests: XCTestCase {
    /// Wraps `input` in a `ComposingText` whose elements all use the direct input style
    /// and whose cursor sits at the end of the text.
    private func makeDirectInput(direct input: String) -> ComposingText {
        let cursorPosition = input.count
        return ComposingText(
            convertTargetCursorPosition: cursorPosition,
            input: input.map { .init(character: $0, inputStyle: .direct) },
            convertTarget: input
        )
    }

    func testCommaSeparatedNumberCandidates() async throws {
        let converter = await KanaKanjiConverter()

        // Runs the routine under test for one raw input string.
        func candidates(for text: String) async -> [Candidate] {
            await converter.commaSeparatedNumberCandidates(makeDirectInput(direct: text))
        }

        // Inputs that must be rewritten, paired with the expected first candidate text.
        let formattedCases: [(input: String, expected: String)] = [
            (input: "49000", expected: "49,000"),
            (input: "109428081", expected: "109,428,081"),
            (input: "2129.49", expected: "2,129.49"),
            (input: "-13932", expected: "-13,932")
        ]
        for testCase in formattedCases {
            let produced = await candidates(for: testCase.input)
            XCTAssertEqual(produced.first?.text, testCase.expected)
        }

        // Inputs that must yield no candidate: too short, or containing non-digits.
        let rejectedInputs = ["12", "1A9B", "123"]
        for rejected in rejectedInputs {
            let produced = await candidates(for: rejected)
            XCTAssertTrue(produced.isEmpty)
        }
    }
}
|
@ -2,26 +2,50 @@
|
||||
set -e
|
||||
|
||||
USE_ZENZAI=0
|
||||
USE_DEBUG=0
|
||||
|
||||
# 引数の解析
|
||||
for arg in "$@"; do
|
||||
if [ "$arg" = "--zenzai" ]; then
|
||||
USE_ZENZAI=1
|
||||
fi
|
||||
if [ "$arg" = "--debug" ]; then
|
||||
echo "⚠️ Debug mode is enabled. This may cause performance issues."
|
||||
USE_DEBUG=1
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$USE_DEBUG" -eq 1 ]; then
|
||||
CONFIGURATION="debug"
|
||||
else
|
||||
CONFIGURATION="release"
|
||||
fi
|
||||
|
||||
if [ "$USE_ZENZAI" -eq 1 ]; then
|
||||
echo "📦 Building with Zenzai support..."
|
||||
swift build -c release -Xcxx -xobjective-c++ --traits Zenzai
|
||||
swift build -c $CONFIGURATION -Xcxx -xobjective-c++ --traits Zenzai
|
||||
else
|
||||
echo "📦 Building..."
|
||||
# Build
|
||||
swift build -c release -Xcxx -xobjective-c++
|
||||
swift build -c $CONFIGURATION -Xcxx -xobjective-c++
|
||||
fi
|
||||
|
||||
# Copy Required Resources
|
||||
sudo cp -R .build/release/llama.framework /usr/local/lib/
|
||||
sudo cp -R .build/${CONFIGURATION}/llama.framework /usr/local/lib/
|
||||
|
||||
# add rpath
|
||||
install_name_tool -add_rpath /usr/local/lib/ .build/release/CliTool
|
||||
RPATH="/usr/local/lib/"
|
||||
BINARY_PATH=".build/${CONFIGURATION}/CliTool"
|
||||
|
||||
if ! otool -l "$BINARY_PATH" | grep -q "$RPATH"; then
|
||||
install_name_tool -add_rpath "$RPATH" "$BINARY_PATH"
|
||||
else
|
||||
echo "✅ RPATH $RPATH is already present in $BINARY_PATH"
|
||||
fi
|
||||
# if debug mode, codesign is required to execute
|
||||
if [ "$USE_DEBUG" -eq 1 ]; then
|
||||
echo "🔒 Signing the binary for debug mode..."
|
||||
codesign --force --sign - .build/${CONFIGURATION}/CliTool
|
||||
fi
|
||||
|
||||
# Install
|
||||
sudo cp -f .build/release/CliTool /usr/local/bin/anco
|
||||
sudo cp -f .build/${CONFIGURATION}/CliTool /usr/local/bin/anco
|
||||
|
Reference in New Issue
Block a user