Merge branch 'develop' into codex/modify-learningmanager.init-and-add-error-handling

This commit is contained in:
Miwa
2025-06-01 20:26:39 +09:00
committed by GitHub
12 changed files with 222 additions and 19 deletions

View File

@ -5,13 +5,19 @@
`anco`を利用するには、最初にinstallが必要です。`/usr/local/bin/``anco`が追加されます。 `anco`を利用するには、最初にinstallが必要です。`/usr/local/bin/``anco`が追加されます。
```bash ```bash
sudo sh install_cli.sh ./install_cli.sh
``` ```
Zenzaiを利用する場合は、`--zenzai`オプションを付けてください。 Zenzaiを利用する場合は、`--zenzai`オプションを付けてください。
```bash ```bash
sudo sh install_cli.sh --zenzai ./install_cli.sh --zenzai
```
デフォルトでは、ほとんどの情報は出力されません。デバッグモードで実行するには`--debug`オプションを付けてください。
```bash
./install_cli.sh --debug
``` ```
例えば以下のように利用できます。 例えば以下のように利用できます。
@ -55,7 +61,7 @@ $ anco evaluate ./evaluation.tsv --config_n_best 1
出力はJSONフォーマットです。出力内容の安定が必要な場合`--stable`を指定することで比較的安定した出力を得られます。ただしスコアやエントロピーは辞書バージョンに依存します。 出力はJSONフォーマットです。出力内容の安定が必要な場合`--stable`を指定することで比較的安定した出力を得られます。ただしスコアやエントロピーは辞書バージョンに依存します。
## 対話的実行 ## 対話的実行API
少しずつ入力を進めるような実用的な場面を模した環境として`anco session`コマンドが用意されています。 少しずつ入力を進めるような実用的な場面を模した環境として`anco session`コマンドが用意されています。
@ -67,6 +73,55 @@ $ anco session --roman2kana -n 10 --disable_prediction
キーを入力してEnterを押すと変換候補が表示されます。`:`で始まる特殊コマンドを利用することで、削除、確定、文脈の設定などの諸操作を行うことが出来ます。 キーを入力してEnterを押すと変換候補が表示されます。`:`で始まる特殊コマンドを利用することで、削除、確定、文脈の設定などの諸操作を行うことが出来ます。
### リプレイ
`--replay`を用いると、セッションの中での一連の動作を再現することができます。
```bash
anco session --roman2kana -n 10 --disable_prediction --replay history.txt
```
`history.txt`は例えば以下のような内容が含まれます。
```
a
i
u
e
e
:del
o
:0
```
現在実行中のセッションから`history.txt`を作成するには`:dump history.txt`と入力します。
### 学習機能のデバッグ
学習機能のデバッグのため、セッションコマンドには複数の機能が用意されています。`--enable_memory`の状態では、デフォルトで学習が有効になり、一時ディレクトリに学習データが蓄積されます。
```bash
$ anco session --roman2kana -n 10 --disable_prediction --enable_memory
```
セーブを実施するには以下のように`:save`を入力します。
```txt
rime
:h
:n
:14
:4
:save
```
すでに存在する学習データをread onlyで読み込むこともできます。
```bash
$ anco session --roman2kana -n 10 --disable_prediction --readonly_memory ./memory
```
この場合、`:save`コマンドは何も行いません。
## 辞書リーダ ## 辞書リーダ
`anco dict`コマンドを利用して辞書データを解析することが出来ます。 `anco dict`コマンドを利用して辞書データを解析することが出来ます。

View File

@ -17,6 +17,8 @@ extension Subcommands {
var disablePrediction = false var disablePrediction = false
@Flag(name: [.customLong("enable_memory")], help: "Enable memory.") @Flag(name: [.customLong("enable_memory")], help: "Enable memory.")
var enableLearning = false var enableLearning = false
@Option(name: [.customLong("readonly_memory")], help: "Enable readonly memory.")
var readOnlyMemoryPath: String?
@Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).") @Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).")
var onlyWholeConversion = false var onlyWholeConversion = false
@Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.") @Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.")
@ -73,7 +75,19 @@ extension Subcommands {
if !self.zenzWeightPath.isEmpty && (!self.zenzV1 && !self.zenzV2 && !self.zenzV3) { if !self.zenzWeightPath.isEmpty && (!self.zenzV1 && !self.zenzV2 && !self.zenzV3) {
print("zenz version is not specified. By default, zenz-v3 will be used.") print("zenz version is not specified. By default, zenz-v3 will be used.")
} }
let memoryDirectory = if self.enableLearning { let learningType: LearningType = if self.readOnlyMemoryPath != nil {
//
.onlyOutput
} else if self.enableLearning {
//
.inputAndOutput
} else {
//
.nothing
}
let memoryDirectory = if let readOnlyMemoryPath {
URL(fileURLWithPath: readOnlyMemoryPath)
} else if self.enableLearning {
if let dir = self.getTemporaryDirectory() { if let dir = self.getTemporaryDirectory() {
dir dir
} else { } else {
@ -82,8 +96,12 @@ extension Subcommands {
} else { } else {
URL(fileURLWithPath: "") URL(fileURLWithPath: "")
} }
print("Working with \(learningType) mode. Memory path is \(memoryDirectory).")
let converter = KanaKanjiConverter() let converter = KanaKanjiConverter()
converter.sendToDicdataStore(
.setRequestOptions(requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: nil))
)
var composingText = ComposingText() var composingText = ComposingText()
let inputStyle: InputStyle = self.roman2kana ? .roman2kana : .direct let inputStyle: InputStyle = self.roman2kana ? .roman2kana : .direct
var lastCandidates: [Candidate] = [] var lastCandidates: [Candidate] = []
@ -142,14 +160,18 @@ extension Subcommands {
composingText.stopComposition() composingText.stopComposition()
converter.stopComposition() converter.stopComposition()
converter.sendToDicdataStore(.closeKeyboard) converter.sendToDicdataStore(.closeKeyboard)
print("saved") if learningType.needUpdateMemory {
print("saved")
} else {
print("anything should not be saved because the learning type is not for update memory")
}
continue continue
case ":p", ":pred": case ":p", ":pred":
// //
let results = converter.predictNextCharacter( let results = converter.predictNextCharacter(
leftSideContext: leftSideContext, leftSideContext: leftSideContext,
count: 10, count: 10,
options: requestOptions(memoryDirectory: memoryDirectory, leftSideContext: leftSideContext) options: requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: leftSideContext)
) )
if let firstCandidate = results.first { if let firstCandidate = results.first {
leftSideContext.append(firstCandidate.character) leftSideContext.append(firstCandidate.character)
@ -212,7 +234,7 @@ extension Subcommands {
} }
print(composingText.convertTarget) print(composingText.convertTarget)
let start = Date() let start = Date()
let result = converter.requestCandidates(composingText, options: requestOptions(memoryDirectory: memoryDirectory, leftSideContext: leftSideContext)) let result = converter.requestCandidates(composingText, options: requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: leftSideContext))
let mainResults = result.mainResults.filter { let mainResults = result.mainResults.filter {
!self.onlyWholeConversion || $0.data.reduce(into: "", {$0.append(contentsOf: $1.ruby)}) == input.toKatakana() !self.onlyWholeConversion || $0.data.reduce(into: "", {$0.append(contentsOf: $1.ruby)}) == input.toKatakana()
} }
@ -239,7 +261,7 @@ extension Subcommands {
} }
} }
func requestOptions(memoryDirectory: URL, leftSideContext: String) -> ConvertRequestOptions { func requestOptions(learningType: LearningType, memoryDirectory: URL, leftSideContext: String?) -> ConvertRequestOptions {
let zenzaiVersionDependentMode: ConvertRequestOptions.ZenzaiVersionDependentMode = if self.zenzV1 { let zenzaiVersionDependentMode: ConvertRequestOptions.ZenzaiVersionDependentMode = if self.zenzV1 {
.v1 .v1
} else if self.zenzV2 { } else if self.zenzV2 {
@ -271,8 +293,7 @@ extension Subcommands {
englishCandidateInRoman2KanaInput: true, englishCandidateInRoman2KanaInput: true,
fullWidthRomanCandidate: false, fullWidthRomanCandidate: false,
halfWidthKanaCandidate: false, halfWidthKanaCandidate: false,
learningType: enableLearning ? .inputAndOutput : .nothing, learningType: learningType,
maxMemoryCount: 0,
shouldResetMemory: false, shouldResetMemory: false,
memoryDirectoryURL: memoryDirectory, memoryDirectoryURL: memoryDirectory,
sharedContainerURL: URL(fileURLWithPath: ""), sharedContainerURL: URL(fileURLWithPath: ""),

View File

@ -0,0 +1,46 @@
import Foundation
extension KanaKanjiConverter {
    /// Builds a candidate that renders a plain ASCII number with thousands separators.
    ///
    /// The convert target is accepted when it consists of an optional leading `-`,
    /// one or more ASCII digits, and optionally a single `.` followed by one or more
    /// ASCII digits. Only the integer part is grouped; the fractional part is appended
    /// unchanged. Inputs whose integer part has three digits or fewer yield no candidate.
    ///
    /// - Parameter inputData: The composing text; its `convertTarget` is the string examined.
    /// - Returns: An array with a single candidate (e.g. `"49000"` becomes `"49,000"`),
    ///   or an empty array when the input does not match the accepted shape.
    func commaSeparatedNumberCandidates(_ inputData: ComposingText) -> [Candidate] {
        let target = inputData.convertTarget
        guard !target.isEmpty else { return [] }

        // Strip a single leading minus sign; it is re-attached after grouping.
        let isNegative = target.first == "-"
        let unsigned = isNegative ? target.dropFirst() : target[...]

        // Keep empty pieces so that inputs like "1234." or ".5" are rejected below.
        let parts = unsigned.split(separator: ".", omittingEmptySubsequences: false)
        guard parts.count <= 2,
              parts.allSatisfy({ part in
                  !part.isEmpty && part.allSatisfy({ $0.isNumber && $0.isASCII })
              }) else {
            return []
        }

        let integerPart = parts[0]
        // Three digits or fewer never need a separator.
        guard integerPart.count > 3 else { return [] }

        // Single forward pass: put a comma before any digit that has a multiple of
        // three digits remaining (itself included), except at the very start.
        let digitCount = integerPart.count
        var grouped = ""
        for (offset, digit) in integerPart.enumerated() {
            if offset > 0 && (digitCount - offset) % 3 == 0 {
                grouped.append(",")
            }
            grouped.append(digit)
        }

        var result = (isNegative ? "-" : "") + grouped
        if parts.count == 2 {
            result += "." + parts[1]
        }

        let ruby = inputData.convertTarget.toKatakana()
        // NOTE(review): the member name between `MIDData.`/`CIDData.` and `.mid`/`.cid`
        // is missing in this view of the file (it looks like a non-ASCII identifier was
        // dropped). The tokens are kept exactly as found; restore them from the
        // repository before building.
        let candidate = Candidate(
            text: result,
            value: -10,
            correspondingCount: inputData.input.count,
            lastMid: MIDData..mid,
            data: [DicdataElement(word: result, ruby: ruby, cid: CIDData..cid, mid: MIDData..mid, value: -10)]
        )
        return [candidate]
    }
}

View File

@ -83,6 +83,7 @@ public struct ConvertRequestOptions: Sendable {
specialCandidateProviders.append(.timeExpression) specialCandidateProviders.append(.timeExpression)
specialCandidateProviders.append(.calendar) specialCandidateProviders.append(.calendar)
specialCandidateProviders.append(.version) specialCandidateProviders.append(.version)
specialCandidateProviders.append(.commaSeparatedNumber)
self.N_best = N_best self.N_best = N_best
self.requireJapanesePrediction = requireJapanesePrediction self.requireJapanesePrediction = requireJapanesePrediction

View File

@ -23,7 +23,8 @@ import EfficientNGram
EmailAddressSpecialCandidateProvider(), EmailAddressSpecialCandidateProvider(),
UnicodeSpecialCandidateProvider(), UnicodeSpecialCandidateProvider(),
VersionSpecialCandidateProvider(), VersionSpecialCandidateProvider(),
TimeExpressionSpecialCandidateProvider() TimeExpressionSpecialCandidateProvider(),
CommaSeparatedNumberSpecialCandidateProvider()
] ]
@MainActor private var checker = SpellChecker() @MainActor private var checker = SpellChecker()
private var checkerInitialized: [KeyboardLanguage: Bool] = [.none: true, .ja_JP: true] private var checkerInitialized: [KeyboardLanguage: Bool] = [.none: true, .ja_JP: true]

View File

@ -45,6 +45,13 @@ public struct TimeExpressionSpecialCandidateProvider: SpecialCandidateProvider {
} }
} }
/// Special candidate provider backed by
/// `KanaKanjiConverter.commaSeparatedNumberCandidates(_:)`.
///
/// The provider holds no state and ignores the request options; it simply forwards
/// the composing text to the converter.
public struct CommaSeparatedNumberSpecialCandidateProvider: SpecialCandidateProvider {
    /// Creates the provider.
    public init() {}

    /// Returns whatever `converter.commaSeparatedNumberCandidates(_:)` produces for `inputData`.
    ///
    /// - Parameters:
    ///   - converter: The converter the work is delegated to.
    ///   - inputData: The composing text to examine.
    ///   - options: Unused.
    /// - Returns: The candidates produced by the converter.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
        let candidates = converter.commaSeparatedNumberCandidates(inputData)
        return candidates
    }
}
public extension SpecialCandidateProvider where Self == CalendarSpecialCandidateProvider { public extension SpecialCandidateProvider where Self == CalendarSpecialCandidateProvider {
static var calendar: Self { .init() } static var calendar: Self { .init() }
} }
@ -68,3 +75,7 @@ public extension SpecialCandidateProvider where Self == VersionSpecialCandidateP
public extension SpecialCandidateProvider where Self == TimeExpressionSpecialCandidateProvider { public extension SpecialCandidateProvider where Self == TimeExpressionSpecialCandidateProvider {
static var timeExpression: Self { .init() } static var timeExpression: Self { .init() }
} }
public extension SpecialCandidateProvider where Self == CommaSeparatedNumberSpecialCandidateProvider {
    /// Shorthand that lets callers write `.commaSeparatedNumber` where a
    /// `SpecialCandidateProvider` value is expected; returns a fresh provider each time.
    static var commaSeparatedNumber: Self {
        CommaSeparatedNumberSpecialCandidateProvider()
    }
}

View File

@ -67,7 +67,9 @@ public final class DicdataStore {
self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount) self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount)
} }
} }
self.reloadUser()
_ = self.loadLOUDS(query: "user") _ = self.loadLOUDS(query: "user")
self.reloadMemory()
_ = self.loadLOUDS(query: "memory") _ = self.loadLOUDS(query: "memory")
if requestOptions.preloadDictionary { if requestOptions.preloadDictionary {

View File

@ -858,6 +858,7 @@ final class LearningManager {
func save() { func save() {
if !options.learningType.needUpdateMemory { if !options.learningType.needUpdateMemory {
debug(#function, "options.learningType=\(options.learningType)", "skip memory update")
return return
} }
do { do {

View File

@ -24,7 +24,7 @@ public enum LearningType: Int, CaseIterable, Sendable {
case onlyOutput case onlyOutput
case nothing case nothing
var needUpdateMemory: Bool { package var needUpdateMemory: Bool {
self == .inputAndOutput self == .inputAndOutput
} }

View File

@ -0,0 +1,41 @@
import XCTest
@testable import KanaKanjiConverterModule
/// Exercises `KanaKanjiConverter.commaSeparatedNumberCandidates(_:)` with inputs that
/// should be grouped and inputs that should produce no candidate.
final class CommaSeparatedNumberTests: XCTestCase {
    /// Wraps `input` in a `ComposingText` where every character uses the `.direct`
    /// input style and the cursor sits at the end of the text.
    private func makeDirectInput(direct input: String) -> ComposingText {
        let cursorPosition = input.count
        return ComposingText(
            convertTargetCursorPosition: cursorPosition,
            input: input.map { .init(character: $0, inputStyle: .direct) },
            convertTarget: input
        )
    }

    func testCommaSeparatedNumberCandidates() async throws {
        let converter = await KanaKanjiConverter()

        /// Runs the converter on `text` typed as direct input.
        func candidates(for text: String) async -> [Candidate] {
            await converter.commaSeparatedNumberCandidates(makeDirectInput(direct: text))
        }

        // Inputs that must be grouped, paired with the expected first candidate text.
        let formattedCases: [(input: String, expected: String)] = [
            ("49000", "49,000"),
            ("109428081", "109,428,081"),
            ("2129.49", "2,129.49"),
            ("-13932", "-13,932")
        ]
        for testCase in formattedCases {
            let produced = await candidates(for: testCase.input)
            XCTAssertEqual(produced.first?.text, testCase.expected)
        }

        // Too short, non-numeric, and empty inputs must yield nothing.
        let rejectedInputs = ["12", "1A9B", ""]
        for input in rejectedInputs {
            let produced = await candidates(for: input)
            XCTAssertTrue(produced.isEmpty)
        }
    }
}

View File

@ -2,26 +2,50 @@
set -e set -e
USE_ZENZAI=0 USE_ZENZAI=0
USE_DEBUG=0
# 引数の解析 # 引数の解析
for arg in "$@"; do for arg in "$@"; do
if [ "$arg" = "--zenzai" ]; then if [ "$arg" = "--zenzai" ]; then
USE_ZENZAI=1 USE_ZENZAI=1
fi fi
if [ "$arg" = "--debug" ]; then
echo "⚠️ Debug mode is enabled. This may cause performance issues."
USE_DEBUG=1
fi
done done
if [ "$USE_DEBUG" -eq 1 ]; then
CONFIGURATION="debug"
else
CONFIGURATION="release"
fi
if [ "$USE_ZENZAI" -eq 1 ]; then if [ "$USE_ZENZAI" -eq 1 ]; then
echo "📦 Building with Zenzai support..." echo "📦 Building with Zenzai support..."
swift build -c release -Xcxx -xobjective-c++ --traits Zenzai swift build -c $CONFIGURATION -Xcxx -xobjective-c++ --traits Zenzai
else else
echo "📦 Building..." echo "📦 Building..."
# Build swift build -c $CONFIGURATION -Xcxx -xobjective-c++
swift build -c release -Xcxx -xobjective-c++
fi fi
# Copy Required Resources # Copy Required Resources
sudo cp -R .build/release/llama.framework /usr/local/lib/ sudo cp -R .build/${CONFIGURATION}/llama.framework /usr/local/lib/
# add rpath # add rpath
install_name_tool -add_rpath /usr/local/lib/ .build/release/CliTool RPATH="/usr/local/lib/"
BINARY_PATH=".build/${CONFIGURATION}/CliTool"
if ! otool -l "$BINARY_PATH" | grep -q "$RPATH"; then
install_name_tool -add_rpath "$RPATH" "$BINARY_PATH"
else
echo "✅ RPATH $RPATH is already present in $BINARY_PATH"
fi
# if debug mode, codesign is required to execute
if [ "$USE_DEBUG" -eq 1 ]; then
echo "🔒 Signing the binary for debug mode..."
codesign --force --sign - .build/${CONFIGURATION}/CliTool
fi
# Install # Install
sudo cp -f .build/release/CliTool /usr/local/bin/anco sudo cp -f .build/${CONFIGURATION}/CliTool /usr/local/bin/anco