From 15fe1ab16662635953952af6ddd4232e5bf61769 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 06:45:43 +0000 Subject: [PATCH 1/6] build(deps): bump Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage Bumps [Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage](https://github.com/ensan-hcl/azooKey_dictionary_storage) from `cb41cee` to `dd6ffab`. - [Release notes](https://github.com/ensan-hcl/azooKey_dictionary_storage/releases) - [Commits](https://github.com/ensan-hcl/azooKey_dictionary_storage/compare/cb41ceeb78cb49ac90f5f4cd227ec4a6f3ded566...dd6ffab8f11e789d30e84117577639b6797c517e) --- updated-dependencies: - dependency-name: Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage dependency-version: dd6ffab8f11e789d30e84117577639b6797c517e dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- .../azooKey_dictionary_storage | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage b/Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage index cb41cee..dd6ffab 160000 --- a/Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage +++ b/Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage @@ -1 +1 @@ -Subproject commit cb41ceeb78cb49ac90f5f4cd227ec4a6f3ded566 +Subproject commit dd6ffab8f11e789d30e84117577639b6797c517e From 5e5845889517616962d69e00b60283ac85e2bd40 Mon Sep 17 00:00:00 2001 From: Miwa <63481257+ensan-hcl@users.noreply.github.com> Date: Sun, 25 May 2025 18:40:18 +0900 Subject: [PATCH 2/6] Add comma separated number special candidate --- .../Converter/CommaSeparatedNumber.swift | 46 +++++++++++++++++++ .../Converter/ConvertRequestOptions.swift | 1 + .../Converter/KanaKanjiConverter.swift | 3 +- 
.../Converter/SpecialCandidateProvider.swift | 11 +++++ .../CommaSeparatedNumberTests.swift | 41 +++++++++++++++++ 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 Sources/KanaKanjiConverterModule/Converter/CommaSeparatedNumber.swift create mode 100644 Tests/KanaKanjiConverterModuleTests/ConverterTests/CommaSeparatedNumberTests.swift diff --git a/Sources/KanaKanjiConverterModule/Converter/CommaSeparatedNumber.swift b/Sources/KanaKanjiConverterModule/Converter/CommaSeparatedNumber.swift new file mode 100644 index 0000000..9e238d4 --- /dev/null +++ b/Sources/KanaKanjiConverterModule/Converter/CommaSeparatedNumber.swift @@ -0,0 +1,46 @@ +import Foundation + +extension KanaKanjiConverter { + func commaSeparatedNumberCandidates(_ inputData: ComposingText) -> [Candidate] { + var text = inputData.convertTarget + guard !text.isEmpty else { return [] } + + var negative = false + if text.first == "-" { + negative = true + text.removeFirst() + } + let parts = text.split(separator: ".", omittingEmptySubsequences: false) + guard parts.count <= 2, + parts.allSatisfy({ !$0.isEmpty && $0.allSatisfy({ $0.isNumber && $0.isASCII }) }) else { + return [] + } + let integerPart = parts[0] + guard integerPart.count > 3 else { return [] } + + var reversed = Array(integerPart.reversed()) + var formatted = "" + for (i, ch) in reversed.enumerated() { + if i > 0 && i % 3 == 0 { + formatted.append(",") + } + formatted.append(ch) + } + let integerString = String(formatted.reversed()) + var result = (negative ? "-" : "") + integerString + if parts.count == 2 { + let fractional = parts[1] + result += "." 
+ fractional + } + + let ruby = inputData.convertTarget.toKatakana() + let candidate = Candidate( + text: result, + value: -10, + correspondingCount: inputData.input.count, + lastMid: MIDData.一般.mid, + data: [DicdataElement(word: result, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)] + ) + return [candidate] + } +} diff --git a/Sources/KanaKanjiConverterModule/Converter/ConvertRequestOptions.swift b/Sources/KanaKanjiConverterModule/Converter/ConvertRequestOptions.swift index 4723609..a3e3d96 100644 --- a/Sources/KanaKanjiConverterModule/Converter/ConvertRequestOptions.swift +++ b/Sources/KanaKanjiConverterModule/Converter/ConvertRequestOptions.swift @@ -83,6 +83,7 @@ public struct ConvertRequestOptions: Sendable { specialCandidateProviders.append(.timeExpression) specialCandidateProviders.append(.calendar) specialCandidateProviders.append(.version) + specialCandidateProviders.append(.commaSeparatedNumber) self.N_best = N_best self.requireJapanesePrediction = requireJapanesePrediction diff --git a/Sources/KanaKanjiConverterModule/Converter/KanaKanjiConverter.swift b/Sources/KanaKanjiConverterModule/Converter/KanaKanjiConverter.swift index b04ad9c..1ec8498 100644 --- a/Sources/KanaKanjiConverterModule/Converter/KanaKanjiConverter.swift +++ b/Sources/KanaKanjiConverterModule/Converter/KanaKanjiConverter.swift @@ -23,7 +23,8 @@ import EfficientNGram EmailAddressSpecialCandidateProvider(), UnicodeSpecialCandidateProvider(), VersionSpecialCandidateProvider(), - TimeExpressionSpecialCandidateProvider() + TimeExpressionSpecialCandidateProvider(), + CommaSeparatedNumberSpecialCandidateProvider() ] @MainActor private var checker = SpellChecker() private var checkerInitialized: [KeyboardLanguage: Bool] = [.none: true, .ja_JP: true] diff --git a/Sources/KanaKanjiConverterModule/Converter/SpecialCandidateProvider.swift b/Sources/KanaKanjiConverterModule/Converter/SpecialCandidateProvider.swift index f7dd403..54d81c1 100644 --- 
a/Sources/KanaKanjiConverterModule/Converter/SpecialCandidateProvider.swift +++ b/Sources/KanaKanjiConverterModule/Converter/SpecialCandidateProvider.swift @@ -45,6 +45,13 @@ public struct TimeExpressionSpecialCandidateProvider: SpecialCandidateProvider { } } +public struct CommaSeparatedNumberSpecialCandidateProvider: SpecialCandidateProvider { + public init() {} + @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] { + converter.commaSeparatedNumberCandidates(inputData) + } +} + public extension SpecialCandidateProvider where Self == CalendarSpecialCandidateProvider { static var calendar: Self { .init() } } @@ -68,3 +75,7 @@ public extension SpecialCandidateProvider where Self == VersionSpecialCandidateP public extension SpecialCandidateProvider where Self == TimeExpressionSpecialCandidateProvider { static var timeExpression: Self { .init() } } + +public extension SpecialCandidateProvider where Self == CommaSeparatedNumberSpecialCandidateProvider { + static var commaSeparatedNumber: Self { .init() } +} diff --git a/Tests/KanaKanjiConverterModuleTests/ConverterTests/CommaSeparatedNumberTests.swift b/Tests/KanaKanjiConverterModuleTests/ConverterTests/CommaSeparatedNumberTests.swift new file mode 100644 index 0000000..c08b36e --- /dev/null +++ b/Tests/KanaKanjiConverterModuleTests/ConverterTests/CommaSeparatedNumberTests.swift @@ -0,0 +1,41 @@ +import XCTest +@testable import KanaKanjiConverterModule + +final class CommaSeparatedNumberTests: XCTestCase { + private func makeDirectInput(direct input: String) -> ComposingText { + ComposingText( + convertTargetCursorPosition: input.count, + input: input.map { .init(character: $0, inputStyle: .direct) }, + convertTarget: input + ) + } + + func testCommaSeparatedNumberCandidates() async throws { + let converter = await KanaKanjiConverter() + + func result(_ text: String) async -> [Candidate] { + await 
converter.commaSeparatedNumberCandidates(makeDirectInput(direct: text)) + } + + let r1 = await result("49000") + XCTAssertEqual(r1.first?.text, "49,000") + + let r2 = await result("109428081") + XCTAssertEqual(r2.first?.text, "109,428,081") + + let r3 = await result("2129.49") + XCTAssertEqual(r3.first?.text, "2,129.49") + + let r4 = await result("-13932") + XCTAssertEqual(r4.first?.text, "-13,932") + + let r5 = await result("12") + XCTAssertTrue(r5.isEmpty) + + let r6 = await result("1A9B") + XCTAssertTrue(r6.isEmpty) + + let r7 = await result("123") + XCTAssertTrue(r7.isEmpty) + } +} From db76b988fd1c4108b3941cf72b197ec82b5bc336 Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Sun, 1 Jun 2025 18:40:49 +0900 Subject: [PATCH 3/6] =?UTF-8?q?fix:=20setup=E6=99=82=E3=81=AB=E5=BF=85?= =?UTF-8?q?=E3=81=9A=E3=83=A1=E3=83=A2=E3=83=AA=E3=83=BC=E3=82=92=E3=83=AA?= =?UTF-8?q?=E3=82=BB=E3=83=83=E3=83=88=E3=81=97=E3=81=A6=E3=81=8B=E3=82=89?= =?UTF-8?q?=E3=83=AD=E3=83=BC=E3=83=89=E3=81=99=E3=82=8B=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../KanaKanjiConverterModule/DicdataStore/DicdataStore.swift | 2 ++ .../KanaKanjiConverterModule/DicdataStore/LearningMemory.swift | 1 + 2 files changed, 3 insertions(+) diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift index 7e3da24..c08944b 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift @@ -67,7 +67,9 @@ public final class DicdataStore { self.mmValue = [PValue].init(repeating: .zero, count: self.midCount * self.midCount) } } + self.reloadUser() _ = self.loadLOUDS(query: "user") + self.reloadMemory() _ = self.loadLOUDS(query: "memory") if requestOptions.preloadDictionary { diff --git 
a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift index 27f9d43..7066974 100644 --- a/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift +++ b/Sources/KanaKanjiConverterModule/DicdataStore/LearningMemory.swift @@ -845,6 +845,7 @@ final class LearningManager { func save() { if !options.learningType.needUpdateMemory { + debug(#function, "options.learningType=\(options.learningType)", "skip memory update") return } do { From 6d49f671646d6d11ab91bef4e9c2220b394efe82 Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Sun, 1 Jun 2025 18:41:34 +0900 Subject: [PATCH 4/6] =?UTF-8?q?feat:=20--readonly=5Fmemory=20option?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E5=A4=96=E9=83=A8?= =?UTF-8?q?=E3=81=AE=E5=AD=A6=E7=BF=92=E3=83=87=E3=83=BC=E3=82=BF=E3=82=92?= =?UTF-8?q?=E7=94=A8=E3=81=84=E3=81=9F=E3=83=87=E3=83=90=E3=83=83=E3=82=B0?= =?UTF-8?q?=E3=82=92=E5=AE=9F=E8=A1=8C=E5=8F=AF=E8=83=BD=E3=81=AB=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../CliTool/Subcommands/SessionCommand.swift | 35 +++++++++++++++---- Sources/KanaKanjiConverterModule/States.swift | 2 +- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/Sources/CliTool/Subcommands/SessionCommand.swift b/Sources/CliTool/Subcommands/SessionCommand.swift index b490d72..504637c 100644 --- a/Sources/CliTool/Subcommands/SessionCommand.swift +++ b/Sources/CliTool/Subcommands/SessionCommand.swift @@ -17,6 +17,8 @@ extension Subcommands { var disablePrediction = false @Flag(name: [.customLong("enable_memory")], help: "Enable memory.") var enableLearning = false + @Option(name: [.customLong("readonly_memory")], help: "Enable readonly memory.") + var readOnlyMemoryPath: String? 
@Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).") var onlyWholeConversion = false @Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.") @@ -73,7 +75,19 @@ extension Subcommands { if !self.zenzWeightPath.isEmpty && (!self.zenzV1 && !self.zenzV2 && !self.zenzV3) { print("zenz version is not specified. By default, zenz-v3 will be used.") } - let memoryDirectory = if self.enableLearning { + let learningType: LearningType = if self.readOnlyMemoryPath != nil { + // 読み取りのみ + .onlyOutput + } else if self.enableLearning { + // 読み書き + .inputAndOutput + } else { + // 読み書きなし + .nothing + } + let memoryDirectory = if let readOnlyMemoryPath { + URL(fileURLWithPath: readOnlyMemoryPath) + } else if self.enableLearning { if let dir = self.getTemporaryDirectory() { dir } else { @@ -82,8 +96,12 @@ extension Subcommands { } else { URL(fileURLWithPath: "") } + print("Working with \(learningType) mode. Memory path is \(memoryDirectory).") let converter = KanaKanjiConverter() + converter.sendToDicdataStore( + .setRequestOptions(requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: nil)) + ) var composingText = ComposingText() let inputStyle: InputStyle = self.roman2kana ? 
.roman2kana : .direct var lastCandidates: [Candidate] = [] @@ -142,14 +160,18 @@ extension Subcommands { composingText.stopComposition() converter.stopComposition() converter.sendToDicdataStore(.closeKeyboard) - print("saved") + if learningType.needUpdateMemory { + print("saved") + } else { + print("anything should not be saved because the learning type is not for update memory") + } continue case ":p", ":pred": // 次の文字の予測を取得する let results = converter.predictNextCharacter( leftSideContext: leftSideContext, count: 10, - options: requestOptions(memoryDirectory: memoryDirectory, leftSideContext: leftSideContext) + options: requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: leftSideContext) ) if let firstCandidate = results.first { leftSideContext.append(firstCandidate.character) @@ -212,7 +234,7 @@ extension Subcommands { } print(composingText.convertTarget) let start = Date() - let result = converter.requestCandidates(composingText, options: requestOptions(memoryDirectory: memoryDirectory, leftSideContext: leftSideContext)) + let result = converter.requestCandidates(composingText, options: requestOptions(learningType: learningType, memoryDirectory: memoryDirectory, leftSideContext: leftSideContext)) let mainResults = result.mainResults.filter { !self.onlyWholeConversion || $0.data.reduce(into: "", {$0.append(contentsOf: $1.ruby)}) == input.toKatakana() } @@ -239,7 +261,7 @@ extension Subcommands { } } - func requestOptions(memoryDirectory: URL, leftSideContext: String) -> ConvertRequestOptions { + func requestOptions(learningType: LearningType, memoryDirectory: URL, leftSideContext: String?) -> ConvertRequestOptions { let zenzaiVersionDependentMode: ConvertRequestOptions.ZenzaiVersionDependentMode = if self.zenzV1 { .v1 } else if self.zenzV2 { @@ -271,8 +293,7 @@ extension Subcommands { englishCandidateInRoman2KanaInput: true, fullWidthRomanCandidate: false, halfWidthKanaCandidate: false, - learningType: enableLearning ? 
.inputAndOutput : .nothing, - maxMemoryCount: 0, + learningType: learningType, shouldResetMemory: false, memoryDirectoryURL: memoryDirectory, sharedContainerURL: URL(fileURLWithPath: ""), diff --git a/Sources/KanaKanjiConverterModule/States.swift b/Sources/KanaKanjiConverterModule/States.swift index 11caba5..12629d1 100644 --- a/Sources/KanaKanjiConverterModule/States.swift +++ b/Sources/KanaKanjiConverterModule/States.swift @@ -24,7 +24,7 @@ public enum LearningType: Int, CaseIterable, Sendable { case onlyOutput case nothing - var needUpdateMemory: Bool { + package var needUpdateMemory: Bool { self == .inputAndOutput } From 2c5166740fc00c80149824815e9ab20dfd5f7db5 Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Sun, 1 Jun 2025 18:41:49 +0900 Subject: [PATCH 5/6] feat: add --debug mode --- install_cli.sh | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/install_cli.sh b/install_cli.sh index 597f414..433680c 100755 --- a/install_cli.sh +++ b/install_cli.sh @@ -2,26 +2,50 @@ set -e USE_ZENZAI=0 +USE_DEBUG=0 # 引数の解析 for arg in "$@"; do if [ "$arg" = "--zenzai" ]; then USE_ZENZAI=1 fi + if [ "$arg" = "--debug" ]; then + echo "⚠️ Debug mode is enabled. This may cause performance issues." + USE_DEBUG=1 + fi done +if [ "$USE_DEBUG" -eq 1 ]; then + CONFIGURATION="debug" +else + CONFIGURATION="release" +fi + if [ "$USE_ZENZAI" -eq 1 ]; then echo "📦 Building with Zenzai support..." - swift build -c release -Xcxx -xobjective-c++ --traits Zenzai + swift build -c $CONFIGURATION -Xcxx -xobjective-c++ --traits Zenzai else echo "📦 Building..." 
- # Build - swift build -c release -Xcxx -xobjective-c++ + swift build -c $CONFIGURATION -Xcxx -xobjective-c++ fi # Copy Required Resources -sudo cp -R .build/release/llama.framework /usr/local/lib/ +sudo cp -R .build/${CONFIGURATION}/llama.framework /usr/local/lib/ + # add rpath -install_name_tool -add_rpath /usr/local/lib/ .build/release/CliTool +RPATH="/usr/local/lib/" +BINARY_PATH=".build/${CONFIGURATION}/CliTool" + +if ! otool -l "$BINARY_PATH" | grep -q "$RPATH"; then + install_name_tool -add_rpath "$RPATH" "$BINARY_PATH" +else + echo "✅ RPATH $RPATH is already present in $BINARY_PATH" +fi +# if debug mode, codesign is required to execute +if [ "$USE_DEBUG" -eq 1 ]; then + echo "🔒 Signing the binary for debug mode..." + codesign --force --sign - .build/${CONFIGURATION}/CliTool +fi + # Install -sudo cp -f .build/release/CliTool /usr/local/bin/anco +sudo cp -f .build/${CONFIGURATION}/CliTool /usr/local/bin/anco From 007d53455633e2d436e2d837e6cc20b672062b72 Mon Sep 17 00:00:00 2001 From: Miwa / Ensan Date: Sun, 1 Jun 2025 18:42:02 +0900 Subject: [PATCH 6/6] =?UTF-8?q?feat:=20cli=E3=81=AE=E4=BD=BF=E3=81=84?= =?UTF-8?q?=E6=96=B9=E3=82=92=E8=A3=9C=E8=B6=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Docs/cli.md | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/Docs/cli.md b/Docs/cli.md index 2720ed2..981abae 100644 --- a/Docs/cli.md +++ b/Docs/cli.md @@ -5,13 +5,19 @@ `anco`を利用するには、最初にinstallが必要です。`/usr/local/bin/`に`anco`が追加されます。 ```bash -sudo sh install_cli.sh +./install_cli.sh ``` Zenzaiを利用する場合は、`--zenzai`オプションを付けてください。 ```bash -sudo sh install_cli.sh --zenzai +./install_cli.sh --zenzai +``` + +デフォルトでは、ほとんどの情報は出力されません。デバッグモードで実行するには`--debug`オプションを付けてください。 + +```bash +./install_cli.sh --debug ``` 例えば以下のように利用できます。 @@ -55,7 +61,7 @@ $ anco evaluate ./evaluation.tsv --config_n_best 1 
出力はJSONフォーマットです。出力内容の安定が必要な場合`--stable`を指定することで比較的安定した出力を得られます。ただしスコアやエントロピーは辞書バージョンに依存します。 -## 対話的実行 +## 対話的実行API 少しずつ入力を進めるような実用的な場面を模した環境として`anco session`コマンドが用意されています。 @@ -67,6 +73,55 @@ $ anco session --roman2kana -n 10 --disable_prediction キーを入力してEnterを押すと変換候補が表示されます。`:`で始まる特殊コマンドを利用することで、削除、確定、文脈の設定などの諸操作を行うことが出来ます。 +### リプレイ + +`--replay`を用いると、セッションの中での一連の動作を再現することができます。 + +```bash +anco session --roman2kana -n 10 --disable_prediction --replay history.txt +``` + +`history.txt`は例えば以下のような内容が含まれます。 + +``` +a +i +u +e +e +:del +o +:0 +``` + +現在実行中のセッションから`history.txt`を作成するには`:dump history.txt`と入力します。 + +### 学習機能のデバッグ +学習機能のデバッグのため、セッションコマンドには複数の機能が用意されています。`--enable_memory`の状態では、デフォルトで学習が有効になり、一時ディレクトリに学習データが蓄積されます。 + +```bash +$ anco session --roman2kana -n 10 --disable_prediction --enable_memory +``` + +セーブを実施するには以下のように`:save`を入力します。 + +```txt +rime +:h +:n +:14 +:4 +:save +``` + +すでに存在する学習データをread onlyで読み込むこともできます。 + +```bash +$ anco session --roman2kana -n 10 --disable_prediction --readonly_memory ./memory +``` + +この場合、`:save`コマンドは何も行いません。 + ## 辞書リーダ `anco dict`コマンドを利用して辞書データを解析することが出来ます。