From 4f2750ef3ea5484428568634fe7522bdb6f2a7f0 Mon Sep 17 00:00:00 2001 From: Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com> Date: Mon, 29 Apr 2024 19:26:28 +0900 Subject: [PATCH] =?UTF-8?q?[Tools]=20=E8=BE=9E=E6=9B=B8=E3=83=87=E3=83=BC?= =?UTF-8?q?=E3=82=BF=E8=A7=A3=E6=9E=90=E7=94=A8=E3=81=AE=E3=82=B5=E3=83=96?= =?UTF-8?q?=E3=82=B3=E3=83=9E=E3=83=B3=E3=83=89=E3=82=92`anco`=E3=81=AB?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=20(#85)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * base value should be updated here * feature: implement cli tool for reading louds data * Update document --- Docs/cli.md | 43 +++++- Sources/CliTool/Anco.swift | 2 +- ...StringInterpolation+CommandLineUtils.swift | 14 ++ .../DictCommands/DictCommand.swift | 13 ++ .../DictCommands/ReadCommand.swift | 123 ++++++++++++++++++ .../DicdataStore/DicdataStore.swift | 2 +- .../LOUDS/LOUDS.swift | 4 +- .../LOUDS/extension LOUDS.swift | 2 +- 8 files changed, 195 insertions(+), 8 deletions(-) create mode 100644 Sources/CliTool/DefaultStringInterpolation+CommandLineUtils.swift create mode 100644 Sources/CliTool/Subcommands/DictCommands/DictCommand.swift create mode 100644 Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift diff --git a/Docs/cli.md b/Docs/cli.md index 937b022..3553bc4 100644 --- a/Docs/cli.md +++ b/Docs/cli.md @@ -4,13 +4,13 @@ `anco`を利用するには、最初にinstallが必要です。 -``` -sh install_cli.sh +```bash +sudo sh install_cli.sh ``` 例えば以下のように利用できます。 -``` +```bash your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10 日本語入力 にほんご入力 @@ -23,3 +23,40 @@ your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10 にほんご 2本後 ``` + +## 変換API + +`anco run`コマンドを利用して変換を行うことが出来ます。 + +## 辞書リーダ + +`anco dict`コマンドを利用して辞書データを解析することが出来ます。 + +```bash +your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ +=== Summary for target ア === +- directory: 
./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ +- target: ア +- memory?: false +- count of entry: 24189 +- time for execute: 0.0378040075302124 +``` + +`--ruby`および`--word`オプションを利用して、正規表現でフィルターをかけることが出来ます。 + +```bash +your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ --word ".*全" +=== Summary for target ア === +- directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ +- target: ア +- memory?: false +- count of entry: 24189 +- time for execute: 0.07062792778015137 +=== Found Entries === +- count of found entry: 3 +Ruby: アキラ Word: 全 Value: -11.7107 CID: (1291, 1291) MID: 424 +Ruby: アンゼン Word: 安全 Value: -7.241 CID: (1287, 1287) MID: 169 +Ruby: アンシンアンゼン Word: 安心安全 Value: -11.7638 CID: (1283, 1287) MID: 17 +``` + +`--sort`オプションを使うとエントリーの並び替えが可能です。 diff --git a/Sources/CliTool/Anco.swift b/Sources/CliTool/Anco.swift index e96f17c..59bf34f 100644 --- a/Sources/CliTool/Anco.swift +++ b/Sources/CliTool/Anco.swift @@ -5,7 +5,7 @@ import ArgumentParser public struct Anco: ParsableCommand { public static var configuration = CommandConfiguration( abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter", - subcommands: [Subcommands.Run.self], + subcommands: [Subcommands.Run.self, Subcommands.Dict.self], defaultSubcommand: Subcommands.Run.self ) diff --git a/Sources/CliTool/DefaultStringInterpolation+CommandLineUtils.swift b/Sources/CliTool/DefaultStringInterpolation+CommandLineUtils.swift new file mode 100644 index 0000000..d2294e8 --- /dev/null +++ b/Sources/CliTool/DefaultStringInterpolation+CommandLineUtils.swift @@ -0,0 +1,14 @@ +// +// DefaultStringInterpolation+CommandLineUtils.swift +// +// +// Created by miwa on 2024/04/29. 
+//
+
+import Foundation
+
+extension DefaultStringInterpolation {
+    mutating func appendInterpolation(bold value: String){
+        self.appendInterpolation("\u{1B}[1m" + value + "\u{1B}[m")
+    }
+}
diff --git a/Sources/CliTool/Subcommands/DictCommands/DictCommand.swift b/Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
new file mode 100644
index 0000000..12c32bc
--- /dev/null
+++ b/Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
@@ -0,0 +1,13 @@
+import Foundation
+import KanaKanjiConverterModuleWithDefaultDictionary
+import ArgumentParser
+
+extension Subcommands {
+    struct Dict: ParsableCommand {
+        static var configuration = CommandConfiguration(
+            commandName: "dict",
+            abstract: "Show dict information",
+            subcommands: [Self.Read.self]
+        )
+    }
+}
diff --git a/Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift b/Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
new file mode 100644
index 0000000..8dbf261
--- /dev/null
+++ b/Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
@@ -0,0 +1,123 @@
+import Foundation
+import KanaKanjiConverterModule
+import ArgumentParser
+
+extension Subcommands.Dict {
+    struct Read: ParsableCommand {
+        enum SortOrder: String, Codable, ExpressibleByArgument {
+            case value
+            case ruby
+            case word
+
+            init?(argument: String) {
+                self.init(rawValue: argument)
+            }
+        }
+
+        @Argument(help: "辞書データのfilename")
+        var target: String = ""
+
+        @Option(name: [.customLong("dictionary_dir"), .customShort("d")], help: "The directory for dictionary data.")
+        var dictionaryDirectory: String = "./"
+
+        @Option(name: [.customLong("ruby")], help: "Regex for entry ruby filter")
+        var rubyFilter: String = ""
+
+        @Option(name: [.customLong("word")], help: "Regex for entry word filter")
+        var wordFilter: String = ""
+
+        @Option(name: [.customLong("sort")], help: "Sort order")
+        var sortOrder: SortOrder = .ruby
+
+        static var configuration = CommandConfiguration(
+            commandName: "read",
+            abstract: "Read dictionary data and extract informations"
+        )
+        /// Loads the LOUDS data named by `target`, prints a summary, then lists the entries matching `--ruby` / `--word` in `--sort` order.
+        @MainActor mutating func run() throws {
+            guard #available(macOS 13, *) else {
+                throw ValidationError("anco dict read requires macOS 13 or later")
+            }
+            let start = Date()
+            let isMemory = self.target == "memory"
+            guard let louds = LOUDS.load(self.target, option: self.requestOptions()) else {
+                print(
+                    """
+                    \(bold: "=== Summary for target \(self.target) ===")
+                    - directory: \(self.dictionaryDirectory)
+                    - target: \(self.target)
+                    - memory?: \(isMemory)
+                    - result: LOUDS data was not found
+                    - time for execute: \(Date().timeIntervalSince(start))
+                    """
+                )
+                return
+            }
+            // Fetch everything: an empty prefix with unlimited depth.
+            let nodeIndices = louds.prefixNodeIndices(chars: [], maxDepth: .max)
+            let store = DicdataStore(convertRequestOptions: self.requestOptions())
+            let result = store.getDicdataFromLoudstxt3(identifier: self.target, indices: nodeIndices)
+            var filteredResult = result
+            var hasFilter = false
+            if !rubyFilter.isEmpty {
+                let filter = try Regex(rubyFilter)
+                hasFilter = true
+                filteredResult = filteredResult.filter {
+                    $0.ruby.wholeMatch(of: filter) != nil
+                }
+            }
+            if !wordFilter.isEmpty {
+                let filter = try Regex(wordFilter)
+                hasFilter = true
+                filteredResult = filteredResult.filter {
+                    $0.word.wholeMatch(of: filter) != nil
+                }
+            }
+
+            print(
+                """
+                \(bold: "=== Summary for target \(self.target) ===")
+                - directory: \(self.dictionaryDirectory)
+                - target: \(self.target)
+                - memory?: \(isMemory)
+                - count of entry: \(result.count)
+                - time for execute: \(Date().timeIntervalSince(start))
+                """
+            )
+
+            if hasFilter {
+                let sortFunction: (DicdataElement, DicdataElement) -> Bool = switch self.sortOrder {
+                case .ruby: { ($0.ruby.count, $0.ruby) < ($1.ruby.count, $1.ruby) } // shorter first, then lexicographic: a strict weak ordering, as sorted(by:) requires
+                case .value: { $0.value() < $1.value() }
+                case .word: { $0.word < $1.word }
+                }
+
+                print("\(bold: "=== Found Entries ===")")
+                print("- count of found entry: \(filteredResult.count)")
+                for entry in filteredResult.sorted(by: sortFunction) {
+                    print("\(bold: "Ruby:") \(entry.ruby) \(bold: "Word:") \(entry.word) \(bold: "Value:") \(entry.value()) \(bold: "CID:") \((entry.lcid, entry.rcid)) \(bold: "MID:") \(entry.mid)")
+                }
+            }
+        }
+        /// Builds conversion options with learning disabled, pointing the dictionary, memory and shared-container URLs at `dictionaryDirectory`.
+        func requestOptions() -> ConvertRequestOptions {
+            .init(
+                N_best: 0,
+                requireJapanesePrediction: false,
+                requireEnglishPrediction: false,
+                keyboardLanguage: .ja_JP,
+                typographyLetterCandidate: false,
+                unicodeCandidate: true,
+                englishCandidateInRoman2KanaInput: true,
+                fullWidthRomanCandidate: false,
+                halfWidthKanaCandidate: false,
+                learningType: .nothing,
+                maxMemoryCount: 0,
+                dictionaryResourceURL: URL(fileURLWithPath: self.dictionaryDirectory),
+                memoryDirectoryURL: URL(fileURLWithPath: self.dictionaryDirectory),
+                sharedContainerURL: URL(fileURLWithPath: self.dictionaryDirectory),
+                metadata: .init(appVersionString: "anco")
+            )
+        }
+    }
+}
diff --git a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
index 2861e1a..c343e38 100644
--- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
+++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
@@ -189,7 +189,7 @@ public final class DicdataStore {
         return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
     }
 
-    func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
+    package func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
         debug("getDicdataFromLoudstxt3", identifier, indices)
         // split = 2048
         let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
diff --git a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
index d1e2540..c8afd63 100644
--- a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
+++ b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
@@ -9,7 +9,7 @@ import Foundation
 
 
 /// LOUDS
-struct LOUDS: Sendable {
+package struct LOUDS: Sendable {
     private typealias Unit = UInt64
     private static let unit = 64
     private static let uExp = 6
@@ -182,7 +182,7 @@ struct LOUDS: Sendable {
     /// - Parameter chars: CharIDに変換した文字列
     /// - Parameter maxDepth: 先に進む深さの最大値
     /// - Returns: 対応するloudstxt3ファイル内のインデックスのリスト
-    @inlinable func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
+    @inlinable package func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
         guard let nodeIndex = self.searchNodeIndex(chars: chars) else {
             return []
         }
diff --git a/Sources/KanaKanjiConverterModule/LOUDS/extension LOUDS.swift b/Sources/KanaKanjiConverterModule/LOUDS/extension LOUDS.swift
index b5c15fb..eda8745 100644
--- a/Sources/KanaKanjiConverterModule/LOUDS/extension LOUDS.swift
+++ b/Sources/KanaKanjiConverterModule/LOUDS/extension LOUDS.swift
@@ -54,7 +54,7 @@ extension LOUDS {
     /// LOUDSをファイルから読み込む関数
     /// - Parameter identifier: ファイル名
     /// - Returns: 存在すればLOUDSデータを返し、存在しなければ`nil`を返す。
-    static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
+    package static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
         let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option)
         let nodeIndex2ID: [UInt8]
         do {