[Tools] 辞書データ解析用のサブコマンドをancoに追加 (#85)

* base value should be updated here
* feature: implement cli tool for reading louds data
* Update document
2025-08-22 15:05:26 +00:00 · 2024-04-29 19:26:28 +09:00
parent f17958af47
commit 4f2750ef3e
8 changed files with 195 additions and 8 deletions
--- a/Docs/cli.md
+++ b/Docs/cli.md
@ -4,13 +4,13 @@
 `anco`を利用するには、最初にinstallが必要です。
-```
+```bash
-sh install_cli.sh
+sudo sh install_cli.sh
 ```
 例えば以下のように利用できます。
-```
+```bash
 your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
 日本語入力
 にほんご入力
@ -23,3 +23,40 @@ your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
 にほんご
 2本後
 ```
 ## 変換API
 `anco run`コマンドを利用して変換を行うことが出来ます。
 ## 辞書リーダ
 `anco dict`コマンドを利用して辞書データを解析することが出来ます。
 ```bash
 your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/                       
 === Summary for target ア ===
 - directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
 - target: ア
 - memory?: false
 - count of entry: 24189
 - time for execute: 0.0378040075302124
 ```
 `--ruby`および`--word`オプションを利用して、正規表現でフィルターをかけることが出来ます。
 ```bash
 your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ --word ".*全"
 === Summary for target ア ===
 - directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
 - target: ア
 - memory?: false
 - count of entry: 24189
 - time for execute: 0.07062792778015137
 === Found Entries ===
 - count of found entry: 3
 Ruby: アキラ Word: 全 Value: -11.7107 CID: (1291, 1291) MID: 424
 Ruby: アンゼン Word: 安全 Value: -7.241 CID: (1287, 1287) MID: 169
 Ruby: アンシンアンゼン Word: 安心安全 Value: -11.7638 CID: (1283, 1287) MID: 17
 ```
 `--sort`オプションを使うとエントリーの並び替えが可能です。
--- a/Sources/CliTool/Anco.swift
+++ b/Sources/CliTool/Anco.swift
@ -5,7 +5,7 @@ import ArgumentParser
 public struct Anco: ParsableCommand {
    public static var configuration = CommandConfiguration(
        abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter",
-        subcommands: [Subcommands.Run.self],
+        subcommands: [Subcommands.Run.self, Subcommands.Dict.self],
        defaultSubcommand: Subcommands.Run.self
    )
--- a/Sources/CliTool/DefaultStringInterpolation+CommandLineUtils.swift
+++ b/Sources/CliTool/DefaultStringInterpolation+CommandLineUtils.swift
@ -0,0 +1,14 @@
 //
 //  DefaultStringInterpolation+CommandLineUtils.swift
 //
 //
 //  Created by miwa on 2024/04/29.
 //
 import Foundation
 extension DefaultStringInterpolation {
    /// Enables `"\(bold: text)"`: appends `value` surrounded by the escape
    /// sequences `ESC[1m` (before) and `ESC[m` (after).
    /// - Parameter value: The text to emit between the two escape sequences.
    mutating func appendInterpolation(bold value: String) {
        let boldOn = "\u{1B}[1m"
        let reset = "\u{1B}[m"
        appendLiteral(boldOn)
        appendLiteral(value)
        appendLiteral(reset)
    }
 }
--- a/Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
+++ b/Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
@ -0,0 +1,13 @@
 import Foundation
 import KanaKanjiConverterModuleWithDefaultDictionary
 import ArgumentParser
 extension Subcommands {
    /// The `dict` command group. It declares no `run()` of its own and only
    /// registers the `Read` subcommand.
    struct Dict: ParsableCommand {
        /// Command-line name, help abstract and nested subcommands for `dict`.
        static var configuration: CommandConfiguration = .init(
            commandName: "dict",
            abstract: "Show dict information",
            subcommands: [Dict.Read.self]
        )
    }
 }
--- a/Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
+++ b/Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
@ -0,0 +1,123 @@
 import Foundation
 import KanaKanjiConverterModule
 import ArgumentParser
 extension Subcommands.Dict {
    /// The `dict read` command: loads the LOUDS data named by `target` from
    /// `dictionaryDirectory`, prints a summary, and, when a `--ruby` or `--word`
    /// filter is given, prints the matching entries in the requested order.
    struct Read: ParsableCommand {
        /// Key used to order the entries printed under "Found Entries".
        enum SortOrder: String, Codable, ExpressibleByArgument {
            case value
            case ruby
            case word

            /// Parses the command-line value by its raw value (`value`, `ruby` or `word`).
            init?(argument: String) {
                self.init(rawValue: argument)
            }
        }

        @Argument(help: "辞書データのfilename")
        var target: String = ""

        @Option(name: [.customLong("dictionary_dir"), .customShort("d")], help: "The directory for dictionary data.")
        var dictionaryDirectory: String = "./"

        @Option(name: [.customLong("ruby")], help: "Regex for entry ruby filter")
        var rubyFilter: String = ""

        @Option(name: [.customLong("word")], help: "Regex for entry word filter")
        var wordFilter: String = ""

        @Option(name: [.customLong("sort")], help: "Sort order")
        var sortOrder: SortOrder = .ruby

        static var configuration = CommandConfiguration(
            commandName: "read",
            abstract: "Read dictionary data and extract informations"
        )

        /// Loads the target LOUDS data, prints the summary and, if any filter is set,
        /// the filtered and sorted entries.
        /// - Throws: Whatever `Regex(_:)` throws when a filter pattern is invalid.
        @MainActor mutating func run() throws {
            guard #available(macOS 13, *) else {
                // Say why nothing happens instead of exiting silently.
                // NOTE(review): the availability gate is presumably for `Regex` — confirm.
                print("anco dict read requires macOS 13 or later")
                return
            }
            let start = Date()
            let isMemory = self.target == "memory"
            // Build the options once; both the LOUDS loader and the store use them.
            let options = self.requestOptions()
            guard let louds = LOUDS.load(self.target, option: options) else {
                print(
                    """
                    \(bold: "=== Summary for target \(self.target) ===")
                    - directory: \(self.dictionaryDirectory)
                    - target: \(self.target)
                    - memory?: \(isMemory)
                    - result: LOUDS data was not found
                    - time for execute: \(Date().timeIntervalSince(start))
                    """
                )
                return
            }
            // Take everything: empty prefix with unlimited depth.
            let nodeIndices = louds.prefixNodeIndices(chars: [], maxDepth: .max)
            let store = DicdataStore(convertRequestOptions: options)
            let result = store.getDicdataFromLoudstxt3(identifier: self.target, indices: nodeIndices)

            // Each non-empty filter must match the whole ruby / word, not a substring.
            let hasFilter = !rubyFilter.isEmpty || !wordFilter.isEmpty
            var filteredResult = result
            if !rubyFilter.isEmpty {
                let filter = try Regex(rubyFilter)
                filteredResult = filteredResult.filter {
                    $0.ruby.wholeMatch(of: filter) != nil
                }
            }
            if !wordFilter.isEmpty {
                let filter = try Regex(wordFilter)
                filteredResult = filteredResult.filter {
                    $0.word.wholeMatch(of: filter) != nil
                }
            }
            print(
                """
                \(bold: "=== Summary for target \(self.target) ===")
                - directory: \(self.dictionaryDirectory)
                - target: \(self.target)
                - memory?: \(isMemory)
                - count of entry: \(result.count)
                - time for execute: \(Date().timeIntervalSince(start))
                """
            )
            if hasFilter {
                let sortFunction: (DicdataElement, DicdataElement) -> Bool = switch self.sortOrder {
                // Shorter rubies first, ties broken lexicographically. Comparing the
                // (count, ruby) tuples keeps this a strict weak ordering, which
                // `sorted(by:)` requires; `a < b || a.count < b.count` was not one.
                case .ruby: { ($0.ruby.count, $0.ruby) < ($1.ruby.count, $1.ruby) }
                case .value: { $0.value() < $1.value() }
                case .word: { $0.word < $1.word }
                }
                print("\(bold: "=== Found Entries ===")")
                print("- count of found entry: \(filteredResult.count)")
                for entry in filteredResult.sorted(by: sortFunction) {
                    print("\(bold: "Ruby:") \(entry.ruby) \(bold: "Word:") \(entry.word) \(bold: "Value:") \(entry.value()) \(bold: "CID:") \((entry.lcid, entry.rcid)) \(bold: "MID:") \(entry.mid)")
                }
            }
        }

        /// Builds the conversion options used only to locate the dictionary files:
        /// every directory URL points at `dictionaryDirectory`, and learning is
        /// set to `.nothing`.
        func requestOptions() -> ConvertRequestOptions {
            let directoryURL = URL(fileURLWithPath: self.dictionaryDirectory)
            return .init(
                N_best: 0,
                requireJapanesePrediction: false,
                requireEnglishPrediction: false,
                keyboardLanguage: .ja_JP,
                typographyLetterCandidate: false,
                unicodeCandidate: true,
                englishCandidateInRoman2KanaInput: true,
                fullWidthRomanCandidate: false,
                halfWidthKanaCandidate: false,
                learningType: .nothing,
                maxMemoryCount: 0,
                dictionaryResourceURL: directoryURL,
                memoryDirectoryURL: directoryURL,
                sharedContainerURL: directoryURL,
                metadata: .init(appVersionString: "anco")
            )
        }
    }
 }
--- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
+++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
@ -189,7 +189,7 @@ public final class DicdataStore {
        return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
    }
-    func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
+    package func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
        debug("getDicdataFromLoudstxt3", identifier, indices)
        // split = 2048
        let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
--- a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
+++ b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
@ -9,7 +9,7 @@
 import Foundation
 /// LOUDS
-struct LOUDS: Sendable {
+package struct LOUDS: Sendable {
    private typealias Unit = UInt64
    private static let unit = 64
    private static let uExp = 6
@ -182,7 +182,7 @@ struct LOUDS: Sendable {
    /// - Parameter chars: CharIDに変換した文字列
    /// - Parameter maxDepth: 先に進む深さの最大値
    /// - Returns: 対応するloudstxt3ファイル内のインデックスのリスト
-    @inlinable func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
+    @inlinable package func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
        guard let nodeIndex = self.searchNodeIndex(chars: chars) else {
            return []
        }
--- a/Sources/KanaKanjiConverterModule/LOUDS/extension
+++ b/Sources/KanaKanjiConverterModule/LOUDS/extension
@ -54,7 +54,7 @@ extension LOUDS {
    /// LOUDSをファイルから読み込む関数
    /// - Parameter identifier: ファイル名
    /// - Returns: 存在すればLOUDSデータを返し、存在しなければ`nil`を返す。
-    static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
+    package static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
        let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option)
        let nodeIndex2ID: [UInt8]
        do {