mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
[Tools] 辞書データ解析用のサブコマンドをanco
に追加 (#85)
* base value should be updated here * feature: implement cli tool for reading louds data * Update document
This commit is contained in:
43
Docs/cli.md
43
Docs/cli.md
@ -4,13 +4,13 @@
|
||||
|
||||
`anco`を利用するには、最初にinstallが必要です。
|
||||
|
||||
```
|
||||
sh install_cli.sh
|
||||
```bash
|
||||
sudo sh install_cli.sh
|
||||
```
|
||||
|
||||
例えば以下のように利用できます。
|
||||
|
||||
```
|
||||
```bash
|
||||
your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
|
||||
日本語入力
|
||||
にほんご入力
|
||||
@ -23,3 +23,40 @@ your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
|
||||
にほんご
|
||||
2本後
|
||||
```
|
||||
|
||||
## 変換API
|
||||
|
||||
`anco run`コマンドを利用して変換を行うことが出来ます。
|
||||
|
||||
## 辞書リーダ
|
||||
|
||||
`anco dict`コマンドを利用して辞書データを解析することが出来ます。
|
||||
|
||||
```bash
|
||||
your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
|
||||
=== Summary for target ア ===
|
||||
- directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
|
||||
- target: ア
|
||||
- memory?: false
|
||||
- count of entry: 24189
|
||||
- time for execute: 0.0378040075302124
|
||||
```
|
||||
|
||||
`--ruby`および`--word`オプションを利用して、正規表現でフィルターをかけることが出来ます。
|
||||
|
||||
```bash
|
||||
your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ --word ".*全"
|
||||
=== Summary for target ア ===
|
||||
- directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
|
||||
- target: ア
|
||||
- memory?: false
|
||||
- count of entry: 24189
|
||||
- time for execute: 0.07062792778015137
|
||||
=== Found Entries ===
|
||||
- count of found entry: 3
|
||||
Ruby: アキラ Word: 全 Value: -11.7107 CID: (1291, 1291) MID: 424
|
||||
Ruby: アンゼン Word: 安全 Value: -7.241 CID: (1287, 1287) MID: 169
|
||||
Ruby: アンシンアンゼン Word: 安心安全 Value: -11.7638 CID: (1283, 1287) MID: 17
|
||||
```
|
||||
|
||||
`--sort`オプションを使うとエントリーの並び替えが可能です。`value`、`ruby`、`word`のいずれかを指定でき、デフォルトは`ruby`です。
|
||||
|
@ -5,7 +5,7 @@ import ArgumentParser
|
||||
public struct Anco: ParsableCommand {
|
||||
public static var configuration = CommandConfiguration(
|
||||
abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter",
|
||||
subcommands: [Subcommands.Run.self],
|
||||
subcommands: [Subcommands.Run.self, Subcommands.Dict.self],
|
||||
defaultSubcommand: Subcommands.Run.self
|
||||
)
|
||||
|
||||
|
@ -0,0 +1,14 @@
|
||||
//
|
||||
// DefaultStringInterpolation+CommandLineUtils.swift
|
||||
//
|
||||
//
|
||||
// Created by miwa on 2024/04/29.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension DefaultStringInterpolation {
    /// Supports `"\(bold: text)"`: appends `value` surrounded by the escape
    /// sequences `ESC[1m` (before) and `ESC[m` (after).
    /// - Parameter value: The text to place between the two escape sequences.
    mutating func appendInterpolation(bold value: String) {
        let prefix = "\u{1B}[1m"
        let suffix = "\u{1B}[m"
        appendLiteral(prefix)
        appendLiteral(value)
        appendLiteral(suffix)
    }
}
|
13
Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
Normal file
13
Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
Normal file
@ -0,0 +1,13 @@
|
||||
import Foundation
|
||||
import KanaKanjiConverterModuleWithDefaultDictionary
|
||||
import ArgumentParser
|
||||
|
||||
extension Subcommands {
    /// `anco dict`: parent command that groups the dictionary inspection subcommands.
    /// It declares no arguments of its own; the work is done by `Read`.
    struct Dict: ParsableCommand {
        static var configuration: CommandConfiguration = .init(
            commandName: "dict",
            abstract: "Show dict information",
            subcommands: [Read.self]
        )
    }
}
|
123
Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
Normal file
123
Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
Normal file
@ -0,0 +1,123 @@
|
||||
import Foundation
|
||||
import KanaKanjiConverterModule
|
||||
import ArgumentParser
|
||||
|
||||
extension Subcommands.Dict {
    /// `anco dict read`: loads the LOUDS data named by `target` from the dictionary
    /// directory, prints a summary, and lists the entries that match the optional
    /// ruby/word regex filters.
    struct Read: ParsableCommand {
        /// Key used to order the entries printed under "Found Entries".
        enum SortOrder: String, Codable, ExpressibleByArgument {
            case value
            case ruby
            case word

            init?(argument: String) {
                self.init(rawValue: argument)
            }
        }

        @Argument(help: "辞書データのfilename")
        var target: String = ""

        @Option(name: [.customLong("dictionary_dir"), .customShort("d")], help: "The directory for dictionary data.")
        var dictionaryDirectory: String = "./"

        @Option(name: [.customLong("ruby")], help: "Regex for entry ruby filter")
        var rubyFilter: String = ""

        @Option(name: [.customLong("word")], help: "Regex for entry word filter")
        var wordFilter: String = ""

        @Option(name: [.customLong("sort")], help: "Sort order")
        var sortOrder: SortOrder = .ruby

        static var configuration = CommandConfiguration(
            commandName: "read",
            abstract: "Read dictionary data and extract informations"
        )

        /// Loads the target LOUDS data, prints the summary and, when at least one
        /// filter was given, the matching entries sorted by `sortOrder`.
        /// - Throws: An error from `Regex.init` when a filter pattern is invalid.
        @MainActor mutating func run() throws {
            // `Regex` (used for the filters below) requires macOS 13; on older
            // systems the command returns without printing anything.
            guard #available(macOS 13, *) else {
                return
            }
            let start = Date()
            let isMemory = self.target == "memory"
            // Build the options once; they are needed for both LOUDS loading and the store.
            let options = self.requestOptions()
            guard let louds = LOUDS.load(self.target, option: options) else {
                print(
                    """
                    \(bold: "=== Summary for target \(self.target) ===")
                    - directory: \(self.dictionaryDirectory)
                    - target: \(self.target)
                    - memory?: \(isMemory)
                    - result: LOUDS data was not found
                    - time for execute: \(Date().timeIntervalSince(start))
                    """
                )
                return
            }
            // Fetch everything: empty prefix with no depth limit.
            let nodeIndices = louds.prefixNodeIndices(chars: [], maxDepth: .max)
            let store = DicdataStore(convertRequestOptions: options)
            let result = store.getDicdataFromLoudstxt3(identifier: self.target, indices: nodeIndices)
            var filteredResult = result
            var hasFilter = false
            if !rubyFilter.isEmpty {
                let filter = try Regex(rubyFilter)
                hasFilter = true
                // The pattern must match the whole ruby, not just a substring.
                filteredResult = filteredResult.filter {
                    $0.ruby.wholeMatch(of: filter) != nil
                }
            }
            if !wordFilter.isEmpty {
                let filter = try Regex(wordFilter)
                hasFilter = true
                // The pattern must match the whole word, not just a substring.
                filteredResult = filteredResult.filter {
                    $0.word.wholeMatch(of: filter) != nil
                }
            }

            print(
                """
                \(bold: "=== Summary for target \(self.target) ===")
                - directory: \(self.dictionaryDirectory)
                - target: \(self.target)
                - memory?: \(isMemory)
                - count of entry: \(result.count)
                - time for execute: \(Date().timeIntervalSince(start))
                """
            )

            // Entries are listed only when at least one filter was specified.
            if hasFilter {
                let sortFunction: (DicdataElement, DicdataElement) -> Bool = switch self.sortOrder {
                // Shorter rubies first, ties broken lexicographically. Comparing the
                // (count, ruby) tuples is a strict weak ordering, as `sorted(by:)` requires;
                // the previous `a < b || a.count < b.count` could report both a < b and b < a.
                case .ruby: { ($0.ruby.count, $0.ruby) < ($1.ruby.count, $1.ruby) }
                case .value: { $0.value() < $1.value() }
                case .word: { $0.word < $1.word }
                }

                print("\(bold: "=== Found Entries ===")")
                print("- count of found entry: \(filteredResult.count)")
                for entry in filteredResult.sorted(by: sortFunction) {
                    print("\(bold: "Ruby:") \(entry.ruby) \(bold: "Word:") \(entry.word) \(bold: "Value:") \(entry.value()) \(bold: "CID:") \((entry.lcid, entry.rcid)) \(bold: "MID:") \(entry.mid)")
                }
            }
        }

        /// Builds the conversion options used to locate the dictionary files.
        /// All three directory URLs point at `dictionaryDirectory`; learning is
        /// disabled (`learningType: .nothing`, `maxMemoryCount: 0`).
        func requestOptions() -> ConvertRequestOptions {
            .init(
                N_best: 0,
                requireJapanesePrediction: false,
                requireEnglishPrediction: false,
                keyboardLanguage: .ja_JP,
                typographyLetterCandidate: false,
                unicodeCandidate: true,
                englishCandidateInRoman2KanaInput: true,
                fullWidthRomanCandidate: false,
                halfWidthKanaCandidate: false,
                learningType: .nothing,
                maxMemoryCount: 0,
                dictionaryResourceURL: URL(fileURLWithPath: self.dictionaryDirectory),
                memoryDirectoryURL: URL(fileURLWithPath: self.dictionaryDirectory),
                sharedContainerURL: URL(fileURLWithPath: self.dictionaryDirectory),
                metadata: .init(appVersionString: "anco")
            )
        }
    }
}
|
@ -189,7 +189,7 @@ public final class DicdataStore {
|
||||
return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
|
||||
}
|
||||
|
||||
func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
|
||||
package func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
|
||||
debug("getDicdataFromLoudstxt3", identifier, indices)
|
||||
// split = 2048
|
||||
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
|
||||
|
@ -9,7 +9,7 @@
|
||||
import Foundation
|
||||
|
||||
/// LOUDS
|
||||
struct LOUDS: Sendable {
|
||||
package struct LOUDS: Sendable {
|
||||
private typealias Unit = UInt64
|
||||
private static let unit = 64
|
||||
private static let uExp = 6
|
||||
@ -182,7 +182,7 @@ struct LOUDS: Sendable {
|
||||
/// - Parameter chars: CharIDに変換した文字列
|
||||
/// - Parameter maxDepth: 先に進む深さの最大値
|
||||
/// - Returns: 対応するloudstxt3ファイル内のインデックスのリスト
|
||||
@inlinable func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
|
||||
@inlinable package func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
|
||||
guard let nodeIndex = self.searchNodeIndex(chars: chars) else {
|
||||
return []
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ extension LOUDS {
|
||||
/// LOUDSをファイルから読み込む関数
|
||||
/// - Parameter identifier: ファイル名
|
||||
/// - Returns: 存在すればLOUDSデータを返し、存在しなければ`nil`を返す。
|
||||
static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
|
||||
package static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
|
||||
let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option)
|
||||
let nodeIndex2ID: [UInt8]
|
||||
do {
|
||||
|
Reference in New Issue
Block a user