mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-08-22 15:05:26 +00:00
[Tools] 辞書データ解析用のサブコマンドをanco
に追加 (#85)
* base value should be updated here * feature: implement cli tool for reading louds data * Update document
This commit is contained in:
43
Docs/cli.md
43
Docs/cli.md
@ -4,13 +4,13 @@
|
|||||||
|
|
||||||
`anco`を利用するには、最初にinstallが必要です。
|
`anco`を利用するには、最初にinstallが必要です。
|
||||||
|
|
||||||
```
|
```bash
|
||||||
sh install_cli.sh
|
sudo sh install_cli.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
例えば以下のように利用できます。
|
例えば以下のように利用できます。
|
||||||
|
|
||||||
```
|
```bash
|
||||||
your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
|
your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
|
||||||
日本語入力
|
日本語入力
|
||||||
にほんご入力
|
にほんご入力
|
||||||
@ -23,3 +23,40 @@ your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
|
|||||||
にほんご
|
にほんご
|
||||||
2本後
|
2本後
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 変換API
|
||||||
|
|
||||||
|
`anco run`コマンドを利用して変換を行うことが出来ます。
|
||||||
|
|
||||||
|
## 辞書リーダ
|
||||||
|
|
||||||
|
`anco dict`コマンドを利用して辞書データを解析することが出来ます。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
|
||||||
|
=== Summary for target ア ===
|
||||||
|
- directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
|
||||||
|
- target: ア
|
||||||
|
- memory?: false
|
||||||
|
- count of entry: 24189
|
||||||
|
- time for execute: 0.0378040075302124
|
||||||
|
```
|
||||||
|
|
||||||
|
`--ruby`および`--word`オプションを利用して、正規表現でフィルターをかけることが出来ます。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ --word ".*全"
|
||||||
|
=== Summary for target ア ===
|
||||||
|
- directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
|
||||||
|
- target: ア
|
||||||
|
- memory?: false
|
||||||
|
- count of entry: 24189
|
||||||
|
- time for execute: 0.07062792778015137
|
||||||
|
=== Found Entries ===
|
||||||
|
- count of found entry: 3
|
||||||
|
Ruby: アキラ Word: 全 Value: -11.7107 CID: (1291, 1291) MID: 424
|
||||||
|
Ruby: アンゼン Word: 安全 Value: -7.241 CID: (1287, 1287) MID: 169
|
||||||
|
Ruby: アンシンアンゼン Word: 安心安全 Value: -11.7638 CID: (1283, 1287) MID: 17
|
||||||
|
```
|
||||||
|
|
||||||
|
`--sort`オプションを使うとエントリーの並び替えが可能です。
|
||||||
|
@ -5,7 +5,7 @@ import ArgumentParser
|
|||||||
public struct Anco: ParsableCommand {
|
public struct Anco: ParsableCommand {
|
||||||
public static var configuration = CommandConfiguration(
|
public static var configuration = CommandConfiguration(
|
||||||
abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter",
|
abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter",
|
||||||
subcommands: [Subcommands.Run.self],
|
subcommands: [Subcommands.Run.self, Subcommands.Dict.self],
|
||||||
defaultSubcommand: Subcommands.Run.self
|
defaultSubcommand: Subcommands.Run.self
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -0,0 +1,14 @@
|
|||||||
|
//
|
||||||
|
// DefaultStringInterpolation+CommandLineUtils.swift
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Created by miwa on 2024/04/29.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
extension DefaultStringInterpolation {
    /// Supports `"\(bold: text)"`: appends `value` wrapped between the escape
    /// sequences `ESC[1m` and `ESC[m`, so terminals that honor them render the
    /// text in bold.
    /// - Parameter value: The text to emphasize.
    mutating func appendInterpolation(bold value: String) {
        let boldOn = "\u{1B}[1m"
        let reset = "\u{1B}[m"
        appendLiteral(boldOn)
        appendLiteral(value)
        appendLiteral(reset)
    }
}
|
13
Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
Normal file
13
Sources/CliTool/Subcommands/DictCommands/DictCommand.swift
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
import Foundation
|
||||||
|
import KanaKanjiConverterModuleWithDefaultDictionary
|
||||||
|
import ArgumentParser
|
||||||
|
|
||||||
|
extension Subcommands {
    /// `anco dict`: parent command that groups the dictionary-inspection
    /// subcommands. It has no behavior of its own beyond dispatching to them.
    struct Dict: ParsableCommand {
        static var configuration: CommandConfiguration = .init(
            commandName: "dict",
            abstract: "Show dict information",
            subcommands: [Read.self]
        )
    }
}
|
123
Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
Normal file
123
Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
import Foundation
|
||||||
|
import KanaKanjiConverterModule
|
||||||
|
import ArgumentParser
|
||||||
|
|
||||||
|
extension Subcommands.Dict {
    /// `anco dict read`: loads one LOUDS dictionary file, prints a summary of it,
    /// and, when a ruby/word filter is given, prints the matching entries.
    struct Read: ParsableCommand {
        /// Key used to order the entries listed under "Found Entries".
        enum SortOrder: String, Codable, ExpressibleByArgument {
            case value
            case ruby
            case word

            init?(argument: String) {
                self.init(rawValue: argument)
            }
        }

        /// Identifier (file name) of the dictionary data to read.
        @Argument(help: "辞書データのfilename")
        var target: String = ""

        /// Directory that contains the dictionary data; also used as the memory
        /// and shared-container directory when building the request options.
        @Option(name: [.customLong("dictionary_dir"), .customShort("d")], help: "The directory for dictionary data.")
        var dictionaryDirectory: String = "./"

        /// Regular expression an entry's ruby must match entirely; empty disables it.
        @Option(name: [.customLong("ruby")], help: "Regex for entry ruby filter")
        var rubyFilter: String = ""

        /// Regular expression an entry's word must match entirely; empty disables it.
        @Option(name: [.customLong("word")], help: "Regex for entry word filter")
        var wordFilter: String = ""

        /// Order in which the filtered entries are printed.
        @Option(name: [.customLong("sort")], help: "Sort order")
        var sortOrder: SortOrder = .ruby

        static var configuration = CommandConfiguration(
            commandName: "read",
            abstract: "Read dictionary data and extract informations"
        )

        /// Loads the target dictionary, prints its summary, and prints the entries
        /// that pass the filters (only when at least one filter is set).
        /// - Throws: An error if `rubyFilter` or `wordFilter` is not a valid regular expression.
        @MainActor mutating func run() throws {
            // `Regex` needs macOS 13+. Report the reason instead of exiting silently.
            guard #available(macOS 13, *) else {
                FileHandle.standardError.write(Data("anco dict read requires macOS 13 or later\n".utf8))
                return
            }
            let start = Date()
            // Build the options once; they are needed for both the LOUDS load and the store.
            let options = self.requestOptions()
            guard let louds = LOUDS.load(self.target, option: options) else {
                self.printSummary(detail: "- result: LOUDS data was not found", start: start)
                return
            }
            // Fetch everything: empty prefix, no depth limit.
            let nodeIndices = louds.prefixNodeIndices(chars: [], maxDepth: .max)
            let store = DicdataStore(convertRequestOptions: options)
            let result = store.getDicdataFromLoudstxt3(identifier: self.target, indices: nodeIndices)

            var filteredResult = result
            var hasFilter = false
            if !rubyFilter.isEmpty {
                let filter = try Regex(rubyFilter)
                hasFilter = true
                filteredResult = filteredResult.filter {
                    $0.ruby.wholeMatch(of: filter) != nil
                }
            }
            if !wordFilter.isEmpty {
                let filter = try Regex(wordFilter)
                hasFilter = true
                filteredResult = filteredResult.filter {
                    $0.word.wholeMatch(of: filter) != nil
                }
            }

            // The summary always reports the unfiltered entry count.
            self.printSummary(detail: "- count of entry: \(result.count)", start: start)

            // Entries are listed only when a filter narrowed them down.
            guard hasFilter else {
                return
            }
            let sortFunction: (DicdataElement, DicdataElement) -> Bool = switch self.sortOrder {
            // Shorter rubies first, ties broken lexicographically. The tuple comparison
            // is a strict weak ordering, which `sorted(by:)` requires; the previous
            // `a < b || a.count < b.count` predicate could report both a<b and b<a.
            case .ruby: { ($0.ruby.count, $0.ruby) < ($1.ruby.count, $1.ruby) }
            case .value: { $0.value() < $1.value() }
            case .word: { $0.word < $1.word }
            }

            print("\(bold: "=== Found Entries ===")")
            print("- count of found entry: \(filteredResult.count)")
            for entry in filteredResult.sorted(by: sortFunction) {
                print("\(bold: "Ruby:") \(entry.ruby) \(bold: "Word:") \(entry.word) \(bold: "Value:") \(entry.value()) \(bold: "CID:") \((entry.lcid, entry.rcid)) \(bold: "MID:") \(entry.mid)")
            }
        }

        /// Prints the summary block shared by the success and not-found paths.
        /// - Parameters:
        ///   - detail: The path-specific line (entry count or not-found notice).
        ///   - start: When the command started; used for the elapsed-time line.
        private func printSummary(detail: String, start: Date) {
            print(
                """
                \(bold: "=== Summary for target \(self.target) ===")
                - directory: \(self.dictionaryDirectory)
                - target: \(self.target)
                - memory?: \(self.target == "memory")
                \(detail)
                - time for execute: \(Date().timeIntervalSince(start))
                """
            )
        }

        /// Builds the request options used to locate and read the dictionary files.
        /// Every directory URL points at `dictionaryDirectory`.
        func requestOptions() -> ConvertRequestOptions {
            let directoryURL = URL(fileURLWithPath: self.dictionaryDirectory)
            return ConvertRequestOptions(
                N_best: 0,
                requireJapanesePrediction: false,
                requireEnglishPrediction: false,
                keyboardLanguage: .ja_JP,
                typographyLetterCandidate: false,
                unicodeCandidate: true,
                englishCandidateInRoman2KanaInput: true,
                fullWidthRomanCandidate: false,
                halfWidthKanaCandidate: false,
                learningType: .nothing,
                maxMemoryCount: 0,
                dictionaryResourceURL: directoryURL,
                memoryDirectoryURL: directoryURL,
                sharedContainerURL: directoryURL,
                metadata: .init(appVersionString: "anco")
            )
        }
    }
}
|
@ -189,7 +189,7 @@ public final class DicdataStore {
|
|||||||
return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
|
return louds.prefixNodeIndices(chars: charIDs, maxDepth: depth)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
|
package func getDicdataFromLoudstxt3(identifier: String, indices: some Sequence<Int>) -> [DicdataElement] {
|
||||||
debug("getDicdataFromLoudstxt3", identifier, indices)
|
debug("getDicdataFromLoudstxt3", identifier, indices)
|
||||||
// split = 2048
|
// split = 2048
|
||||||
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
|
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
|
|
||||||
/// LOUDS
|
/// LOUDS
|
||||||
struct LOUDS: Sendable {
|
package struct LOUDS: Sendable {
|
||||||
private typealias Unit = UInt64
|
private typealias Unit = UInt64
|
||||||
private static let unit = 64
|
private static let unit = 64
|
||||||
private static let uExp = 6
|
private static let uExp = 6
|
||||||
@ -182,7 +182,7 @@ struct LOUDS: Sendable {
|
|||||||
/// - Parameter chars: CharIDに変換した文字列
|
/// - Parameter chars: CharIDに変換した文字列
|
||||||
/// - Parameter maxDepth: 先に進む深さの最大値
|
/// - Parameter maxDepth: 先に進む深さの最大値
|
||||||
/// - Returns: 対応するloudstxt3ファイル内のインデックスのリスト
|
/// - Returns: 対応するloudstxt3ファイル内のインデックスのリスト
|
||||||
@inlinable func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
|
@inlinable package func prefixNodeIndices(chars: [UInt8], maxDepth: Int) -> [Int] {
|
||||||
guard let nodeIndex = self.searchNodeIndex(chars: chars) else {
|
guard let nodeIndex = self.searchNodeIndex(chars: chars) else {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,7 @@ extension LOUDS {
|
|||||||
/// LOUDSをファイルから読み込む関数
|
/// LOUDSをファイルから読み込む関数
|
||||||
/// - Parameter identifier: ファイル名
|
/// - Parameter identifier: ファイル名
|
||||||
/// - Returns: 存在すればLOUDSデータを返し、存在しなければ`nil`を返す。
|
/// - Returns: 存在すればLOUDSデータを返し、存在しなければ`nil`を返す。
|
||||||
static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
|
package static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
|
||||||
let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option)
|
let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option)
|
||||||
let nodeIndex2ID: [UInt8]
|
let nodeIndex2ID: [UInt8]
|
||||||
do {
|
do {
|
||||||
|
Reference in New Issue
Block a user