Merge branch 'develop' into feature/improve_convert_graph_impls

2025-12-03 02:58:27 +00:00 · 2024-03-16 11:56:56 +09:00
parent ea0219e66e c8f013b0df
commit 85c3bd3d02
17 changed files with 214 additions and 69 deletions
--- a/Sources/CliTool/Anco.swift
+++ b/Sources/CliTool/Anco.swift
@@ -0,0 +1,13 @@
+import KanaKanjiConverterModuleWithDefaultDictionary
+import ArgumentParser
+/// Root command of the Anco CLI tool; `Subcommands.Run` is both its only registered subcommand and its default one.
+@main
+public struct Anco: ParsableCommand {
+    public static var configuration = CommandConfiguration(
+        abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter",
+        subcommands: [Subcommands.Run.self],
+        defaultSubcommand: Subcommands.Run.self
+    )
+    /// Declared explicitly because a struct's implicit initializer is not `public`.
+    public init() {}
+}
--- a/Sources/CliTool/Subcommands/Commands.swift
+++ b/Sources/CliTool/Subcommands/Commands.swift
@@ -0,0 +1,2 @@
+/// Namespace for the CLI subcommands; a caseless enum, so it can never be instantiated.
+enum Subcommands {}
--- a/Sources/CliTool/Subcommands/RunCommand.swift
+++ b/Sources/CliTool/Subcommands/RunCommand.swift
@@ -0,0 +1,50 @@
+import KanaKanjiConverterModuleWithDefaultDictionary
+import ArgumentParser
+import Foundation
+
+extension Subcommands {
+    /// `run` subcommand: converts the hiragana `input` and prints the top candidates, one per line.
+    struct Run: ParsableCommand {
+        @Argument(help: "ひらがなで表記された入力")
+        var input: String = ""
+        @Option(name: [.customLong("config_n_best")], help: "The parameter n (n best parameter) for internal viterbi search.")
+        var configNBest: Int = 10
+        @Option(name: [.customShort("n"), .customLong("top_n")], help: "Display top n candidates.")
+        var displayTopN: Int = 1
+        @Flag(name: [.customLong("disable_prediction")], help: "Disable producing prediction candidates.")
+        var disablePrediction = false
+
+        static var configuration = CommandConfiguration(commandName: "run", abstract: "Convert hiragana input to kana-kanji candidates and print them.")
+        /// Inserts `input` into a fresh `ComposingText` (direct input style), requests candidates, and prints the text of the first `displayTopN` main results.
+        @MainActor mutating func run() {
+            let converter = KanaKanjiConverter()
+            var composingText = ComposingText()
+            composingText.insertAtCursorPosition(input, inputStyle: .direct)
+            let result = converter.requestCandidates(composingText, options: requestOptions())
+            // `prefix(_:)` traps on a negative length, so a negative `--top_n` is clamped to zero (prints nothing).
+            for candidate in result.mainResults.prefix(max(0, self.displayTopN)) {
+                print(candidate.text)
+            }
+        }
+        /// Builds the request options via `withDefaultDictionary`: Japanese keyboard, Japanese prediction unless `--disable_prediction` is set, learning set to `.nothing`.
+        func requestOptions() -> ConvertRequestOptions {
+            .withDefaultDictionary(
+                N_best: configNBest,
+                requireJapanesePrediction: !disablePrediction,
+                requireEnglishPrediction: false,
+                keyboardLanguage: .ja_JP,
+                typographyLetterCandidate: false,
+                unicodeCandidate: true,
+                englishCandidateInRoman2KanaInput: true,
+                fullWidthRomanCandidate: false,
+                halfWidthKanaCandidate: false,
+                learningType: .nothing,
+                maxMemoryCount: 0,
+                shouldResetMemory: false,
+                memoryDirectoryURL: URL(fileURLWithPath: ""),
+                sharedContainerURL: URL(fileURLWithPath: ""),
+                metadata: .init(appVersionString: "anco")
+            )
+        }
+    }
+}
--- a/Sources/KanaKanjiConverterModule/Converter/CalendarCandidate.swift
+++ b/Sources/KanaKanjiConverterModule/Converter/CalendarCandidate.swift
@@ -48,7 +48,7 @@ extension KanaKanjiConverter {
        }
        var string = string[...]
        // ネンをdropする
-        guard "ネン" == string.suffix(2) else {
+        guard string.hasSuffix("ネン") else {
            return nil
        }
        string = string.dropLast(2)
--- a/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
+++ b/Sources/KanaKanjiConverterModule/DicdataStore/DicdataStore.swift
@@ -30,7 +30,7 @@ public final class DicdataStore {
    private var charsID: [Character: UInt8] = [:]
    private var learningManager = LearningManager()

-    private var osUserDict: [DicdataElement] = []
+    private var dynamicUserDict: [DicdataElement] = []

    /// 辞書のエントリの最大長さ
    ///  - TODO: make this value as an option
@@ -71,6 +71,10 @@ public final class DicdataStore {
    }

    public enum Notification {
+        /// Use `importDynamicUserDict` for data that cannot be obtained statically.
+        /// - Warning: Too many dynamic user dictionary entries will degrade conversion performance, because the dynamic user dictionary uses inefficient lookup algorithms. If your entries can be listed statically, use normal user dictionaries instead.
+        case importDynamicUserDict([DicdataElement])
+        @available(*, deprecated, renamed: "importDynamicUserDict", message: "it will be removed in AzooKeyKanaKanjiConverter v1.0")
        case importOSUserDict([DicdataElement])
        case setRequestOptions(ConvertRequestOptions)
        case forgetMemory(Candidate)
@@ -81,8 +85,8 @@ public final class DicdataStore {
        switch data {
        case .closeKeyboard:
            self.closeKeyboard()
-        case let .importOSUserDict(osUserDict):
-            self.osUserDict = osUserDict
+        case .importOSUserDict(let dicdata), .importDynamicUserDict(let dicdata):
+            self.dynamicUserDict = dicdata
        case let .forgetMemory(candidate):
            self.learningManager.forgetMemory(data: candidate.data)
            // loudsの処理があるので、リセットを実施する
@@ -420,57 +424,29 @@ public final class DicdataStore {
        if count == .zero {
            return []
        }
-        // 1文字に対する予測変換は検索が難しいので、特別に用意した辞書を用いて実施する
-        if count == 1 {
-            do {
-                let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_\(key).csv", isDirectory: false), encoding: String.Encoding.utf8)
-                let csvLines = csvString.split(separator: "\n")
-                let csvData = csvLines.map {$0.split(separator: ",", omittingEmptySubsequences: false)}
-                let dicdata: [DicdataElement] = csvData
-                    .map {self.parseLoudstxt2FormattedEntry(from: $0)}
-                    .filter { Self.predictionUsable[$0.rcid] }
-                return dicdata
-            } catch {
-                debug("ファイルが存在しません: \(error)")
-                return []
-            }
-        } else if count == 2 {
-            var result: [DicdataElement] = []
-            let first = String(key.first!)
-            let charIDs = key.map(self.character2charId)
-            // 最大700件に絞ることによって低速化を回避する。
-            let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: 5).prefix(700)
-            result.append(
-                contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices))
-                    .filter { Self.predictionUsable[$0.rcid] }
-            )
-            let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs, depth: 5).prefix(700)
-            result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(userDictIndices)))
-            if learningManager.enabled {
-                let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs, depth: 5).prefix(700)
-                result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(memoryDictIndices)))
-                result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
-            }
-            return result
+        // 最大700件に絞ることによって低速化を回避する。
+        var result: [DicdataElement] = []
+        let first = String(key.first!)
+        let charIDs = key.map(self.character2charId)
+        // 1, 2文字に対する予測変換は候補数が大きいので、depth（〜文字数）を制限する
+        let depth = if count == 1 || count == 2 {
+            5
        } else {
-            var result: [DicdataElement] = []
-            let first = String(key.first!)
-            let charIDs = key.map(self.character2charId)
-            // 最大700件に絞ることによって低速化を回避する。
-            let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs).prefix(700)
-            result.append(
-                contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices))
-                    .filter { Self.predictionUsable[$0.rcid] }
-            )
-            let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs).prefix(700)
-            result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(userDictIndices)))
-            if learningManager.enabled {
-                let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs).prefix(700)
-                result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(memoryDictIndices)))
-                result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
-            }
-            return result
+            Int.max
        }
+        // Main dictionary: look up at most 700 prefix-matching indices and keep only entries usable for prediction.
+        let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: depth).prefix(700)
+        result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(consume prefixIndices)).filter { Self.predictionUsable[$0.rcid] })
+        // User dictionary: same depth and 700-index cap; its entries are not filtered.
+        let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs, depth: depth).prefix(700)
+        result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices)))
+        if learningManager.enabled {
+            // Pass the same `depth` as the other lookups so the limit for 1- and 2-character keys also applies to the memory dictionary.
+            let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs, depth: depth).prefix(700)
+            result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices)))
+            result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
+        }
+        return result
    }

    private func parseLoudstxt2FormattedEntry(from dataString: [some StringProtocol]) -> DicdataElement {
@@ -634,12 +610,12 @@ public final class DicdataStore {

    /// OSのユーザ辞書からrubyに等しい語を返す。
    func getMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
-        self.osUserDict.filter {$0.ruby == ruby}
+        self.dynamicUserDict.filter {$0.ruby == ruby}
    }

    /// OSのユーザ辞書からrubyに先頭一致する語を返す。
    func getPrefixMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
-        self.osUserDict.filter {$0.ruby.hasPrefix(ruby)}
+        self.dynamicUserDict.filter {$0.ruby.hasPrefix(ruby)}
    }

    // 学習を反映する
--- a/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
+++ b/Sources/KanaKanjiConverterModule/LOUDS/LOUDS.swift
@@ -54,7 +54,7 @@ struct LOUDS: Sendable {
            }
            return flatChar2nodeIndices
        }
-        self.flatChar2nodeIndicesIndex = consume flatChar2nodeIndicesIndex
+        self.flatChar2nodeIndicesIndex = flatChar2nodeIndicesIndex

        var rankLarge: [UInt32] = .init(repeating: 0, count: bytes.count + 1)
        rankLarge.withUnsafeMutableBufferPointer { buffer in
@@ -62,7 +62,7 @@ struct LOUDS: Sendable {
                buffer[i + 1] = buffer[i] &+ UInt32(Self.unit &- byte.nonzeroBitCount)
            }
        }
-        self.rankLarge = consume rankLarge
+        self.rankLarge = rankLarge
    }

    /// parentNodeIndex個の0を探索し、その次から1個増えるまでのIndexを返す。
--- a/Sources/KanaKanjiConverterModule/Replacer/TextReplacer.swift
+++ b/Sources/KanaKanjiConverterModule/Replacer/TextReplacer.swift
@@ -53,7 +53,7 @@ public struct TextReplacer: Sendable {
        }
    }

-    @available(*, deprecated, renamed: "init(emojiDataProvider:)", message: "init() is depreacted and will be removed in v1.0. Use init(emojiDataProvider:) instead")
+    @available(*, deprecated, renamed: "init(emojiDataProvider:)", message: "it will be removed in AzooKeyKanaKanjiConverter v1.0")
    public init() {
        self.init {
            if #available(iOS 16.4, *) {