mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 02:58:27 +00:00
feat: 特殊な変換のプロトコルを定め、外から挿入できるようにした
This commit is contained in:
@@ -106,8 +106,6 @@ extension Subcommands.Dict {
|
||||
requireJapanesePrediction: false,
|
||||
requireEnglishPrediction: false,
|
||||
keyboardLanguage: .ja_JP,
|
||||
typographyLetterCandidate: false,
|
||||
unicodeCandidate: true,
|
||||
englishCandidateInRoman2KanaInput: true,
|
||||
fullWidthRomanCandidate: false,
|
||||
halfWidthKanaCandidate: false,
|
||||
@@ -117,6 +115,7 @@ extension Subcommands.Dict {
|
||||
memoryDirectoryURL: URL(fileURLWithPath: self.dictionaryDirectory),
|
||||
sharedContainerURL: URL(fileURLWithPath: self.dictionaryDirectory),
|
||||
textReplacer: .empty,
|
||||
specialCandidateProviders: nil,
|
||||
metadata: .init(versionString: "anco for debugging")
|
||||
)
|
||||
}
|
||||
|
||||
@@ -9,6 +9,46 @@
|
||||
import Foundation
|
||||
|
||||
public struct ConvertRequestOptions: Sendable {
|
||||
/// Creates the configuration data required for a conversion request.
///
/// - parameters:
///   - N_best: Number of conversion candidates. Validity under the language model is guaranteed for the top `N` candidates. Larger values increase the amount of computation.
///   - requireJapanesePrediction: Whether Japanese predictive candidates are needed. When `false`, no Japanese predictive candidates are produced.
///   - requireEnglishPrediction: Whether English predictive candidates are needed. When `false`, no English predictive candidates are produced. `false` is recommended for Japanese input via romaji.
///   - keyboardLanguage: The language of the keyboard.
///   - englishCandidateInRoman2KanaInput: When `true`, English candidates are produced during Japanese romaji input; when `false`, they are not.
///   - fullWidthRomanCandidate: When `true`, full-width alphanumeric candidates are included in the output.
///   - halfWidthKanaCandidate: When `true`, half-width kana candidates are included in the output.
///   - learningType: The learning mode. See `LearningType` for details.
///   - maxMemoryCount: Maximum number of entries kept when learning is enabled. For `0`, setting `learningType` to `nothing` is more appropriate.
///   - shouldResetMemory: When `true`, learning data is reset before conversion starts.
///   - dictionaryResourceURL: Location from which the built-in dictionary data is read.
///   - memoryDirectoryURL: Location where learning data is saved. Must be a writable directory.
///   - sharedContainerURL: Directory holding settings data written outside the keyboard, such as the user dictionary.
///   - textReplacer: The replacer used for predictive conversion.
///   - specialCandidateProviders: Providers that perform special conversions. Passing `nil` selects `KanaKanjiConverter.defaultSpecialCandidateProviders`.
///   - zenzaiMode: Stored as-is in `zenzaiMode`.
///   - preloadDictionary: Stored as-is in `preloadDictionary`.
///   - metadata: Metadata. See `ConvertRequestOptions.Metadata` for details.
public init(
    N_best: Int = 10,
    requireJapanesePrediction: Bool,
    requireEnglishPrediction: Bool,
    keyboardLanguage: KeyboardLanguage,
    englishCandidateInRoman2KanaInput: Bool = false,
    fullWidthRomanCandidate: Bool = false,
    halfWidthKanaCandidate: Bool = false,
    learningType: LearningType,
    maxMemoryCount: Int = 65536,
    shouldResetMemory: Bool = false,
    dictionaryResourceURL: URL,
    memoryDirectoryURL: URL,
    sharedContainerURL: URL,
    textReplacer: TextReplacer,
    specialCandidateProviders: [any SpecialCandidateProvider]?,
    zenzaiMode: ZenzaiMode = .off,
    preloadDictionary: Bool = false,
    metadata: ConvertRequestOptions.Metadata?
) {
    // A nil provider list means "use the converter's built-in set".
    let resolvedProviders = specialCandidateProviders ?? KanaKanjiConverter.defaultSpecialCandidateProviders

    // Candidate generation switches.
    self.N_best = N_best
    self.requireJapanesePrediction = requireJapanesePrediction
    self.requireEnglishPrediction = requireEnglishPrediction
    self.keyboardLanguage = keyboardLanguage
    self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
    self.fullWidthRomanCandidate = fullWidthRomanCandidate
    self.halfWidthKanaCandidate = halfWidthKanaCandidate
    self.specialCandidateProviders = resolvedProviders
    self.textReplacer = textReplacer
    self.zenzaiMode = zenzaiMode

    // Learning configuration.
    self.learningType = learningType
    self.maxMemoryCount = maxMemoryCount
    self.shouldResetMemory = shouldResetMemory

    // Resource locations.
    self.dictionaryResourceURL = dictionaryResourceURL
    self.memoryDirectoryURL = memoryDirectoryURL
    self.sharedContainerURL = sharedContainerURL
    self.preloadDictionary = preloadDictionary

    self.metadata = metadata
}
|
||||
|
||||
/// 変換リクエストに必要な設定データ
|
||||
///
|
||||
/// - parameters:
|
||||
@@ -29,13 +69,26 @@ public struct ConvertRequestOptions: Sendable {
|
||||
/// - sharedContainerURL: ユーザ辞書など、キーボード外で書き込んだ設定データの保存されているディレクトリを指定します。
|
||||
/// - textReplacer: 予測変換のための置換機を指定します。
|
||||
/// - metadata: メタデータを指定します。詳しくは`ConvertRequestOptions.Metadata`を参照してください。
|
||||
@available(*, deprecated, message: "it be removed in AzooKeyKanaKanjiConverter v1.0")
|
||||
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, textReplacer: TextReplacer, zenzaiMode: ZenzaiMode = .off, preloadDictionary: Bool = false, metadata: ConvertRequestOptions.Metadata?) {
|
||||
|
||||
var specialCandidateProviders = [any SpecialCandidateProvider]()
|
||||
if typographyLetterCandidate {
|
||||
specialCandidateProviders.append(.typography)
|
||||
}
|
||||
if unicodeCandidate {
|
||||
specialCandidateProviders.append(.unicode)
|
||||
}
|
||||
specialCandidateProviders.append(.emailAddress)
|
||||
specialCandidateProviders.append(.timeExpression)
|
||||
specialCandidateProviders.append(.calendar)
|
||||
specialCandidateProviders.append(.version)
|
||||
|
||||
self.N_best = N_best
|
||||
self.requireJapanesePrediction = requireJapanesePrediction
|
||||
self.requireEnglishPrediction = requireEnglishPrediction
|
||||
self.keyboardLanguage = keyboardLanguage
|
||||
self.typographyLetterCandidate = typographyLetterCandidate
|
||||
self.unicodeCandidate = unicodeCandidate
|
||||
self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
|
||||
self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
|
||||
self.fullWidthRomanCandidate = fullWidthRomanCandidate
|
||||
self.halfWidthKanaCandidate = halfWidthKanaCandidate
|
||||
@@ -46,6 +99,7 @@ public struct ConvertRequestOptions: Sendable {
|
||||
self.sharedContainerURL = sharedContainerURL
|
||||
self.metadata = metadata
|
||||
self.textReplacer = textReplacer
|
||||
self.specialCandidateProviders = specialCandidateProviders
|
||||
self.zenzaiMode = zenzaiMode
|
||||
self.preloadDictionary = preloadDictionary
|
||||
self.dictionaryResourceURL = dictionaryResourceURL
|
||||
@@ -103,8 +157,6 @@ public struct ConvertRequestOptions: Sendable {
|
||||
public var requireEnglishPrediction: Bool
|
||||
public var keyboardLanguage: KeyboardLanguage
|
||||
// KeyboardSettingのinjection用途
|
||||
public var typographyLetterCandidate: Bool
|
||||
public var unicodeCandidate: Bool
|
||||
public var englishCandidateInRoman2KanaInput: Bool
|
||||
public var fullWidthRomanCandidate: Bool
|
||||
public var halfWidthKanaCandidate: Bool
|
||||
@@ -117,6 +169,8 @@ public struct ConvertRequestOptions: Sendable {
|
||||
public var memoryDirectoryURL: URL
|
||||
public var sharedContainerURL: URL
|
||||
public var dictionaryResourceURL: URL
|
||||
/// providers to generate "special" candidates such as Unicode conversion.
|
||||
public var specialCandidateProviders: [any SpecialCandidateProvider]
|
||||
public var zenzaiMode: ZenzaiMode
|
||||
public var preloadDictionary: Bool
|
||||
// メタデータ
|
||||
@@ -131,11 +185,7 @@ public struct ConvertRequestOptions: Sendable {
|
||||
requireJapanesePrediction: true,
|
||||
requireEnglishPrediction: true,
|
||||
keyboardLanguage: .ja_JP,
|
||||
typographyLetterCandidate: false,
|
||||
unicodeCandidate: true,
|
||||
englishCandidateInRoman2KanaInput: true,
|
||||
fullWidthRomanCandidate: true,
|
||||
halfWidthKanaCandidate: false,
|
||||
learningType: .inputAndOutput,
|
||||
maxMemoryCount: 65536,
|
||||
shouldResetMemory: false,
|
||||
@@ -146,6 +196,7 @@ public struct ConvertRequestOptions: Sendable {
|
||||
// dummy data, won't work
|
||||
sharedContainerURL: Bundle.main.bundleURL,
|
||||
textReplacer: .empty,
|
||||
specialCandidateProviders: nil,
|
||||
preloadDictionary: false,
|
||||
metadata: nil
|
||||
)
|
||||
|
||||
@@ -18,6 +18,13 @@ import EfficientNGram
|
||||
}
|
||||
|
||||
private var converter = Kana2Kanji()
|
||||
/// Providers used when `ConvertRequestOptions.init` receives `nil` for `specialCandidateProviders`.
///
/// The list covers calendar, e-mail address, Unicode, version and time-expression
/// conversions, in that order. The typography provider is not part of it.
nonisolated(unsafe) public static let defaultSpecialCandidateProviders: [any SpecialCandidateProvider] = [
    .calendar,
    .emailAddress,
    .unicode,
    .version,
    .timeExpression
]
|
||||
@MainActor private var checker = SpellChecker()
|
||||
private var checkerInitialized: [KeyboardLanguage: Bool] = [.none: true, .ja_JP: true]
|
||||
|
||||
@@ -141,23 +148,10 @@ import EfficientNGram
|
||||
/// - string: 入力されたString
|
||||
/// - Returns:
|
||||
/// 賢い変換候補
|
||||
private func getWiseCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
|
||||
var result = [Candidate]()
|
||||
|
||||
// toWarekiCandidates/toSeirekiCandidatesは以前は設定可能にしていたが、特にoffにする需要がなさそうなので常時有効化した
|
||||
result.append(contentsOf: self.toWarekiCandidates(inputData))
|
||||
result.append(contentsOf: self.toSeirekiCandidates(inputData))
|
||||
result.append(contentsOf: self.toEmailAddressCandidates(inputData))
|
||||
|
||||
if options.typographyLetterCandidate {
|
||||
result.append(contentsOf: self.typographicalCandidates(inputData))
|
||||
private func getSpecialCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
|
||||
options.specialCandidateProviders.flatMap { provider in
|
||||
provider.provideCandidates(converter: self, inputData: inputData, options: options)
|
||||
}
|
||||
if options.unicodeCandidate {
|
||||
result.append(contentsOf: self.unicodeCandidates(inputData))
|
||||
}
|
||||
result.append(contentsOf: self.toVersionCandidate(inputData, options: options))
|
||||
result.append(contentsOf: self.convertToTimeExpression(inputData))
|
||||
return result
|
||||
}
|
||||
|
||||
/// 変換候補の重複を除去する関数。
|
||||
@@ -563,7 +557,7 @@ import EfficientNGram
|
||||
seenCandidate.formUnion(word_candidates.map {$0.text})
|
||||
|
||||
// 賢く変換するパターン(任意件数)
|
||||
let wise_candidates: [Candidate] = self.getUniqueCandidate(self.getWiseCandidate(inputData, options: options), seenCandidates: seenCandidate)
|
||||
let wise_candidates: [Candidate] = self.getUniqueCandidate(self.getSpecialCandidate(inputData, options: options), seenCandidates: seenCandidate)
|
||||
// 途中でwise_candidatesを挟む
|
||||
word_candidates.insert(contentsOf: wise_candidates, at: min(5, word_candidates.endIndex))
|
||||
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
/// A pluggable source of "special" conversion candidates.
///
/// Conforming values are `Sendable` and can be supplied from outside the library
/// through `ConvertRequestOptions.specialCandidateProviders`.
public protocol SpecialCandidateProvider: Sendable {
    /// Produces the special candidates for the current input.
    ///
    /// - Parameters:
    ///   - converter: The converter requesting candidates; providers may call back into it.
    ///   - inputData: The text currently being composed.
    ///   - options: The options of the ongoing conversion request.
    /// - Returns: The candidates this provider contributes.
    @MainActor func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate]
}
|
||||
|
||||
/// Provider for calendar-year conversions (wareki and seireki candidates).
public struct CalendarSpecialCandidateProvider: SpecialCandidateProvider {
    public init() {}

    /// Returns the converter's wareki candidates followed by its seireki candidates.
    ///
    /// The request options are not consulted.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
        var candidates = [Candidate]()
        candidates.append(contentsOf: converter.toWarekiCandidates(inputData))
        candidates.append(contentsOf: converter.toSeirekiCandidates(inputData))
        return candidates
    }
}
|
||||
|
||||
/// Provider for e-mail address candidates.
public struct EmailAddressSpecialCandidateProvider: SpecialCandidateProvider {
    public init() {}

    /// Forwards to `KanaKanjiConverter.toEmailAddressCandidates(_:)`.
    ///
    /// The request options are not consulted.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
        let candidates = converter.toEmailAddressCandidates(inputData)
        return candidates
    }
}
|
||||
|
||||
/// Provider for typographical letter candidates.
public struct TypographySpecialCandidateProvider: SpecialCandidateProvider {
    public init() {}

    /// Forwards to `KanaKanjiConverter.typographicalCandidates(_:)`.
    ///
    /// The request options are not consulted.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
        let candidates = converter.typographicalCandidates(inputData)
        return candidates
    }
}
|
||||
|
||||
/// Provider for Unicode conversion candidates.
public struct UnicodeSpecialCandidateProvider: SpecialCandidateProvider {
    public init() {}

    /// Forwards to `KanaKanjiConverter.unicodeCandidates(_:)`.
    ///
    /// The request options are not consulted.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
        let candidates = converter.unicodeCandidates(inputData)
        return candidates
    }
}
|
||||
|
||||
/// Provider for version candidates.
public struct VersionSpecialCandidateProvider: SpecialCandidateProvider {
    public init() {}

    /// Forwards to `KanaKanjiConverter.toVersionCandidate(_:options:)`, passing the
    /// request options through unchanged.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
        let candidates = converter.toVersionCandidate(inputData, options: options)
        return candidates
    }
}
|
||||
|
||||
/// Provider for time-expression candidates.
public struct TimeExpressionSpecialCandidateProvider: SpecialCandidateProvider {
    public init() {}

    /// Forwards to `KanaKanjiConverter.convertToTimeExpression(_:)`.
    ///
    /// The request options are not consulted.
    @MainActor
    public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
        let candidates = converter.convertToTimeExpression(inputData)
        return candidates
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == CalendarSpecialCandidateProvider {
    /// Leading-dot shorthand that creates a new `CalendarSpecialCandidateProvider`.
    static var calendar: Self {
        CalendarSpecialCandidateProvider()
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == EmailAddressSpecialCandidateProvider {
    /// Leading-dot shorthand that creates a new `EmailAddressSpecialCandidateProvider`.
    static var emailAddress: Self {
        EmailAddressSpecialCandidateProvider()
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == TypographySpecialCandidateProvider {
    /// Leading-dot shorthand that creates a new `TypographySpecialCandidateProvider`.
    static var typography: Self {
        TypographySpecialCandidateProvider()
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == UnicodeSpecialCandidateProvider {
    /// Leading-dot shorthand that creates a new `UnicodeSpecialCandidateProvider`.
    static var unicode: Self {
        UnicodeSpecialCandidateProvider()
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == VersionSpecialCandidateProvider {
    /// Leading-dot shorthand that creates a new `VersionSpecialCandidateProvider`.
    static var version: Self {
        VersionSpecialCandidateProvider()
    }
}
|
||||
|
||||
public extension SpecialCandidateProvider where Self == TimeExpressionSpecialCandidateProvider {
    /// Leading-dot shorthand that creates a new `TimeExpressionSpecialCandidateProvider`.
    static var timeExpression: Self {
        TimeExpressionSpecialCandidateProvider()
    }
}
|
||||
Reference in New Issue
Block a user