Merge pull request #183 from azooKey/feat/special_candidate_providers

feat: 特殊な変換のプロトコルを定め、外から挿入できるようにした
This commit is contained in:
Miwa
2025-05-25 18:28:45 +09:00
committed by GitHub
4 changed files with 141 additions and 27 deletions

View File

@@ -106,8 +106,6 @@ extension Subcommands.Dict {
requireJapanesePrediction: false,
requireEnglishPrediction: false,
keyboardLanguage: .ja_JP,
typographyLetterCandidate: false,
unicodeCandidate: true,
englishCandidateInRoman2KanaInput: true,
fullWidthRomanCandidate: false,
halfWidthKanaCandidate: false,
@@ -117,6 +115,7 @@ extension Subcommands.Dict {
memoryDirectoryURL: URL(fileURLWithPath: self.dictionaryDirectory),
sharedContainerURL: URL(fileURLWithPath: self.dictionaryDirectory),
textReplacer: .empty,
specialCandidateProviders: nil,
metadata: .init(versionString: "anco for debugging")
)
}

View File

@@ -9,6 +9,46 @@
import Foundation
public struct ConvertRequestOptions: Sendable {
/// Creates conversion request options with an explicit list of special-candidate providers.
///
/// Every argument is stored unchanged in the property of the same name, with one exception:
/// a `nil` `specialCandidateProviders` is replaced by `KanaKanjiConverter.defaultSpecialCandidateProviders`.
///
/// - parameters:
///   - N_best: Value for `N_best`. Defaults to `10`. (Presumably the `N` of an N-best search — confirm.)
///   - requireJapanesePrediction: Value for `requireJapanesePrediction`.
///   - requireEnglishPrediction: Value for `requireEnglishPrediction`.
///   - keyboardLanguage: Value for `keyboardLanguage`.
///   - englishCandidateInRoman2KanaInput: Value for `englishCandidateInRoman2KanaInput`. Defaults to `false`.
///   - fullWidthRomanCandidate: Value for `fullWidthRomanCandidate`. Defaults to `false`.
///   - halfWidthKanaCandidate: Value for `halfWidthKanaCandidate`. Defaults to `false`.
///   - learningType: Value for `learningType`; see `LearningType`.
///   - maxMemoryCount: Value for `maxMemoryCount`. Defaults to `65536`.
///   - shouldResetMemory: Value for `shouldResetMemory`. Defaults to `false`.
///   - dictionaryResourceURL: Value for `dictionaryResourceURL`.
///   - memoryDirectoryURL: Value for `memoryDirectoryURL`.
///   - sharedContainerURL: Value for `sharedContainerURL`.
///   - textReplacer: Value for `textReplacer`.
///   - specialCandidateProviders: Providers used to generate "special" candidates.
///     Pass `nil` to use `KanaKanjiConverter.defaultSpecialCandidateProviders`;
///     pass an empty array to use no providers at all.
///   - zenzaiMode: Value for `zenzaiMode`. Defaults to `.off`.
///   - preloadDictionary: Value for `preloadDictionary`. Defaults to `false`.
///   - metadata: Value for `metadata`; see `ConvertRequestOptions.Metadata`.
public init(
    N_best: Int = 10,
    requireJapanesePrediction: Bool,
    requireEnglishPrediction: Bool,
    keyboardLanguage: KeyboardLanguage,
    englishCandidateInRoman2KanaInput: Bool = false,
    fullWidthRomanCandidate: Bool = false,
    halfWidthKanaCandidate: Bool = false,
    learningType: LearningType,
    maxMemoryCount: Int = 65536,
    shouldResetMemory: Bool = false,
    dictionaryResourceURL: URL,
    memoryDirectoryURL: URL,
    sharedContainerURL: URL,
    textReplacer: TextReplacer,
    specialCandidateProviders: [any SpecialCandidateProvider]?,
    zenzaiMode: ZenzaiMode = .off,
    preloadDictionary: Bool = false,
    metadata: ConvertRequestOptions.Metadata?
) {
    // Search size, prediction switches and keyboard language.
    self.N_best = N_best
    self.requireJapanesePrediction = requireJapanesePrediction
    self.requireEnglishPrediction = requireEnglishPrediction
    self.keyboardLanguage = keyboardLanguage

    // Per-kind candidate switches.
    self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
    self.fullWidthRomanCandidate = fullWidthRomanCandidate
    self.halfWidthKanaCandidate = halfWidthKanaCandidate

    // Learning configuration.
    self.learningType = learningType
    self.maxMemoryCount = maxMemoryCount
    self.shouldResetMemory = shouldResetMemory

    // Resource locations.
    self.dictionaryResourceURL = dictionaryResourceURL
    self.memoryDirectoryURL = memoryDirectoryURL
    self.sharedContainerURL = sharedContainerURL

    // Pluggable behaviour: only `nil` selects the built-in provider list.
    self.textReplacer = textReplacer
    self.specialCandidateProviders = specialCandidateProviders ?? KanaKanjiConverter.defaultSpecialCandidateProviders
    self.zenzaiMode = zenzaiMode
    self.preloadDictionary = preloadDictionary
    self.metadata = metadata
}
///
///
/// - parameters:
@@ -29,13 +69,26 @@ public struct ConvertRequestOptions: Sendable {
/// - sharedContainerURL:
/// - textReplacer:
/// - metadata: `ConvertRequestOptions.Metadata`
@available(*, deprecated, message: "it be removed in AzooKeyKanaKanjiConverter v1.0")
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, textReplacer: TextReplacer, zenzaiMode: ZenzaiMode = .off, preloadDictionary: Bool = false, metadata: ConvertRequestOptions.Metadata?) {
var specialCandidateProviders = [any SpecialCandidateProvider]()
if typographyLetterCandidate {
specialCandidateProviders.append(.typography)
}
if unicodeCandidate {
specialCandidateProviders.append(.unicode)
}
specialCandidateProviders.append(.emailAddress)
specialCandidateProviders.append(.timeExpression)
specialCandidateProviders.append(.calendar)
specialCandidateProviders.append(.version)
self.N_best = N_best
self.requireJapanesePrediction = requireJapanesePrediction
self.requireEnglishPrediction = requireEnglishPrediction
self.keyboardLanguage = keyboardLanguage
self.typographyLetterCandidate = typographyLetterCandidate
self.unicodeCandidate = unicodeCandidate
self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
self.englishCandidateInRoman2KanaInput = englishCandidateInRoman2KanaInput
self.fullWidthRomanCandidate = fullWidthRomanCandidate
self.halfWidthKanaCandidate = halfWidthKanaCandidate
@@ -46,6 +99,7 @@ public struct ConvertRequestOptions: Sendable {
self.sharedContainerURL = sharedContainerURL
self.metadata = metadata
self.textReplacer = textReplacer
self.specialCandidateProviders = specialCandidateProviders
self.zenzaiMode = zenzaiMode
self.preloadDictionary = preloadDictionary
self.dictionaryResourceURL = dictionaryResourceURL
@@ -103,8 +157,6 @@ public struct ConvertRequestOptions: Sendable {
public var requireEnglishPrediction: Bool
public var keyboardLanguage: KeyboardLanguage
// KeyboardSettinginjection
public var typographyLetterCandidate: Bool
public var unicodeCandidate: Bool
public var englishCandidateInRoman2KanaInput: Bool
public var fullWidthRomanCandidate: Bool
public var halfWidthKanaCandidate: Bool
@@ -117,6 +169,8 @@ public struct ConvertRequestOptions: Sendable {
public var memoryDirectoryURL: URL
public var sharedContainerURL: URL
public var dictionaryResourceURL: URL
/// providers to generate "special" candidates such as Unicode conversion.
public var specialCandidateProviders: [any SpecialCandidateProvider]
public var zenzaiMode: ZenzaiMode
public var preloadDictionary: Bool
//
@@ -131,11 +185,7 @@ public struct ConvertRequestOptions: Sendable {
requireJapanesePrediction: true,
requireEnglishPrediction: true,
keyboardLanguage: .ja_JP,
typographyLetterCandidate: false,
unicodeCandidate: true,
englishCandidateInRoman2KanaInput: true,
fullWidthRomanCandidate: true,
halfWidthKanaCandidate: false,
learningType: .inputAndOutput,
maxMemoryCount: 65536,
shouldResetMemory: false,
@@ -146,6 +196,7 @@ public struct ConvertRequestOptions: Sendable {
// dummy data, won't work
sharedContainerURL: Bundle.main.bundleURL,
textReplacer: .empty,
specialCandidateProviders: nil,
preloadDictionary: false,
metadata: nil
)

View File

@@ -18,6 +18,13 @@ import EfficientNGram
}
private var converter = Kana2Kanji()
/// Built-in special-candidate providers.
///
/// `ConvertRequestOptions.init` substitutes this list when it is given `specialCandidateProviders: nil`.
/// `getSpecialCandidate` flat-maps over the configured providers in array order, so the order of this
/// literal is also the order in which the providers' candidates are concatenated.
///
/// `TypographySpecialCandidateProvider` is not part of the defaults; callers that want it must pass
/// their own provider list.
// NOTE(review): `SpecialCandidateProvider` refines `Sendable`, so this immutable array looks Sendable and
// `nonisolated(unsafe)` may be stronger than needed — confirm against the enclosing type's isolation.
nonisolated(unsafe) public static let defaultSpecialCandidateProviders: [any SpecialCandidateProvider] = [
CalendarSpecialCandidateProvider(),
EmailAddressSpecialCandidateProvider(),
UnicodeSpecialCandidateProvider(),
VersionSpecialCandidateProvider(),
TimeExpressionSpecialCandidateProvider()
]
@MainActor private var checker = SpellChecker()
private var checkerInitialized: [KeyboardLanguage: Bool] = [.none: true, .ja_JP: true]
@@ -141,23 +148,10 @@ import EfficientNGram
/// - string: String
/// - Returns:
/// `
private func getWiseCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
var result = [Candidate]()
// toWarekiCandidates/toSeirekiCandidatesoff
result.append(contentsOf: self.toWarekiCandidates(inputData))
result.append(contentsOf: self.toSeirekiCandidates(inputData))
result.append(contentsOf: self.toEmailAddressCandidates(inputData))
if options.typographyLetterCandidate {
result.append(contentsOf: self.typographicalCandidates(inputData))
private func getSpecialCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
options.specialCandidateProviders.flatMap { provider in
provider.provideCandidates(converter: self, inputData: inputData, options: options)
}
if options.unicodeCandidate {
result.append(contentsOf: self.unicodeCandidates(inputData))
}
result.append(contentsOf: self.toVersionCandidate(inputData, options: options))
result.append(contentsOf: self.convertToTimeExpression(inputData))
return result
}
///
@@ -563,7 +557,7 @@ import EfficientNGram
seenCandidate.formUnion(word_candidates.map {$0.text})
//
let wise_candidates: [Candidate] = self.getUniqueCandidate(self.getWiseCandidate(inputData, options: options), seenCandidates: seenCandidate)
let wise_candidates: [Candidate] = self.getUniqueCandidate(self.getSpecialCandidate(inputData, options: options), seenCandidates: seenCandidate)
// wise_candidates
word_candidates.insert(contentsOf: wise_candidates, at: min(5, word_candidates.endIndex))

View File

@@ -0,0 +1,70 @@
/// A pluggable source of "special" conversion candidates.
///
/// Providers are stored in `ConvertRequestOptions.specialCandidateProviders`. Conforming types must be
/// `Sendable`, because the protocol refines it.
public protocol SpecialCandidateProvider: Sendable {
/// Produces this provider's candidates for the current input.
///
/// The requirement is isolated to the main actor.
///
/// - Parameters:
///   - converter: The converter requesting candidates; the built-in providers forward to its helper methods.
///   - inputData: The composing text to generate candidates for.
///   - options: The options of the current conversion request.
/// - Returns: The candidates contributed by this provider.
@MainActor
func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate]
}
/// Special-candidate provider backed by the converter's wareki and seireki helpers
/// (presumably Japanese-era / Western-calendar year conversions — confirm).
public struct CalendarSpecialCandidateProvider: SpecialCandidateProvider {
    /// Creates the provider; it holds no state.
    public init() {}

    /// Returns the result of `converter.toWarekiCandidates(_:)` followed by the result of
    /// `converter.toSeirekiCandidates(_:)`. The request options are ignored.
    @MainActor
    public func provideCandidates(
        converter: KanaKanjiConverter,
        inputData: ComposingText,
        options _: ConvertRequestOptions
    ) -> [Candidate] {
        var candidates = [Candidate]()
        // Wareki first, then seireki: the same order as the original concatenation.
        candidates.append(contentsOf: converter.toWarekiCandidates(inputData))
        candidates.append(contentsOf: converter.toSeirekiCandidates(inputData))
        return candidates
    }
}
/// Special-candidate provider backed by `KanaKanjiConverter.toEmailAddressCandidates(_:)`.
public struct EmailAddressSpecialCandidateProvider: SpecialCandidateProvider {
    /// Creates the provider; it holds no state.
    public init() {}

    /// Forwards `inputData` to `converter.toEmailAddressCandidates(_:)` and returns its result.
    /// The request options are ignored.
    @MainActor
    public func provideCandidates(
        converter: KanaKanjiConverter,
        inputData: ComposingText,
        options _: ConvertRequestOptions
    ) -> [Candidate] {
        return converter.toEmailAddressCandidates(inputData)
    }
}
/// Special-candidate provider backed by `KanaKanjiConverter.typographicalCandidates(_:)`.
public struct TypographySpecialCandidateProvider: SpecialCandidateProvider {
    /// Creates the provider; it holds no state.
    public init() {}

    /// Forwards `inputData` to `converter.typographicalCandidates(_:)` and returns its result.
    /// The request options are ignored.
    @MainActor
    public func provideCandidates(
        converter: KanaKanjiConverter,
        inputData: ComposingText,
        options _: ConvertRequestOptions
    ) -> [Candidate] {
        return converter.typographicalCandidates(inputData)
    }
}
/// Special-candidate provider backed by `KanaKanjiConverter.unicodeCandidates(_:)`.
public struct UnicodeSpecialCandidateProvider: SpecialCandidateProvider {
    /// Creates the provider; it holds no state.
    public init() {}

    /// Forwards `inputData` to `converter.unicodeCandidates(_:)` and returns its result.
    /// The request options are ignored.
    @MainActor
    public func provideCandidates(
        converter: KanaKanjiConverter,
        inputData: ComposingText,
        options _: ConvertRequestOptions
    ) -> [Candidate] {
        return converter.unicodeCandidates(inputData)
    }
}
/// Special-candidate provider backed by `KanaKanjiConverter.toVersionCandidate(_:options:)`.
public struct VersionSpecialCandidateProvider: SpecialCandidateProvider {
    /// Creates the provider; it holds no state.
    public init() {}

    /// Forwards `inputData` and `options` to `converter.toVersionCandidate(_:options:)` and returns its result.
    @MainActor
    public func provideCandidates(
        converter: KanaKanjiConverter,
        inputData: ComposingText,
        options: ConvertRequestOptions
    ) -> [Candidate] {
        return converter.toVersionCandidate(inputData, options: options)
    }
}
/// Special-candidate provider backed by `KanaKanjiConverter.convertToTimeExpression(_:)`.
public struct TimeExpressionSpecialCandidateProvider: SpecialCandidateProvider {
    /// Creates the provider; it holds no state.
    public init() {}

    /// Forwards `inputData` to `converter.convertToTimeExpression(_:)` and returns its result.
    /// The request options are ignored.
    @MainActor
    public func provideCandidates(
        converter: KanaKanjiConverter,
        inputData: ComposingText,
        options _: ConvertRequestOptions
    ) -> [Candidate] {
        return converter.convertToTimeExpression(inputData)
    }
}
/// Adds the `.calendar` shorthand for contexts that expect a `CalendarSpecialCandidateProvider`.
public extension SpecialCandidateProvider where Self == CalendarSpecialCandidateProvider {
    /// A freshly created `CalendarSpecialCandidateProvider`.
    static var calendar: Self {
        CalendarSpecialCandidateProvider()
    }
}
/// Adds the `.emailAddress` shorthand for contexts that expect an `EmailAddressSpecialCandidateProvider`.
public extension SpecialCandidateProvider where Self == EmailAddressSpecialCandidateProvider {
    /// A freshly created `EmailAddressSpecialCandidateProvider`.
    static var emailAddress: Self {
        EmailAddressSpecialCandidateProvider()
    }
}
/// Adds the `.typography` shorthand for contexts that expect a `TypographySpecialCandidateProvider`.
public extension SpecialCandidateProvider where Self == TypographySpecialCandidateProvider {
    /// A freshly created `TypographySpecialCandidateProvider`.
    static var typography: Self {
        TypographySpecialCandidateProvider()
    }
}
/// Adds the `.unicode` shorthand for contexts that expect a `UnicodeSpecialCandidateProvider`.
public extension SpecialCandidateProvider where Self == UnicodeSpecialCandidateProvider {
    /// A freshly created `UnicodeSpecialCandidateProvider`.
    static var unicode: Self {
        UnicodeSpecialCandidateProvider()
    }
}
/// Adds the `.version` shorthand for contexts that expect a `VersionSpecialCandidateProvider`.
public extension SpecialCandidateProvider where Self == VersionSpecialCandidateProvider {
    /// A freshly created `VersionSpecialCandidateProvider`.
    static var version: Self {
        VersionSpecialCandidateProvider()
    }
}
/// Adds the `.timeExpression` shorthand for contexts that expect a `TimeExpressionSpecialCandidateProvider`.
public extension SpecialCandidateProvider where Self == TimeExpressionSpecialCandidateProvider {
    /// A freshly created `TimeExpressionSpecialCandidateProvider`.
    static var timeExpression: Self {
        TimeExpressionSpecialCandidateProvider()
    }
}