Merge branch 'develop' into feature/improve_convert_graph_impls

This commit is contained in:
Miwa / Ensan
2024-03-16 11:56:56 +09:00
17 changed files with 214 additions and 69 deletions

View File

@@ -0,0 +1,13 @@
import KanaKanjiConverterModuleWithDefaultDictionary
import ArgumentParser
/// Root command of the executable.
///
/// It carries no options of its own: it only registers the available
/// subcommands and makes `Subcommands.Run` the default, so invoking the tool
/// without a subcommand name behaves like invoking `run`.
@main
public struct Anco: ParsableCommand {
    public init() {}

    /// Command metadata consumed by ArgumentParser (help text and subcommand routing).
    public static var configuration: CommandConfiguration = .init(
        abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter",
        subcommands: [Subcommands.Run.self],
        defaultSubcommand: Subcommands.Run.self
    )
}

View File

@@ -0,0 +1,2 @@
/// Namespace for subcommands.
///
/// This enum has no cases, so it can never be instantiated; it exists only so
/// that subcommand types (such as `Run`) can be nested under it via extensions.
enum Subcommands {}

View File

@@ -0,0 +1,50 @@
import KanaKanjiConverterModuleWithDefaultDictionary
import ArgumentParser
import Foundation
extension Subcommands {
    /// The `run` subcommand: converts a single input string and prints the
    /// best candidates, one per line, to standard output.
    struct Run: ParsableCommand {
        /// Text to convert. Defaults to the empty string when omitted.
        @Argument(help: "ひらがなで表記された入力")
        var input: String = ""

        /// Forwarded to the converter as its `N_best` request option.
        @Option(name: [.customLong("config_n_best")], help: "The parameter n (n best parameter) for internal viterbi search.")
        var configNBest: Int = 10

        /// Maximum number of candidates printed by `run()`.
        @Option(name: [.customShort("n"), .customLong("top_n")], help: "Display top n candidates.")
        var displayTopN: Int = 1

        /// When set, Japanese prediction candidates are not requested.
        @Flag(name: [.customLong("disable_prediction")], help: "Disable producing prediction candidates.")
        var disablePrediction = false

        static var configuration = CommandConfiguration(
            commandName: "run",
            abstract: "Convert the input and print the top candidates."
        )

        /// Rejects argument values that `run()` cannot handle.
        ///
        /// `Collection.prefix(_:)` traps on a negative length, so a negative
        /// `top_n` is reported as an ordinary argument error instead of
        /// crashing the process.
        ///
        /// - Throws: `ValidationError` when `top_n` is negative.
        mutating func validate() throws {
            guard displayTopN >= 0 else {
                throw ValidationError("top_n must be zero or greater.")
            }
        }

        /// Builds a converter, feeds it `input` as directly typed text, and
        /// prints the text of at most `displayTopN` main results.
        @MainActor mutating func run() {
            let converter = KanaKanjiConverter()
            var composingText = ComposingText()
            composingText.insertAtCursorPosition(input, inputStyle: .direct)
            let result = converter.requestCandidates(composingText, options: requestOptions())
            for candidate in result.mainResults.prefix(displayTopN) {
                print(candidate.text)
            }
        }

        /// Builds the conversion options from the parsed command-line values.
        ///
        /// Only `N_best` and `requireJapanesePrediction` depend on user input;
        /// every other option is fixed. Learning is requested as `.nothing`
        /// with a memory count of 0, and empty file paths are passed for the
        /// memory and shared-container directories.
        ///
        /// - Returns: Options backed by the default dictionary.
        func requestOptions() -> ConvertRequestOptions {
            .withDefaultDictionary(
                N_best: configNBest,
                requireJapanesePrediction: !disablePrediction,
                requireEnglishPrediction: false,
                keyboardLanguage: .ja_JP,
                typographyLetterCandidate: false,
                unicodeCandidate: true,
                englishCandidateInRoman2KanaInput: true,
                fullWidthRomanCandidate: false,
                halfWidthKanaCandidate: false,
                learningType: .nothing,
                maxMemoryCount: 0,
                shouldResetMemory: false,
                memoryDirectoryURL: URL(fileURLWithPath: ""),
                sharedContainerURL: URL(fileURLWithPath: ""),
                metadata: .init(appVersionString: "anco")
            )
        }
    }
}

View File

@@ -48,7 +48,7 @@ extension KanaKanjiConverter {
}
var string = string[...]
// drop
guard "ネン" == string.suffix(2) else {
guard string.hasSuffix("ネン") else {
return nil
}
string = string.dropLast(2)

View File

@@ -30,7 +30,7 @@ public final class DicdataStore {
private var charsID: [Character: UInt8] = [:]
private var learningManager = LearningManager()
private var osUserDict: [DicdataElement] = []
private var dynamicUserDict: [DicdataElement] = []
///
/// - TODO: make this value as an option
@@ -71,6 +71,10 @@ public final class DicdataStore {
}
public enum Notification {
/// use `importDynamicUserDict` for data that cannot be obtained statically.
/// - warning: Too many dynamic user dictionary entries will damage conversion performance, as the dynamic user dictionary uses inefficient lookup algorithms. If your entries can be listed statically, use normal user dictionaries instead.
case importDynamicUserDict([DicdataElement])
@available(*, deprecated, renamed: "importDynamicUserDict", message: "it will be removed in AzooKeyKanaKanjiConverter v1.0")
case importOSUserDict([DicdataElement])
case setRequestOptions(ConvertRequestOptions)
case forgetMemory(Candidate)
@@ -81,8 +85,8 @@ public final class DicdataStore {
switch data {
case .closeKeyboard:
self.closeKeyboard()
case let .importOSUserDict(osUserDict):
self.osUserDict = osUserDict
case .importOSUserDict(let dicdata), .importDynamicUserDict(let dicdata):
self.dynamicUserDict = dicdata
case let .forgetMemory(candidate):
self.learningManager.forgetMemory(data: candidate.data)
// louds
@@ -420,57 +424,29 @@ public final class DicdataStore {
if count == .zero {
return []
}
// 1
if count == 1 {
do {
let csvString = try String(contentsOf: requestOptions.dictionaryResourceURL.appendingPathComponent("p/p_\(key).csv", isDirectory: false), encoding: String.Encoding.utf8)
let csvLines = csvString.split(separator: "\n")
let csvData = csvLines.map {$0.split(separator: ",", omittingEmptySubsequences: false)}
let dicdata: [DicdataElement] = csvData
.map {self.parseLoudstxt2FormattedEntry(from: $0)}
.filter { Self.predictionUsable[$0.rcid] }
return dicdata
} catch {
debug("ファイルが存在しません: \(error)")
return []
}
} else if count == 2 {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map(self.character2charId)
// 700
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: 5).prefix(700)
result.append(
contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices))
.filter { Self.predictionUsable[$0.rcid] }
)
let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs, depth: 5).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(userDictIndices)))
if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs, depth: 5).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
}
return result
// 700
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map(self.character2charId)
// 1, 2depth
let depth = if count == 1 || count == 2 {
5
} else {
var result: [DicdataElement] = []
let first = String(key.first!)
let charIDs = key.map(self.character2charId)
// 700
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs).prefix(700)
result.append(
contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(prefixIndices))
.filter { Self.predictionUsable[$0.rcid] }
)
let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(userDictIndices)))
if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
}
return result
Int.max
}
let prefixIndices = self.prefixMatchLOUDS(identifier: first, charIDs: charIDs, depth: depth).prefix(700)
result.append(
contentsOf: self.getDicdataFromLoudstxt3(identifier: first, indices: Set(consume prefixIndices))
.filter { Self.predictionUsable[$0.rcid] }
)
let userDictIndices = self.prefixMatchLOUDS(identifier: "user", charIDs: charIDs, depth: depth).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "user", indices: Set(consume userDictIndices)))
if learningManager.enabled {
let memoryDictIndices = self.prefixMatchLOUDS(identifier: "memory", charIDs: charIDs).prefix(700)
result.append(contentsOf: self.getDicdataFromLoudstxt3(identifier: "memory", indices: Set(consume memoryDictIndices)))
result.append(contentsOf: self.learningManager.temporaryPrefixMatch(charIDs: charIDs))
}
return result
}
private func parseLoudstxt2FormattedEntry(from dataString: [some StringProtocol]) -> DicdataElement {
@@ -634,12 +610,12 @@ public final class DicdataStore {
/// OSruby
func getMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
self.osUserDict.filter {$0.ruby == ruby}
self.dynamicUserDict.filter {$0.ruby == ruby}
}
/// OSruby
func getPrefixMatchOSUserDict(_ ruby: some StringProtocol) -> [DicdataElement] {
self.osUserDict.filter {$0.ruby.hasPrefix(ruby)}
self.dynamicUserDict.filter {$0.ruby.hasPrefix(ruby)}
}
//

View File

@@ -54,7 +54,7 @@ struct LOUDS: Sendable {
}
return flatChar2nodeIndices
}
self.flatChar2nodeIndicesIndex = consume flatChar2nodeIndicesIndex
self.flatChar2nodeIndicesIndex = flatChar2nodeIndicesIndex
var rankLarge: [UInt32] = .init(repeating: 0, count: bytes.count + 1)
rankLarge.withUnsafeMutableBufferPointer { buffer in
@@ -62,7 +62,7 @@ struct LOUDS: Sendable {
buffer[i + 1] = buffer[i] &+ UInt32(Self.unit &- byte.nonzeroBitCount)
}
}
self.rankLarge = consume rankLarge
self.rankLarge = rankLarge
}
/// parentNodeIndex01Index

View File

@@ -53,7 +53,7 @@ public struct TextReplacer: Sendable {
}
}
@available(*, deprecated, renamed: "init(emojiDataProvider:)", message: "init() is depreacted and will be removed in v1.0. Use init(emojiDataProvider:) instead")
@available(*, deprecated, renamed: "init(emojiDataProvider:)", message: "it be removed in AzooKeyKanaKanjiConverter v1.0")
public init() {
self.init {
if #available(iOS 16.4, *) {