Merge pull request #158 from fkunn1326/perf-windows

perf: ファイルI/Oでの遅延をなくすために、辞書を事前に読み込めるように
This commit is contained in:
Miwa
2025-03-06 19:47:54 +09:00
committed by GitHub
4 changed files with 82 additions and 30 deletions

View File

@@ -29,7 +29,7 @@ public struct ConvertRequestOptions: Sendable {
/// - sharedContainerURL:
/// - textReplacer:
/// - metadata: `ConvertRequestOptions.Metadata`
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, textReplacer: TextReplacer, zenzaiMode: ZenzaiMode = .off, metadata: ConvertRequestOptions.Metadata?) {
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, textReplacer: TextReplacer, zenzaiMode: ZenzaiMode = .off, preloadDictionary: Bool = false, metadata: ConvertRequestOptions.Metadata?) {
self.N_best = N_best
self.requireJapanesePrediction = requireJapanesePrediction
self.requireEnglishPrediction = requireEnglishPrediction
@@ -47,6 +47,7 @@ public struct ConvertRequestOptions: Sendable {
self.metadata = metadata
self.textReplacer = textReplacer
self.zenzaiMode = zenzaiMode
self.preloadDictionary = preloadDictionary
self.dictionaryResourceURL = dictionaryResourceURL
}
@@ -69,9 +70,10 @@ public struct ConvertRequestOptions: Sendable {
/// - memoryDirectoryURL:
/// - sharedContainerURL:
/// - textReplacer:
/// - preloadDictionary: If `true`, the dictionary files are read into memory in advance to avoid file I/O latency later.
/// - metadata: `ConvertRequestOptions.Metadata`
@available(*, deprecated, message: "it be removed in AzooKeyKanaKanjiConverter v1.0")
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, zenzaiMode: ZenzaiMode = .off, metadata: ConvertRequestOptions.Metadata?) {
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, zenzaiMode: ZenzaiMode = .off, preloadDictionary: Bool = false, metadata: ConvertRequestOptions.Metadata?) {
self.init(
N_best: N_best,
requireJapanesePrediction: requireJapanesePrediction,
@@ -91,6 +93,7 @@ public struct ConvertRequestOptions: Sendable {
// MARK: using deprecated initializer here
textReplacer: TextReplacer(),
zenzaiMode: zenzaiMode,
preloadDictionary: preloadDictionary,
metadata: metadata
)
}
@@ -115,6 +118,7 @@ public struct ConvertRequestOptions: Sendable {
public var sharedContainerURL: URL
public var dictionaryResourceURL: URL
public var zenzaiMode: ZenzaiMode
public var preloadDictionary: Bool
//
public var metadata: Metadata?
@@ -142,6 +146,7 @@ public struct ConvertRequestOptions: Sendable {
// dummy data, won't work
sharedContainerURL: Bundle.main.bundleURL,
textReplacer: .empty,
preloadDictionary: false,
metadata: nil
)
}

View File

@@ -26,6 +26,7 @@ public final class DicdataStore {
private var mmValue: [PValue] = []
private var loudses: [String: LOUDS] = [:]
private var loudstxts: [String: Data] = [:]
private var importedLoudses: Set<String> = []
private var charsID: [Character: UInt8] = [:]
private var learningManager = LearningManager()
@@ -68,6 +69,40 @@ public final class DicdataStore {
}
_ = self.loadLOUDS(query: "user")
_ = self.loadLOUDS(query: "memory")
if requestOptions.preloadDictionary {
self.preloadDictionary()
}
}
/// Preloads dictionary files to avoid file I/O latency later.
///
/// Enumerates the `louds` directory under `requestOptions.dictionaryResourceURL` and,
/// keyed by each file's base name (file name without extension):
/// - `.louds` files are loaded with `LOUDS.load` and stored in `loudses`;
/// - `.loudstxt3` files are read as raw `Data` and stored in `loudstxts`.
///
/// This is best-effort: a failure is logged with `debug` and never thrown.
///
/// NOTE(review): other code in this file stores `loudses` entries under the unescaped
/// query, whereas this stores them under the file's base name. For escaped names such as
/// "[0A]" the two keys differ — confirm whether those preloaded entries are ever hit.
private func preloadDictionary() {
    let loudsDirectoryURL = self.requestOptions.dictionaryResourceURL
        .appendingPathComponent("louds", isDirectory: true)
    let fileURLs: [URL]
    do {
        fileURLs = try FileManager.default.contentsOfDirectory(
            at: loudsDirectoryURL,
            includingPropertiesForKeys: nil
        )
    } catch {
        // Log why the preload was skipped instead of returning silently.
        debug("preloadDictionary: \(error)")
        return
    }

    for url in fileURLs {
        let identifier = url.deletingPathExtension().lastPathComponent
        switch url.pathExtension {
        case "louds":
            // `user` and `memory` are skipped here; the caller loads them separately.
            if identifier == "user" || identifier == "memory" {
                continue
            }
            loudses[identifier] = LOUDS.load(identifier, option: self.requestOptions)
        case "loudstxt3":
            do {
                loudstxts[identifier] = try Data(contentsOf: url)
            } catch {
                // Keep going with the remaining files, but report the underlying error.
                debug("Error: Could not load loudstxt3 file at \(url): \(error)")
            }
        default:
            continue
        }
    }
}
public enum Notification {
@@ -162,19 +197,19 @@ public final class DicdataStore {
importedLoudses.insert(query)
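// Some ASCII characters are replaced by a bracketed hex code (e.g. "[0A]") to form the identifier passed to `LOUDS.load`.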
let identifier = [
#"\n"#: "[0A]",
#" "#: "[20]",
#"""#: "[22]",
#"'"#: "[27]",
#"*"#: "[2A]",
#"+"#: "[2B]",
#"."#: "[2E]",
#"/"#: "[2F]",
#":"#: "[3A]",
#"<"#: "[3C]",
#">"#: "[3E]",
#"\"#: "[5C]",
#"|"#: "[7C]",
"\\n": "[0A]",
" ": "[20]",
"\"": "[22]",
"\'": "[27]",
"*": "[2A]",
"+": "[2B]",
".": "[2E]",
"/": "[2F]",
":": "[3A]",
"<": "[3C]",
">": "[3E]",
"\\": "[5C]",
"|": "[7C]",
][query, default: query]
if let louds = LOUDS.load(identifier, option: self.requestOptions) {
self.loudses[query] = louds
@@ -217,7 +252,7 @@ public final class DicdataStore {
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
var data: [DicdataElement] = []
for (key, value) in dict {
data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {$0 & 2047}, option: self.requestOptions))
data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {$0 & 2047}, cache: self.loudstxts[identifier + "\(key)"], option: self.requestOptions))
}
if identifier == "memory" {
data.mutatingForeach {

View File

@@ -104,14 +104,19 @@ extension LOUDS {
}
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [DicdataElement] {
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], cache: Data? = nil, option: ConvertRequestOptions) -> [DicdataElement] {
let binary: Data
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
} catch {
debug("getDataForLoudstxt3: \(error)")
return []
if let cache {
binary = cache
} else {
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
} catch {
debug("getDataForLoudstxt3: \(error)")
return []
}
}
let lc = binary[0..<2].toArray(of: UInt16.self)[0]
@@ -127,14 +132,19 @@ extension LOUDS {
}
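/// Overload that takes `(trueIndex, keyIndex)` pairs and returns the dictionary entries paired with a LOUDS node index.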
static func getDataForLoudstxt3(_ identifier: String, indices: [(trueIndex: Int, keyIndex: Int)], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
static func getDataForLoudstxt3(_ identifier: String, indices: [(trueIndex: Int, keyIndex: Int)], cache: Data? = nil, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
let binary: Data
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
} catch {
debug("getDataForLoudstxt3: \(error)")
return []
if let cache {
binary = cache
} else {
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
} catch {
debug("getDataForLoudstxt3: \(error)")
return []
}
}
let lc = binary[0..<2].toArray(of: UInt16.self)[0]

View File

@@ -19,6 +19,7 @@ public extension ConvertRequestOptions {
sharedContainerURL: URL,
zenzaiMode: ZenzaiMode = .off,
textReplacer: TextReplacer = .withDefaultEmojiDictionary(),
preloadDictionary: Bool = false,
metadata: ConvertRequestOptions.Metadata?
) -> Self {
#if os(iOS) || os(watchOS) || os(tvOS) || os(visionOS)
@@ -46,6 +47,7 @@ public extension ConvertRequestOptions {
sharedContainerURL: sharedContainerURL,
textReplacer: textReplacer,
zenzaiMode: zenzaiMode,
preloadDictionary: preloadDictionary,
metadata: metadata
)
}