refactor: キャッシュしたデータをDicdataStoreに保持するように

This commit is contained in:
fkunn1326
2025-02-27 07:10:44 +09:00
parent 39b90389da
commit d0397c62ca
4 changed files with 56 additions and 84 deletions

View File

@@ -29,7 +29,7 @@ public struct ConvertRequestOptions: Sendable {
/// - sharedContainerURL:
/// - textReplacer:
/// - metadata: `ConvertRequestOptions.Metadata`
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, textReplacer: TextReplacer, zenzaiMode: ZenzaiMode = .off, metadata: ConvertRequestOptions.Metadata?) {
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, textReplacer: TextReplacer, zenzaiMode: ZenzaiMode = .off, preloadDictionary: Bool = false, metadata: ConvertRequestOptions.Metadata?) {
self.N_best = N_best
self.requireJapanesePrediction = requireJapanesePrediction
self.requireEnglishPrediction = requireEnglishPrediction
@@ -47,6 +47,7 @@ public struct ConvertRequestOptions: Sendable {
self.metadata = metadata
self.textReplacer = textReplacer
self.zenzaiMode = zenzaiMode
self.preloadDictionary = preloadDictionary
self.dictionaryResourceURL = dictionaryResourceURL
}
@@ -69,9 +70,10 @@ public struct ConvertRequestOptions: Sendable {
/// - memoryDirectoryURL:
/// - sharedContainerURL:
/// - textReplacer:
/// - preloadDictionary:
/// - metadata: `ConvertRequestOptions.Metadata`
@available(*, deprecated, message: "it be removed in AzooKeyKanaKanjiConverter v1.0")
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, zenzaiMode: ZenzaiMode = .off, metadata: ConvertRequestOptions.Metadata?) {
public init(N_best: Int = 10, requireJapanesePrediction: Bool, requireEnglishPrediction: Bool, keyboardLanguage: KeyboardLanguage, typographyLetterCandidate: Bool = false, unicodeCandidate: Bool = true, englishCandidateInRoman2KanaInput: Bool = false, fullWidthRomanCandidate: Bool = false, halfWidthKanaCandidate: Bool = false, learningType: LearningType, maxMemoryCount: Int = 65536, shouldResetMemory: Bool = false, dictionaryResourceURL: URL, memoryDirectoryURL: URL, sharedContainerURL: URL, zenzaiMode: ZenzaiMode = .off, preloadDictionary: Bool = false, metadata: ConvertRequestOptions.Metadata?) {
self.init(
N_best: N_best,
requireJapanesePrediction: requireJapanesePrediction,
@@ -91,6 +93,7 @@ public struct ConvertRequestOptions: Sendable {
// MARK: using deprecated initializer here
textReplacer: TextReplacer(),
zenzaiMode: zenzaiMode,
preloadDictionary: preloadDictionary,
metadata: metadata
)
}
@@ -115,6 +118,7 @@ public struct ConvertRequestOptions: Sendable {
public var sharedContainerURL: URL
public var dictionaryResourceURL: URL
public var zenzaiMode: ZenzaiMode
public var preloadDictionary: Bool
//
public var metadata: Metadata?
@@ -142,6 +146,7 @@ public struct ConvertRequestOptions: Sendable {
// dummy data, won't work
sharedContainerURL: Bundle.main.bundleURL,
textReplacer: .empty,
preloadDictionary: false,
metadata: nil
)
}

View File

@@ -43,10 +43,6 @@ import EfficientNGram
self.lastData = nil
}
/// Preloads dictionary data by forwarding to `LOUDS.preloadLouds(option:)`.
/// - Parameter option: Request options passed through unchanged to the LOUDS preloader.
public func preloadLouds(option: ConvertRequestOptions) {
LOUDS.preloadLouds(option: option)
}
private func getZenzaiPersonalization(mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode?) -> (mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode, base: EfficientNGram, personal: EfficientNGram)? {
guard let mode else {
return nil

View File

@@ -26,6 +26,7 @@ public final class DicdataStore {
private var mmValue: [PValue] = []
private var loudses: [String: LOUDS] = [:]
private var loudstxts: [String: Data] = [:]
private var importedLoudses: Set<String> = []
private var charsID: [Character: UInt8] = [:]
private var learningManager = LearningManager()
@@ -68,6 +69,40 @@ public final class DicdataStore {
}
_ = self.loadLOUDS(query: "user")
_ = self.loadLOUDS(query: "memory")
if requestOptions.preloadDictionary {
self.preloadDictionary()
}
}
/// Eagerly loads the dictionary files into this store's in-memory tables.
///
/// Scans the `louds` directory under `requestOptions.dictionaryResourceURL`.
/// Every `.louds` file is loaded through `LOUDS.load` into `loudses`, and every
/// `.loudstxt3` file is read in full into `loudstxts`. Both tables are keyed by the
/// file name without its extension. All file I/O here is synchronous.
///
/// Failures are reported through `debug` and never abort the preload: a directory
/// that cannot be listed ends the preload early, and an unreadable file is skipped.
///
/// NOTE(review): entries are keyed by the on-disk file name and `importedLoudses` is
/// not updated here — confirm this matches the keys and bookkeeping of the lookup path.
private func preloadDictionary() {
    let loudsDirectoryURL = self.requestOptions.dictionaryResourceURL.appendingPathComponent("louds", isDirectory: true)
    let fileURLs: [URL]
    do {
        fileURLs = try FileManager.default.contentsOfDirectory(
            at: loudsDirectoryURL,
            includingPropertiesForKeys: nil
        )
    } catch {
        // Previously swallowed by `try?`; surface it so a wrong resource URL is diagnosable.
        debug("Error: Could not list louds directory at \(loudsDirectoryURL): \(error)")
        return
    }

    for url in fileURLs {
        let identifier = url.deletingPathExtension().lastPathComponent
        switch url.pathExtension {
        case "louds":
            // "user" and "memory" are excluded from the bulk preload.
            if identifier == "user" || identifier == "memory" {
                continue
            }
            // `LOUDS.load` returns nil on failure; assigning nil leaves no entry for this key.
            loudses[identifier] = LOUDS.load(identifier, option: self.requestOptions)
        case "loudstxt3":
            do {
                loudstxts[identifier] = try Data(contentsOf: url)
            } catch {
                debug("Error: Could not load loudstxt3 file at \(url): \(error)")
            }
        default:
            // Other files in the directory are not preloaded by this method.
            continue
        }
    }
}
public enum Notification {
@@ -217,7 +252,7 @@ public final class DicdataStore {
let dict = [Int: [Int]].init(grouping: indices, by: {$0 >> 11})
var data: [DicdataElement] = []
for (key, value) in dict {
data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {$0 & 2047}, option: self.requestOptions))
data.append(contentsOf: LOUDS.getDataForLoudstxt3(identifier + "\(key)", indices: value.map {$0 & 2047}, cache: self.loudstxts[identifier + "\(key)"], option: self.requestOptions))
}
if identifier == "memory" {
data.mutatingForeach {

View File

@@ -10,60 +10,10 @@ import Foundation
import SwiftUtils
extension LOUDS {
// Cache to store preloaded loudstxt3 data
private static var loudstxt3Cache: [String: Data] = [:]
private static var loudsCache: [String: [UInt64]] = [:]
private static var loudscharsCache: [String: [UInt8]] = [:]
/// Preloads the LOUDS dictionary files into the static caches.
///
/// Intended to be called once at app startup. Scans the `louds` directory under
/// `option.dictionaryResourceURL` and fills `loudsCache` (`.louds`),
/// `loudscharsCache` (`.loudschars2`) and `loudstxt3Cache` (`.loudstxt3`), each keyed
/// by the file name without its extension. Files with any other extension are ignored.
///
/// Failures are reported through `debug` and never abort the preload.
/// - Parameter option: Request options that supply the dictionary resource location.
public static func preloadLouds(option: ConvertRequestOptions) {
    let loudsDirectoryURL = option.dictionaryResourceURL.appendingPathComponent("louds", isDirectory: true)
    let fileURLs: [URL]
    do {
        fileURLs = try FileManager.default.contentsOfDirectory(
            at: loudsDirectoryURL,
            includingPropertiesForKeys: nil
        )
    } catch {
        debug("Failed to list louds directory \(loudsDirectoryURL): \(error)")
        return
    }

    for url in fileURLs {
        let identifier = url.deletingPathExtension().lastPathComponent
        switch url.pathExtension {
        case "louds":
            // `loadLOUDSBinary(from:)` does not throw; it returns nil on failure, so a
            // `do`/`catch` around it can never reach its `catch`. Check the Optional instead.
            if let bits = loadLOUDSBinary(from: url) {
                loudsCache[identifier] = bits
            } else {
                debug("Failed to preload \(identifier).louds")
            }
        case "loudschars2":
            do {
                loudscharsCache[identifier] = try Array(Data(contentsOf: url))
            } catch {
                debug("Failed to preload \(identifier).loudschars2: \(error)")
            }
        case "loudstxt3":
            do {
                loudstxt3Cache[identifier] = try Data(contentsOf: url)
            } catch {
                debug("Failed to preload \(identifier).loudstxt3: \(error)")
            }
        default:
            break
        }
    }
}
private static func loadLOUDSBinary(from url: URL) -> [UInt64]? {
// Check if the data is already cached
let identifier = url.deletingPathExtension().lastPathComponent
if let cachedData = loudsCache[identifier] {
return cachedData
}
// If not in cache, load from disk
do {
let binaryData = try Data(contentsOf: url, options: [.uncached])
let binaryData = try Data(contentsOf: url, options: [.uncached]) // 2
let ui64array = binaryData.toArray(of: UInt64.self)
// Store in cache for future use
loudsCache[identifier] = ui64array
return ui64array
} catch {
debug(error)
@@ -72,6 +22,7 @@ extension LOUDS {
}
private static func getLOUDSURL(_ identifier: String, option: ConvertRequestOptions) -> (chars: URL, louds: URL) {
if identifier == "user"{
return (
option.sharedContainerURL.appendingPathComponent("user.loudschars2", isDirectory: false),
@@ -106,18 +57,11 @@ extension LOUDS {
package static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option)
let nodeIndex2ID: [UInt8]
// Check if the data is already cached
if let cachedData = loudscharsCache[identifier] {
nodeIndex2ID = cachedData
} else {
do {
nodeIndex2ID = try Array(Data(contentsOf: charsURL, options: [.uncached]))
// Store in cache for future use
loudscharsCache[identifier] = nodeIndex2ID
} catch {
debug("Error: \(identifier)に対するLOUDSファイルが存在しません。このエラーは無視できる可能性があります。 Description: \(error)")
return nil
}
do {
nodeIndex2ID = try Array(Data(contentsOf: charsURL, options: [.uncached])) // 2
} catch {
debug("Error: \(identifier)に対するLOUDSファイルが存在しません。このエラーは無視できる可能性があります。 Description: \(error)")
return nil
}
if let bytes = LOUDS.loadLOUDSBinary(from: loudsURL) {
@@ -160,23 +104,19 @@ extension LOUDS {
}
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [DicdataElement] {
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], cache: Data? = nil, option: ConvertRequestOptions) -> [DicdataElement] {
let binary: Data
// Try to get from cache first
if let cachedData = loudstxt3Cache[identifier] {
if let cachedData = cache {
binary = cachedData
} else {
// Fall back to loading from disk if not in cache
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
// Cache the data for future use
loudstxt3Cache[identifier] = binary
} catch {
debug("getDataForLoudstxt3: \(error)")
return []
}
return []
}
let lc = binary[0..<2].toArray(of: UInt16.self)[0]
@@ -192,19 +132,15 @@ extension LOUDS {
}
/// index
static func getDataForLoudstxt3(_ identifier: String, indices: [(trueIndex: Int, keyIndex: Int)], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
static func getDataForLoudstxt3(_ identifier: String, indices: [(trueIndex: Int, keyIndex: Int)], cache: Data? = nil, option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
let binary: Data
// Try to get from cache first
if let cachedData = loudstxt3Cache[identifier] {
if let cachedData = cache {
binary = cachedData
} else {
// Fall back to loading from disk if not in cache
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
// Store in cache for future use
loudstxt3Cache[identifier] = binary
} catch {
debug("getDataForLoudstxt3: \(error)")
return []