perf: file I/Oでの遅延をなくすために、事前にキャッシュできるように

This commit is contained in:
fkunn1326
2025-02-26 20:24:27 +09:00
parent 24f506b5b5
commit 39b90389da
3 changed files with 109 additions and 31 deletions

View File

@@ -43,6 +43,10 @@ import EfficientNGram
self.lastData = nil self.lastData = nil
} }
/// Forwards to `LOUDS.preloadLouds(option:)`.
///
/// - Parameter option: Request options, passed through unchanged.
public func preloadLouds(option: ConvertRequestOptions) {
LOUDS.preloadLouds(option: option)
}
private func getZenzaiPersonalization(mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode?) -> (mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode, base: EfficientNGram, personal: EfficientNGram)? { private func getZenzaiPersonalization(mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode?) -> (mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode, base: EfficientNGram, personal: EfficientNGram)? {
guard let mode else { guard let mode else {
return nil return nil

View File

@@ -162,19 +162,19 @@ public final class DicdataStore {
importedLoudses.insert(query) importedLoudses.insert(query)
// ASCII // ASCII
let identifier = [ let identifier = [
#"\n"#: "[0A]", "\\n": "[0A]",
#" "#: "[20]", " ": "[20]",
#"""#: "[22]", "\"": "[22]",
#"'"#: "[27]", "\'": "[27]",
#"*"#: "[2A]", "*": "[2A]",
#"+"#: "[2B]", "+": "[2B]",
#"."#: "[2E]", ".": "[2E]",
#"/"#: "[2F]", "/": "[2F]",
#":"#: "[3A]", ":": "[3A]",
#"<"#: "[3C]", "<": "[3C]",
#">"#: "[3E]", ">": "[3E]",
#"\"#: "[5C]", "\\": "[5C]",
#"|"#: "[7C]", "|": "[7C]",
][query, default: query] ][query, default: query]
if let louds = LOUDS.load(identifier, option: self.requestOptions) { if let louds = LOUDS.load(identifier, option: self.requestOptions) {
self.loudses[query] = louds self.loudses[query] = louds

View File

@@ -10,10 +10,60 @@ import Foundation
import SwiftUtils import SwiftUtils
extension LOUDS { extension LOUDS {
// In-memory caches for dictionary files. Each is keyed by the dictionary identifier
// (for preloaded files, the file name without its path extension).
// NOTE(review): these are plain static vars with no visible synchronization;
// confirm that every reader and writer runs on the same thread/queue.
// Raw contents of `.loudstxt3` files.
private static var loudstxt3Cache: [String: Data] = [:]
// Contents of `.louds` files decoded as an array of UInt64.
private static var loudsCache: [String: [UInt64]] = [:]
// Contents of `.loudschars2` files as a byte array.
private static var loudscharsCache: [String: [UInt8]] = [:]
/// Preloads every dictionary file found in the `louds` directory into the in-memory caches.
///
/// Call this at app startup so that later lookups are served from memory instead of disk.
/// Files are dispatched on their path extension:
/// - `louds`: decoded through `loadLOUDSBinary(from:)` and stored in `loudsCache`
/// - `loudschars2`: read as raw bytes and stored in `loudscharsCache`
/// - `loudstxt3`: read as `Data` and stored in `loudstxt3Cache`
///
/// Every entry is keyed by the file name without its extension. Files with any other
/// extension are ignored. Preloading is best-effort: a file that cannot be read is
/// logged and skipped, and the remaining files are still processed.
///
/// - Parameter option: Supplies `dictionaryResourceURL`, whose `louds` subdirectory is scanned.
public static func preloadLouds(option: ConvertRequestOptions) {
    let directory = option.dictionaryResourceURL.appendingPathComponent("louds", isDirectory: true)
    let fileURLs: [URL]
    do {
        fileURLs = try FileManager.default.contentsOfDirectory(
            at: directory,
            includingPropertiesForKeys: nil
        )
    } catch {
        // Nothing can be preloaded; report the reason instead of failing silently.
        debug("Failed to list louds directory for preload: \(error)")
        return
    }

    for url in fileURLs {
        let identifier = url.deletingPathExtension().lastPathComponent
        switch url.pathExtension {
        case "louds":
            // `loadLOUDSBinary(from:)` does not throw; it signals failure by returning nil,
            // so the result is checked directly rather than wrapped in `try`/`catch`.
            if let bits = loadLOUDSBinary(from: url) {
                loudsCache[identifier] = bits
            } else {
                debug("Failed to preload \(identifier).louds")
            }
        case "loudschars2":
            do {
                loudscharsCache[identifier] = try Array(Data(contentsOf: url))
            } catch {
                debug("Failed to preload \(identifier).loudschars2: \(error)")
            }
        case "loudstxt3":
            do {
                loudstxt3Cache[identifier] = try Data(contentsOf: url)
            } catch {
                debug("Failed to preload \(identifier).loudstxt3: \(error)")
            }
        default:
            break
        }
    }
}
private static func loadLOUDSBinary(from url: URL) -> [UInt64]? { private static func loadLOUDSBinary(from url: URL) -> [UInt64]? {
// Check if the data is already cached
let identifier = url.deletingPathExtension().lastPathComponent
if let cachedData = loudsCache[identifier] {
return cachedData
}
// If not in cache, load from disk
do { do {
let binaryData = try Data(contentsOf: url, options: [.uncached]) // 2 let binaryData = try Data(contentsOf: url, options: [.uncached])
let ui64array = binaryData.toArray(of: UInt64.self) let ui64array = binaryData.toArray(of: UInt64.self)
// Store in cache for future use
loudsCache[identifier] = ui64array
return ui64array return ui64array
} catch { } catch {
debug(error) debug(error)
@@ -22,7 +72,6 @@ extension LOUDS {
} }
private static func getLOUDSURL(_ identifier: String, option: ConvertRequestOptions) -> (chars: URL, louds: URL) { private static func getLOUDSURL(_ identifier: String, option: ConvertRequestOptions) -> (chars: URL, louds: URL) {
if identifier == "user"{ if identifier == "user"{
return ( return (
option.sharedContainerURL.appendingPathComponent("user.loudschars2", isDirectory: false), option.sharedContainerURL.appendingPathComponent("user.loudschars2", isDirectory: false),
@@ -57,11 +106,18 @@ extension LOUDS {
package static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? { package static func load(_ identifier: String, option: ConvertRequestOptions) -> LOUDS? {
let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option) let (charsURL, loudsURL) = getLOUDSURL(identifier, option: option)
let nodeIndex2ID: [UInt8] let nodeIndex2ID: [UInt8]
do { // Check if the data is already cached
nodeIndex2ID = try Array(Data(contentsOf: charsURL, options: [.uncached])) // 2 if let cachedData = loudscharsCache[identifier] {
} catch { nodeIndex2ID = cachedData
debug("Error: \(identifier)に対するLOUDSファイルが存在しません。このエラーは無視できる可能性があります。 Description: \(error)") } else {
return nil do {
nodeIndex2ID = try Array(Data(contentsOf: charsURL, options: [.uncached]))
// Store in cache for future use
loudscharsCache[identifier] = nodeIndex2ID
} catch {
debug("Error: \(identifier)に対するLOUDSファイルが存在しません。このエラーは無視できる可能性があります。 Description: \(error)")
return nil
}
} }
if let bytes = LOUDS.loadLOUDSBinary(from: loudsURL) { if let bytes = LOUDS.loadLOUDSBinary(from: loudsURL) {
@@ -106,11 +162,20 @@ extension LOUDS {
static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [DicdataElement] { static func getDataForLoudstxt3(_ identifier: String, indices: [Int], option: ConvertRequestOptions) -> [DicdataElement] {
let binary: Data let binary: Data
do {
let url = getLoudstxt3URL(identifier, option: option) // Try to get from cache first
binary = try Data(contentsOf: url) if let cachedData = loudstxt3Cache[identifier] {
} catch { binary = cachedData
debug("getDataForLoudstxt3: \(error)") } else {
// Fall back to loading from disk if not in cache
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
// Cache the data for future use
loudstxt3Cache[identifier] = binary
} catch {
debug("getDataForLoudstxt3: \(error)")
}
return [] return []
} }
@@ -129,12 +194,21 @@ extension LOUDS {
/// index /// index
static func getDataForLoudstxt3(_ identifier: String, indices: [(trueIndex: Int, keyIndex: Int)], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] { static func getDataForLoudstxt3(_ identifier: String, indices: [(trueIndex: Int, keyIndex: Int)], option: ConvertRequestOptions) -> [(loudsNodeIndex: Int, dicdata: [DicdataElement])] {
let binary: Data let binary: Data
do {
let url = getLoudstxt3URL(identifier, option: option) // Try to get from cache first
binary = try Data(contentsOf: url) if let cachedData = loudstxt3Cache[identifier] {
} catch { binary = cachedData
debug("getDataForLoudstxt3: \(error)") } else {
return [] // Fall back to loading from disk if not in cache
do {
let url = getLoudstxt3URL(identifier, option: option)
binary = try Data(contentsOf: url)
// Store in cache for future use
loudstxt3Cache[identifier] = binary
} catch {
debug("getDataForLoudstxt3: \(error)")
return []
}
} }
let lc = binary[0..<2].toArray(of: UInt16.self)[0] let lc = binary[0..<2].toArray(of: UInt16.self)[0]