This commit is contained in:
mii443
2025-07-24 21:05:32 +09:00
parent d0302f105d
commit 0e605ffc6c
3 changed files with 38 additions and 3 deletions

View File

@@ -4,9 +4,13 @@ import EfficientNGram
@MainActor package final class Zenz {
package var resourceURL: URL
package let ngl: Int
private var zenzContext: ZenzContext?
init(resourceURL: URL, ngl: Int = 0) throws {
self.resourceURL = resourceURL
self.ngl = ngl
print("[Zenz] init called with ngl: \(ngl)")
debug("Zenz.init called with ngl: \(ngl)")
do {
#if canImport(Darwin)
if #available(iOS 16, macOS 13, *) {
@@ -17,8 +21,9 @@ import EfficientNGram
}
#else
// this is not percent-encoded
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path)
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path, ngl: ngl)
#endif
print("[Zenz] Loaded model \(resourceURL.lastPathComponent)")
debug("Loaded model \(resourceURL.lastPathComponent)")
} catch {
throw error

View File

@@ -102,15 +102,39 @@ final class ZenzContext {
}
static func createContext(path: String, ngl: Int = 0) throws -> ZenzContext {
print("[ZenzContext] createContext called with ngl: \(ngl)")
debug("ZenzContext.createContext called with ngl: \(ngl)")
llama_backend_init()
var model_params = llama_model_default_params()
model_params.use_mmap = true
print("[ZenzContext] Default model_params.n_gpu_layers: \(model_params.n_gpu_layers)")
debug("Default model_params.n_gpu_layers: \(model_params.n_gpu_layers)")
// IMPORTANT: Set fields in the same order as C struct definition
// model_params.devices is already NULL by default
model_params.n_gpu_layers = Int32(ngl)
// model_params.split_mode = 0 // LLAMA_SPLIT_MODE_NONE (should be default)
// model_params.main_gpu = 0 // Use first GPU (should be default)
// model_params.tensor_split = nil // NULL (should be default)
model_params.use_mmap = true
if ngl > 0 {
print("[ZenzContext] Requesting \(ngl) layers to be offloaded to GPU")
debug("Requesting \(ngl) layers to be offloaded to GPU")
}
print("[ZenzContext] After setting, model_params.n_gpu_layers: \(model_params.n_gpu_layers)")
debug("After setting, model_params.n_gpu_layers: \(model_params.n_gpu_layers)")
print("[ZenzContext] Loading model from: \(path)")
debug("Loading model from: \(path)")
// Log the address of model_params as a debugging aid (this only prints the pointer; it does not by itself verify the struct layout)
withUnsafePointer(to: &model_params) { ptr in
debug("model_params address: \(ptr)")
}
let model = llama_model_load_from_file(path, model_params)
guard let model else {
debug("Could not load model at \(path)")
throw ZenzError.couldNotLoadModel(path: path)
}
debug("Model loaded successfully")
let context = llama_init_from_model(model, ctx_params)
guard let context else {

View File

@@ -69,11 +69,17 @@ import EfficientNGram
}
package func getModel(modelURL: URL, ngl: Int = 0) -> Zenz? {
if let model = self.zenz, model.resourceURL == modelURL {
print("[KanaKanjiConverter] getModel called with ngl: \(ngl)")
debug("KanaKanjiConverter.getModel called with ngl: \(ngl)")
if let model = self.zenz, model.resourceURL == modelURL && model.ngl == ngl {
self.zenzStatus = "load \(modelURL.absoluteString)"
print("[KanaKanjiConverter] Returning cached model with matching ngl: \(ngl)")
debug("Returning cached model with matching ngl: \(ngl)")
return model
} else {
do {
print("[KanaKanjiConverter] Creating new model with ngl: \(ngl)")
debug("Creating new model with ngl: \(ngl)")
self.zenz = try Zenz(resourceURL: modelURL, ngl: ngl)
self.zenzStatus = "load \(modelURL.absoluteString)"
return self.zenz