This commit is contained in:
mii443
2025-07-24 20:00:52 +09:00
parent d8b06e9367
commit d0302f105d
5 changed files with 25 additions and 17 deletions

View File

@@ -5,15 +5,15 @@ import EfficientNGram
@MainActor package final class Zenz {
package var resourceURL: URL
private var zenzContext: ZenzContext?
init(resourceURL: URL) throws {
init(resourceURL: URL, ngl: Int = 0) throws {
self.resourceURL = resourceURL
do {
#if canImport(Darwin)
if #available(iOS 16, macOS 13, *) {
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path(percentEncoded: false))
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path(percentEncoded: false), ngl: ngl)
} else {
// this is not percent-encoded
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path)
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path, ngl: ngl)
}
#else
// this is not percent-encoded

View File

@@ -101,10 +101,11 @@ final class ZenzContext {
return ctx_params
}
static func createContext(path: String) throws -> ZenzContext {
static func createContext(path: String, ngl: Int = 0) throws -> ZenzContext {
llama_backend_init()
var model_params = llama_model_default_params()
model_params.use_mmap = true
model_params.n_gpu_layers = Int32(ngl)
let model = llama_model_load_from_file(path, model_params)
guard let model else {
debug("Could not load model at \(path)")

View File

@@ -31,6 +31,7 @@ package func llama_backend_free() {}
package struct llama_model_params {
package var use_mmap: Bool
package var n_gpu_layers: Int32
}
package func llama_model_default_params() -> llama_model_params { unimplemented() }

View File

@@ -286,17 +286,19 @@ public struct ConvertRequestOptions: Sendable {
public struct ZenzaiMode: Sendable, Equatable {
public struct PersonalizationMode: Sendable, Equatable {
public init(baseNgramLanguageModel: String, personalNgramLanguageModel: String, n: Int = 5, d: Double = 0.75, alpha: Float = 0.5) {
public init(baseNgramLanguageModel: String, personalNgramLanguageModel: String, n: Int = 5, d: Double = 0.75, alpha: Float = 0.5, ngl: Int = 0) {
self.baseNgramLanguageModel = baseNgramLanguageModel
self.personalNgramLanguageModel = personalNgramLanguageModel
self.n = n
self.d = d
self.alpha = alpha
self.ngl = ngl
}
var n: Int = 5
var d: Double = 0.75
var alpha: Float = 0.5
var ngl: Int = 0
var baseNgramLanguageModel: String
var personalNgramLanguageModel: String
}
@@ -305,24 +307,27 @@ public struct ConvertRequestOptions: Sendable {
weightURL: URL(fileURLWithPath: ""),
inferenceLimit: 10,
requestRichCandidates: false,
versionDependentMode: .v3(.init())
versionDependentMode: .v3(.init()),
ngl: 0
)
/// activate *Zenzai* - Neural Kana-Kanji Conversion Engine
/// - Parameters:
/// - weight: path for model weight (gguf)
/// - inferenceLimit: applying inference count limitation. Smaller limit makes conversion faster but quality will be worse. (Default: 10)
///   - requestRichCandidates: when this flag is true, the converter spends more time but generates richer N-Best candidates for candidate list view. Usually this option is not recommended for live conversion.
/// - personalizationMode: values for personalization.
/// - versionDependentMode: specify zenz model version and its configuration.
public static func on(weight: URL, inferenceLimit: Int = 10, requestRichCandidates: Bool = false, personalizationMode: PersonalizationMode?, versionDependentMode: ZenzaiVersionDependentMode = .v3(.init())) -> Self {
/// - weight: path for model weight (gguf)
/// - inferenceLimit: applying inference count limitation. Smaller limit makes conversion faster but quality will be worse. (Default: 10)
///   - requestRichCandidates: when this flag is true, the converter spends more time but generates richer N-Best candidates for candidate list view. Usually this option is not recommended for live conversion.
/// - personalizationMode: values for personalization.
/// - versionDependentMode: specify zenz model version and its configuration.
/// - ngl: number of layers to offload to GPU. If 0, all computation will be done on CPU.
public static func on(weight: URL, inferenceLimit: Int = 10, requestRichCandidates: Bool = false, personalizationMode: PersonalizationMode?, versionDependentMode: ZenzaiVersionDependentMode = .v3(.init()), ngl: Int = 0) -> Self {
ZenzaiMode(
enabled: true,
weightURL: weight,
inferenceLimit: inferenceLimit,
requestRichCandidates: requestRichCandidates,
personalizationMode: personalizationMode,
versionDependentMode: versionDependentMode
versionDependentMode: versionDependentMode,
ngl: ngl
)
}
var enabled: Bool
@@ -331,5 +336,6 @@ public struct ConvertRequestOptions: Sendable {
var requestRichCandidates: Bool
var personalizationMode: PersonalizationMode?
var versionDependentMode: ZenzaiVersionDependentMode
var ngl: Int
}
}

View File

@@ -68,13 +68,13 @@ import EfficientNGram
return (mode, baseModel, personalModel)
}
package func getModel(modelURL: URL) -> Zenz? {
package func getModel(modelURL: URL, ngl: Int = 0) -> Zenz? {
if let model = self.zenz, model.resourceURL == modelURL {
self.zenzStatus = "load \(modelURL.absoluteString)"
return model
} else {
do {
self.zenz = try Zenz(resourceURL: modelURL)
self.zenz = try Zenz(resourceURL: modelURL, ngl: ngl)
self.zenzStatus = "load \(modelURL.absoluteString)"
return self.zenz
} catch {
@@ -85,7 +85,7 @@ import EfficientNGram
}
public func predictNextCharacter(leftSideContext: String, count: Int, options: ConvertRequestOptions) -> [(character: Character, value: Float)] {
guard let zenz = self.getModel(modelURL: options.zenzaiMode.weightURL) else {
guard let zenz = self.getModel(modelURL: options.zenzaiMode.weightURL, ngl: options.zenzaiMode.ngl) else {
print("zenz-v2 model unavailable")
return []
}
@@ -613,7 +613,7 @@ import EfficientNGram
}
// FIXME: enable cache based zenzai
if zenzaiMode.enabled, let model = self.getModel(modelURL: zenzaiMode.weightURL) {
if zenzaiMode.enabled, let model = self.getModel(modelURL: zenzaiMode.weightURL, ngl: zenzaiMode.ngl) {
let (result, nodes, cache) = self.converter.all_zenzai(
inputData,
zenz: model,