mirror of
https://github.com/mii443/AzooKeyKanaKanjiConverter.git
synced 2025-12-03 02:58:27 +00:00
ngl
This commit is contained in:
@@ -5,15 +5,15 @@ import EfficientNGram
|
||||
@MainActor package final class Zenz {
|
||||
package var resourceURL: URL
|
||||
private var zenzContext: ZenzContext?
|
||||
init(resourceURL: URL) throws {
|
||||
init(resourceURL: URL, ngl: Int = 0) throws {
|
||||
self.resourceURL = resourceURL
|
||||
do {
|
||||
#if canImport(Darwin)
|
||||
if #available(iOS 16, macOS 13, *) {
|
||||
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path(percentEncoded: false))
|
||||
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path(percentEncoded: false), ngl: ngl)
|
||||
} else {
|
||||
// this is not percent-encoded
|
||||
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path)
|
||||
self.zenzContext = try ZenzContext.createContext(path: resourceURL.path, ngl: ngl)
|
||||
}
|
||||
#else
|
||||
// this is not percent-encoded
|
||||
|
||||
@@ -101,10 +101,11 @@ final class ZenzContext {
|
||||
return ctx_params
|
||||
}
|
||||
|
||||
static func createContext(path: String) throws -> ZenzContext {
|
||||
static func createContext(path: String, ngl: Int = 0) throws -> ZenzContext {
|
||||
llama_backend_init()
|
||||
var model_params = llama_model_default_params()
|
||||
model_params.use_mmap = true
|
||||
model_params.n_gpu_layers = Int32(ngl)
|
||||
let model = llama_model_load_from_file(path, model_params)
|
||||
guard let model else {
|
||||
debug("Could not load model at \(path)")
|
||||
|
||||
@@ -31,6 +31,7 @@ package func llama_backend_free() {}
|
||||
|
||||
/// Parameter struct exposing the `use_mmap` and `n_gpu_layers` fields.
/// NOTE(review): presumably a stand-in for llama.cpp's `llama_model_params` on builds where the real library is not linked — confirm.
package struct llama_model_params {
|
||||
/// Memory-mapping flag. NOTE(review): semantics assumed to follow llama.cpp's `use_mmap` — confirm.
package var use_mmap: Bool
|
||||
/// GPU layer count. NOTE(review): presumably the number of model layers to offload to the GPU (0 = CPU only) — confirm.
package var n_gpu_layers: Int32
|
||||
}
|
||||
/// Stub whose body only calls `unimplemented()`; it does not build a real `llama_model_params` value.
/// NOTE(review): presumably a placeholder for builds where llama.cpp is unavailable — confirm.
package func llama_model_default_params() -> llama_model_params { unimplemented() }
|
||||
|
||||
|
||||
@@ -286,17 +286,19 @@ public struct ConvertRequestOptions: Sendable {
|
||||
|
||||
public struct ZenzaiMode: Sendable, Equatable {
|
||||
public struct PersonalizationMode: Sendable, Equatable {
|
||||
public init(baseNgramLanguageModel: String, personalNgramLanguageModel: String, n: Int = 5, d: Double = 0.75, alpha: Float = 0.5) {
|
||||
public init(baseNgramLanguageModel: String, personalNgramLanguageModel: String, n: Int = 5, d: Double = 0.75, alpha: Float = 0.5, ngl: Int = 0) {
|
||||
self.baseNgramLanguageModel = baseNgramLanguageModel
|
||||
self.personalNgramLanguageModel = personalNgramLanguageModel
|
||||
self.n = n
|
||||
self.d = d
|
||||
self.alpha = alpha
|
||||
self.ngl = ngl
|
||||
}
|
||||
|
||||
var n: Int = 5
|
||||
var d: Double = 0.75
|
||||
var alpha: Float = 0.5
|
||||
var ngl: Int = 0
|
||||
var baseNgramLanguageModel: String
|
||||
var personalNgramLanguageModel: String
|
||||
}
|
||||
@@ -305,24 +307,27 @@ public struct ConvertRequestOptions: Sendable {
|
||||
weightURL: URL(fileURLWithPath: ""),
|
||||
inferenceLimit: 10,
|
||||
requestRichCandidates: false,
|
||||
versionDependentMode: .v3(.init())
|
||||
versionDependentMode: .v3(.init()),
|
||||
ngl: 0
|
||||
)
|
||||
|
||||
/// activate *Zenzai* - Neural Kana-Kanji Conversion Engine
|
||||
/// - Parameters:
|
||||
/// - weight: path for model weight (gguf)
|
||||
/// - inferenceLimit: applying inference count limitation. Smaller limit makes conversion faster but quality will be worse. (Default: 10)
|
||||
/// - requestRichCandidates: when this flag is true, the converter spends more time but generate richer N-Best candidates for candidate list view. Usually this option is not recommended for live conversion.
|
||||
/// - personalizationMode: values for personalization.
|
||||
/// - versionDependentMode: specify zenz model version and its configuration.
|
||||
public static func on(weight: URL, inferenceLimit: Int = 10, requestRichCandidates: Bool = false, personalizationMode: PersonalizationMode?, versionDependentMode: ZenzaiVersionDependentMode = .v3(.init())) -> Self {
|
||||
/// - weight: path for model weight (gguf)
|
||||
/// - inferenceLimit: applying inference count limitation. Smaller limit makes conversion faster but quality will be worse. (Default: 10)
|
||||
/// - requestRichCandidates: when this flag is true, the converter spends more time but generates richer N-Best candidates for the candidate list view. Usually this option is not recommended for live conversion.
|
||||
/// - personalizationMode: values for personalization.
|
||||
/// - versionDependentMode: specify zenz model version and its configuration.
|
||||
/// - ngl: number of layers to offload to GPU. If 0, all computation will be done on CPU.
|
||||
public static func on(weight: URL, inferenceLimit: Int = 10, requestRichCandidates: Bool = false, personalizationMode: PersonalizationMode?, versionDependentMode: ZenzaiVersionDependentMode = .v3(.init()), ngl: Int = 0) -> Self {
|
||||
ZenzaiMode(
|
||||
enabled: true,
|
||||
weightURL: weight,
|
||||
inferenceLimit: inferenceLimit,
|
||||
requestRichCandidates: requestRichCandidates,
|
||||
personalizationMode: personalizationMode,
|
||||
versionDependentMode: versionDependentMode
|
||||
versionDependentMode: versionDependentMode,
|
||||
ngl: ngl
|
||||
)
|
||||
}
|
||||
var enabled: Bool
|
||||
@@ -331,5 +336,6 @@ public struct ConvertRequestOptions: Sendable {
|
||||
var requestRichCandidates: Bool
|
||||
var personalizationMode: PersonalizationMode?
|
||||
var versionDependentMode: ZenzaiVersionDependentMode
|
||||
var ngl: Int
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,13 +68,13 @@ import EfficientNGram
|
||||
return (mode, baseModel, personalModel)
|
||||
}
|
||||
|
||||
package func getModel(modelURL: URL) -> Zenz? {
|
||||
package func getModel(modelURL: URL, ngl: Int = 0) -> Zenz? {
|
||||
if let model = self.zenz, model.resourceURL == modelURL {
|
||||
self.zenzStatus = "load \(modelURL.absoluteString)"
|
||||
return model
|
||||
} else {
|
||||
do {
|
||||
self.zenz = try Zenz(resourceURL: modelURL)
|
||||
self.zenz = try Zenz(resourceURL: modelURL, ngl: ngl)
|
||||
self.zenzStatus = "load \(modelURL.absoluteString)"
|
||||
return self.zenz
|
||||
} catch {
|
||||
@@ -85,7 +85,7 @@ import EfficientNGram
|
||||
}
|
||||
|
||||
public func predictNextCharacter(leftSideContext: String, count: Int, options: ConvertRequestOptions) -> [(character: Character, value: Float)] {
|
||||
guard let zenz = self.getModel(modelURL: options.zenzaiMode.weightURL) else {
|
||||
guard let zenz = self.getModel(modelURL: options.zenzaiMode.weightURL, ngl: options.zenzaiMode.ngl) else {
|
||||
print("zenz-v2 model unavailable")
|
||||
return []
|
||||
}
|
||||
@@ -613,7 +613,7 @@ import EfficientNGram
|
||||
}
|
||||
|
||||
// FIXME: enable cache based zenzai
|
||||
if zenzaiMode.enabled, let model = self.getModel(modelURL: zenzaiMode.weightURL) {
|
||||
if zenzaiMode.enabled, let model = self.getModel(modelURL: zenzaiMode.weightURL, ngl: zenzaiMode.ngl) {
|
||||
let (result, nodes, cache) = self.converter.all_zenzai(
|
||||
inputData,
|
||||
zenz: model,
|
||||
|
||||
Reference in New Issue
Block a user