mirror of
https://github.com/mii443/tokenizers.git
synced 2025-09-04 08:19:21 +00:00
Node - Tokenizer can be trained
This commit is contained in:
@ -1,7 +1,8 @@
|
|||||||
extern crate tokenizers as tk;
|
extern crate tokenizers as tk;
|
||||||
|
|
||||||
use crate::models::*;
|
use crate::models::JsModel;
|
||||||
use crate::tasks::tokenizer::{EncodeTask, WorkingTokenizer};
|
use crate::tasks::tokenizer::{EncodeTask, WorkingTokenizer};
|
||||||
|
use crate::trainers::JsTrainer;
|
||||||
use neon::prelude::*;
|
use neon::prelude::*;
|
||||||
|
|
||||||
/// Tokenizer
|
/// Tokenizer
|
||||||
@ -293,7 +294,22 @@ declare_types! {
|
|||||||
}
|
}
|
||||||
|
|
||||||
method train(mut cx) {
|
method train(mut cx) {
|
||||||
unimplemented!()
|
// train(trainer: JsTrainer, files: string[])
|
||||||
|
|
||||||
|
let trainer = cx.argument::<JsTrainer>(0)?;
|
||||||
|
let files = cx.argument::<JsArray>(1)?.to_vec(&mut cx)?.into_iter().map(|file| {
|
||||||
|
Ok(file.downcast::<JsString>().or_throw(&mut cx)?.value())
|
||||||
|
}).collect::<NeonResult<Vec<_>>>()?;
|
||||||
|
|
||||||
|
let mut this = cx.this();
|
||||||
|
let guard = cx.lock();
|
||||||
|
let res = trainer.borrow(&guard).trainer.execute(|trainer| {
|
||||||
|
let res = this.borrow_mut(&guard).tokenizer.train(trainer.unwrap(), files);
|
||||||
|
res
|
||||||
|
});
|
||||||
|
res.map_err(|e| cx.throw_error::<_, ()>(format!("{}", e)).unwrap_err())?;
|
||||||
|
|
||||||
|
Ok(cx.undefined().upcast())
|
||||||
}
|
}
|
||||||
|
|
||||||
method getModel(mut cx) {
|
method getModel(mut cx) {
|
||||||
|
@ -6,7 +6,7 @@ use std::collections::HashSet;
|
|||||||
|
|
||||||
/// Trainer
|
/// Trainer
|
||||||
pub struct Trainer {
|
pub struct Trainer {
|
||||||
pub trainer: Container<dyn tk::tokenizer::Trainer + Sync>,
|
pub trainer: Container<dyn tk::tokenizer::Trainer>,
|
||||||
}
|
}
|
||||||
|
|
||||||
declare_types! {
|
declare_types! {
|
||||||
|
Reference in New Issue
Block a user