mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Upgrade neon version + tests in JS instead of TS.
This commit is contained in:
committed by
Anthony MOI
parent
81bb4f6da3
commit
6f8892e3ae
@ -1,7 +1,5 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
/* eslint-disable @typescript-eslint/no-empty-function */
|
||||
|
||||
const tokenizers = await import("tokenizers");
|
||||
/*eslint-disable no-undef*/
|
||||
const tokenizers = require("tokenizers");
|
||||
|
||||
describe("loadExample", () => {
|
||||
it("", () => {
|
@ -1,5 +1,4 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
/* eslint-disable @typescript-eslint/no-empty-function */
|
||||
/*eslint-disable no-undef*/
|
||||
|
||||
const {
|
||||
Tokenizer,
|
||||
@ -10,11 +9,11 @@ const {
|
||||
decoders,
|
||||
trainers,
|
||||
AddedToken,
|
||||
} = await import("tokenizers");
|
||||
} = require("tokenizers");
|
||||
|
||||
describe("trainExample", () => {
|
||||
it("", () => {
|
||||
const vocab_size = 100;
|
||||
const vocabSize = 100;
|
||||
|
||||
const tokenizer = new Tokenizer(models.BPE.empty());
|
||||
tokenizer.normalizer = normalizers.sequenceNormalizer([
|
||||
@ -26,16 +25,16 @@ describe("trainExample", () => {
|
||||
tokenizer.decoder = decoders.byteLevelDecoder();
|
||||
|
||||
const trainer = trainers.bpeTrainer({
|
||||
vocab_size,
|
||||
min_frequency: 0,
|
||||
special_tokens: [
|
||||
vocabSize,
|
||||
minFrequency: 0,
|
||||
specialTokens: [
|
||||
new AddedToken("<s>", true),
|
||||
new AddedToken("<pad>", true),
|
||||
new AddedToken("</s>", true),
|
||||
new AddedToken("<unk>", true),
|
||||
new AddedToken("<mask>", true),
|
||||
],
|
||||
show_progress: true,
|
||||
showProgress: false,
|
||||
});
|
||||
|
||||
tokenizer.train(trainer, ["data/small.txt"]);
|
4114
bindings/node/package-lock.json
generated
4114
bindings/node/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,59 +1,59 @@
|
||||
{
|
||||
"name": "tokenizers",
|
||||
"version": "0.7.0",
|
||||
"description": "",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/huggingface/tokenizers.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/huggingface/tokenizers/issues"
|
||||
},
|
||||
"homepage": "https://github.com/huggingface/tokenizers/tree/master/bindings/node",
|
||||
"author": "Anthony MOI <m.anthony.moi@gmail.com>",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@types/node": "^13.1.6",
|
||||
"node-pre-gyp": "^0.14.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/jest": "^26.0.7",
|
||||
"@typescript-eslint/eslint-plugin": "^3.7.0",
|
||||
"@typescript-eslint/parser": "^3.7.0",
|
||||
"eslint": "^7.5.0",
|
||||
"eslint-config-prettier": "^6.11.0",
|
||||
"eslint-plugin-jest": "^23.18.0",
|
||||
"eslint-plugin-jsdoc": "^30.0.3",
|
||||
"eslint-plugin-prettier": "^3.1.4",
|
||||
"eslint-plugin-simple-import-sort": "^5.0.3",
|
||||
"jest": "^26.1.0",
|
||||
"neon-cli": "^0.3.3",
|
||||
"prettier": "^2.0.5",
|
||||
"shelljs": "^0.8.3",
|
||||
"ts-jest": "^26.1.3",
|
||||
"typescript": "^3.9.7"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10 < 11 || >=12 <14"
|
||||
},
|
||||
"scripts": {
|
||||
"dev-ts": "rm -rf dist && tsc -p tsconfig.lib.json && rsync -a $(pwd)/lib/bindings/ dist/bindings/",
|
||||
"dev-rs": "neon build",
|
||||
"dev": "npm run dev-rs && npm run dev-ts",
|
||||
"compile": "neon build --release",
|
||||
"clean-rs": "neon clean",
|
||||
"package": "node-pre-gyp package",
|
||||
"test": "jest && cargo test --manifest-path native/Cargo.toml",
|
||||
"lint-check": "eslint --ext .js,.ts lib examples",
|
||||
"lint": "eslint --fix --ext .js,.ts lib examples"
|
||||
},
|
||||
"binary": {
|
||||
"module_name": "index",
|
||||
"module_path": "./bin-package",
|
||||
"host": "https://tokenizers-releases.s3.amazonaws.com/node",
|
||||
"package_name": "{module_name}-v{version}-{node_abi}-{platform}-{arch}-{libc}.tar.gz",
|
||||
"remote_path": "{version}"
|
||||
}
|
||||
"name": "tokenizers",
|
||||
"version": "0.7.0",
|
||||
"description": "",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/huggingface/tokenizers.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/huggingface/tokenizers/issues"
|
||||
},
|
||||
"homepage": "https://github.com/huggingface/tokenizers/tree/master/bindings/node",
|
||||
"author": "Anthony MOI <m.anthony.moi@gmail.com>",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@types/node": "^13.13.21",
|
||||
"node-pre-gyp": "^0.14.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/jest": "^26.0.14",
|
||||
"@typescript-eslint/eslint-plugin": "^3.10.1",
|
||||
"@typescript-eslint/parser": "^3.10.1",
|
||||
"eslint": "^7.10.0",
|
||||
"eslint-config-prettier": "^6.12.0",
|
||||
"eslint-plugin-jest": "^23.20.0",
|
||||
"eslint-plugin-jsdoc": "^30.6.1",
|
||||
"eslint-plugin-prettier": "^3.1.4",
|
||||
"eslint-plugin-simple-import-sort": "^5.0.3",
|
||||
"jest": "^26.4.2",
|
||||
"neon-cli": "^0.4.2",
|
||||
"prettier": "^2.1.2",
|
||||
"shelljs": "^0.8.3",
|
||||
"ts-jest": "^26.4.0",
|
||||
"typescript": "^3.9.7"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10 < 11 || >=12 <14"
|
||||
},
|
||||
"scripts": {
|
||||
"dev-ts": "rm -rf dist && tsc -p tsconfig.lib.json && rsync -a $(pwd)/lib/bindings/ dist/bindings/",
|
||||
"dev-rs": "neon build",
|
||||
"dev": "npm run dev-rs && npm run dev-ts",
|
||||
"compile": "neon build --release",
|
||||
"clean-rs": "neon clean",
|
||||
"package": "node-pre-gyp package",
|
||||
"test": "jest && cargo test --manifest-path native/Cargo.toml",
|
||||
"lint-check": "eslint --ext .js,.ts lib examples",
|
||||
"lint": "eslint --fix --ext .js,.ts lib examples"
|
||||
},
|
||||
"binary": {
|
||||
"module_name": "index",
|
||||
"module_path": "./bin-package",
|
||||
"host": "https://tokenizers-releases.s3.amazonaws.com/node",
|
||||
"package_name": "{module_name}-v{version}-{node_abi}-{platform}-{arch}-{libc}.tar.gz",
|
||||
"remote_path": "{version}"
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user