mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-03 11:18:29 +00:00
Upgrade neon version + tests in JS instead of TS.
This commit is contained in:
committed by
Anthony MOI
parent
81bb4f6da3
commit
6f8892e3ae
@@ -1,7 +1,5 @@
|
|||||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
/*eslint-disable no-undef*/
|
||||||
/* eslint-disable @typescript-eslint/no-empty-function */
|
const tokenizers = require("tokenizers");
|
||||||
|
|
||||||
const tokenizers = await import("tokenizers");
|
|
||||||
|
|
||||||
describe("loadExample", () => {
|
describe("loadExample", () => {
|
||||||
it("", () => {
|
it("", () => {
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
/*eslint-disable no-undef*/
|
||||||
/* eslint-disable @typescript-eslint/no-empty-function */
|
|
||||||
|
|
||||||
const {
|
const {
|
||||||
Tokenizer,
|
Tokenizer,
|
||||||
@@ -10,11 +9,11 @@ const {
|
|||||||
decoders,
|
decoders,
|
||||||
trainers,
|
trainers,
|
||||||
AddedToken,
|
AddedToken,
|
||||||
} = await import("tokenizers");
|
} = require("tokenizers");
|
||||||
|
|
||||||
describe("trainExample", () => {
|
describe("trainExample", () => {
|
||||||
it("", () => {
|
it("", () => {
|
||||||
const vocab_size = 100;
|
const vocabSize = 100;
|
||||||
|
|
||||||
const tokenizer = new Tokenizer(models.BPE.empty());
|
const tokenizer = new Tokenizer(models.BPE.empty());
|
||||||
tokenizer.normalizer = normalizers.sequenceNormalizer([
|
tokenizer.normalizer = normalizers.sequenceNormalizer([
|
||||||
@@ -26,16 +25,16 @@ describe("trainExample", () => {
|
|||||||
tokenizer.decoder = decoders.byteLevelDecoder();
|
tokenizer.decoder = decoders.byteLevelDecoder();
|
||||||
|
|
||||||
const trainer = trainers.bpeTrainer({
|
const trainer = trainers.bpeTrainer({
|
||||||
vocab_size,
|
vocabSize,
|
||||||
min_frequency: 0,
|
minFrequency: 0,
|
||||||
special_tokens: [
|
specialTokens: [
|
||||||
new AddedToken("<s>", true),
|
new AddedToken("<s>", true),
|
||||||
new AddedToken("<pad>", true),
|
new AddedToken("<pad>", true),
|
||||||
new AddedToken("</s>", true),
|
new AddedToken("</s>", true),
|
||||||
new AddedToken("<unk>", true),
|
new AddedToken("<unk>", true),
|
||||||
new AddedToken("<mask>", true),
|
new AddedToken("<mask>", true),
|
||||||
],
|
],
|
||||||
show_progress: true,
|
showProgress: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
tokenizer.train(trainer, ["data/small.txt"]);
|
tokenizer.train(trainer, ["data/small.txt"]);
|
||||||
4114
bindings/node/package-lock.json
generated
4114
bindings/node/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,59 +1,59 @@
|
|||||||
{
|
{
|
||||||
"name": "tokenizers",
|
"name": "tokenizers",
|
||||||
"version": "0.7.0",
|
"version": "0.7.0",
|
||||||
"description": "",
|
"description": "",
|
||||||
"main": "./dist/index.js",
|
"main": "./dist/index.js",
|
||||||
"types": "./dist/index.d.ts",
|
"types": "./dist/index.d.ts",
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "git+https://github.com/huggingface/tokenizers.git"
|
"url": "git+https://github.com/huggingface/tokenizers.git"
|
||||||
},
|
},
|
||||||
"bugs": {
|
"bugs": {
|
||||||
"url": "https://github.com/huggingface/tokenizers/issues"
|
"url": "https://github.com/huggingface/tokenizers/issues"
|
||||||
},
|
},
|
||||||
"homepage": "https://github.com/huggingface/tokenizers/tree/master/bindings/node",
|
"homepage": "https://github.com/huggingface/tokenizers/tree/master/bindings/node",
|
||||||
"author": "Anthony MOI <m.anthony.moi@gmail.com>",
|
"author": "Anthony MOI <m.anthony.moi@gmail.com>",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@types/node": "^13.1.6",
|
"@types/node": "^13.13.21",
|
||||||
"node-pre-gyp": "^0.14.0"
|
"node-pre-gyp": "^0.14.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/jest": "^26.0.7",
|
"@types/jest": "^26.0.14",
|
||||||
"@typescript-eslint/eslint-plugin": "^3.7.0",
|
"@typescript-eslint/eslint-plugin": "^3.10.1",
|
||||||
"@typescript-eslint/parser": "^3.7.0",
|
"@typescript-eslint/parser": "^3.10.1",
|
||||||
"eslint": "^7.5.0",
|
"eslint": "^7.10.0",
|
||||||
"eslint-config-prettier": "^6.11.0",
|
"eslint-config-prettier": "^6.12.0",
|
||||||
"eslint-plugin-jest": "^23.18.0",
|
"eslint-plugin-jest": "^23.20.0",
|
||||||
"eslint-plugin-jsdoc": "^30.0.3",
|
"eslint-plugin-jsdoc": "^30.6.1",
|
||||||
"eslint-plugin-prettier": "^3.1.4",
|
"eslint-plugin-prettier": "^3.1.4",
|
||||||
"eslint-plugin-simple-import-sort": "^5.0.3",
|
"eslint-plugin-simple-import-sort": "^5.0.3",
|
||||||
"jest": "^26.1.0",
|
"jest": "^26.4.2",
|
||||||
"neon-cli": "^0.3.3",
|
"neon-cli": "^0.4.2",
|
||||||
"prettier": "^2.0.5",
|
"prettier": "^2.1.2",
|
||||||
"shelljs": "^0.8.3",
|
"shelljs": "^0.8.3",
|
||||||
"ts-jest": "^26.1.3",
|
"ts-jest": "^26.4.0",
|
||||||
"typescript": "^3.9.7"
|
"typescript": "^3.9.7"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=10 < 11 || >=12 <14"
|
"node": ">=10 < 11 || >=12 <14"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev-ts": "rm -rf dist && tsc -p tsconfig.lib.json && rsync -a $(pwd)/lib/bindings/ dist/bindings/",
|
"dev-ts": "rm -rf dist && tsc -p tsconfig.lib.json && rsync -a $(pwd)/lib/bindings/ dist/bindings/",
|
||||||
"dev-rs": "neon build",
|
"dev-rs": "neon build",
|
||||||
"dev": "npm run dev-rs && npm run dev-ts",
|
"dev": "npm run dev-rs && npm run dev-ts",
|
||||||
"compile": "neon build --release",
|
"compile": "neon build --release",
|
||||||
"clean-rs": "neon clean",
|
"clean-rs": "neon clean",
|
||||||
"package": "node-pre-gyp package",
|
"package": "node-pre-gyp package",
|
||||||
"test": "jest && cargo test --manifest-path native/Cargo.toml",
|
"test": "jest && cargo test --manifest-path native/Cargo.toml",
|
||||||
"lint-check": "eslint --ext .js,.ts lib examples",
|
"lint-check": "eslint --ext .js,.ts lib examples",
|
||||||
"lint": "eslint --fix --ext .js,.ts lib examples"
|
"lint": "eslint --fix --ext .js,.ts lib examples"
|
||||||
},
|
},
|
||||||
"binary": {
|
"binary": {
|
||||||
"module_name": "index",
|
"module_name": "index",
|
||||||
"module_path": "./bin-package",
|
"module_path": "./bin-package",
|
||||||
"host": "https://tokenizers-releases.s3.amazonaws.com/node",
|
"host": "https://tokenizers-releases.s3.amazonaws.com/node",
|
||||||
"package_name": "{module_name}-v{version}-{node_abi}-{platform}-{arch}-{libc}.tar.gz",
|
"package_name": "{module_name}-v{version}-{node_abi}-{platform}-{arch}-{libc}.tar.gz",
|
||||||
"remote_path": "{version}"
|
"remote_path": "{version}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user