Files
tokenizers/bindings/node/package.json
Arthur 09069717e9 Refactor metaspace (#1476)
* version = "0.15.3-dev-0"

Improve the performance of Metaspace, and also fix it.

(transformers) ➜  transformers git:(refactor-default-llama) ✗ python ../scripts/gemma-dummy.py
Token indices sequence length is longer than the specified maximum sequence length for this model (14999 > 2048). Running this sequence through the model will result in indexing errors
['<REPR_END>', '▁inform', '<s>', '.', '▁Hey', '<unk>', '.', '▁', '▁', '▁', '▁', '▁', '▁', '▁.']
['▁inform', '<s>', '.', '▁Hey', '<unk>', '.', '▁', '▁', '▁', '▁', '▁', '▁', '▁.']
[0.0006330013275146484, 0.0014591217041015625, 0.015890836715698242, 0.18584918975830078, 2.1726326942443848]
(transformers) ➜  transformers git:(refactor-default-llama) ✗ python ../scripts/gemma-dummy.py
Token indices sequence length is longer than the specified maximum sequence length for this model (10000 > 2048). Running this sequence through the model will result in indexing errors
['<REPR_END>', 'in', 'form', '<s>', '.', '▁Hey', '<unk>', '.', '▁▁▁▁▁▁', '▁.']
['in', 'form', '<s>', '.', '▁Hey', '<unk>', '.', '▁▁▁▁▁▁', '▁.']
[0.0008409023284912109, 0.0008909702301025391, 0.00882411003112793, 0.10214710235595703, 1.187899112701416]

* well what do we have

* nit

* be backward compatible (BC) with non-legacy

* unrelated change for clippy

* fix test

* splitting is a must for word_ids

* fmt and lint

* Fixing everything (hopefully better).

* Fixing node.

* Including yarn.lock

* Lint.

* Stubs.

* revert to use split

* fix merge issues

* fix tests

* finish fixing tests

* ruff

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
2024-03-30 10:27:24 +01:00

114 lines
3.0 KiB
JSON

{
"name": "tokenizers",
"version": "0.15.3-dev0",
"repository": {
"type": "git",
"url": "git+https://github.com/huggingface/tokenizers.git"
},
"bugs": {
"url": "https://github.com/huggingface/tokenizers/issues"
},
"homepage": "https://github.com/huggingface/tokenizers/tree/master/bindings/node",
"author": "Anthony MOI <m.anthony.moi@gmail.com>",
"license": "Apache-2.0",
"description": "Provides an implementation of today's most used tokenizers, with a focus on performance and versatility.",
"files": [
"index.d.ts",
"index.js"
],
"napi": {
"name": "tokenizers",
"triples": {
"defaults": true,
"additional": [
"x86_64-unknown-linux-musl",
"aarch64-unknown-linux-gnu",
"i686-pc-windows-msvc",
"armv7-unknown-linux-gnueabihf",
"aarch64-apple-darwin",
"aarch64-linux-android",
"x86_64-unknown-freebsd",
"aarch64-unknown-linux-musl",
"aarch64-pc-windows-msvc",
"armv7-linux-androideabi"
]
}
},
"engines": {
"node": ">= 10"
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public"
},
"scripts": {
"artifacts": "napi artifacts",
"bench": "node -r @swc-node/register benchmark/bench.ts",
"build": "napi build --platform --release --pipe \"prettier -w\"",
"build:debug": "napi build --platform --pipe \"prettier -w\"",
"format": "run-p format:prettier format:rs format:toml",
"format:prettier": "prettier . -w",
"format:toml": "taplo format",
"format:rs": "cargo fmt",
"lint": "eslint . -c ./.eslintrc.yml",
"prepublishOnly": "napi prepublish -t npm",
"test": "jest",
"version": "napi version"
},
"devDependencies": {
"@napi-rs/cli": "^2.14.6",
"@swc-node/register": "^1.5.5",
"@swc/core": "^1.3.32",
"@taplo/cli": "^0.5.2",
"@types/jest": "^29.5.1",
"@typescript-eslint/eslint-plugin": "^5.50.0",
"@typescript-eslint/parser": "^5.50.0",
"ava": "^5.1.1",
"benny": "^3.7.1",
"chalk": "^5.2.0",
"eslint": "^8.33.0",
"eslint-config-prettier": "^8.6.0",
"eslint-plugin-import": "^2.27.5",
"eslint-plugin-prettier": "^4.2.1",
"husky": "^8.0.3",
"jest": "^29.5.0",
"lint-staged": "^13.1.0",
"npm-run-all": "^4.1.5",
"prettier": "^2.8.3",
"ts-jest": "^29.1.0",
"typescript": "^5.0.0"
},
"lint-staged": {
"*.@(js|ts|tsx)": [
"eslint -c .eslintrc.yml --fix"
],
"*.@(js|ts|tsx|yml|yaml|md|json)": [
"prettier --write"
],
"*.toml": [
"taplo format"
]
},
"ava": {
"require": [
"@swc-node/register"
],
"extensions": [
"ts"
],
"timeout": "2m",
"workerThreads": false,
"environmentVariables": {
"TS_NODE_PROJECT": "./tsconfig.json"
}
},
"prettier": {
"printWidth": 120,
"semi": false,
"trailingComma": "all",
"singleQuote": true,
"arrowParens": "always"
},
"packageManager": "yarn@3.5.1"
}