mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
* version = "0.15.3-dev-0" Improve performance of meta space, but also just fix it. (transformers) ➜ transformers git:(refactor-default-llama) ✗ python ../scripts/gemma-dummy.py Token indices sequence length is longer than the specified maximum sequence length for this model (14999 > 2048). Running this sequence through the model will result in indexing errors ['<REPR_END>', '▁inform', '<s>', '.', '▁Hey', '<unk>', '.', '▁', '▁', '▁', '▁', '▁', '▁', '▁.'] ['▁inform', '<s>', '.', '▁Hey', '<unk>', '.', '▁', '▁', '▁', '▁', '▁', '▁', '▁.'] [0.0006330013275146484, 0.0014591217041015625, 0.015890836715698242, 0.18584918975830078, 2.1726326942443848] (transformers) ➜ transformers git:(refactor-default-llama) ✗ python ../scripts/gemma-dummy.py Token indices sequence length is longer than the specified maximum sequence length for this model (10000 > 2048). Running this sequence through the model will result in indexing errors ['<REPR_END>', 'in', 'form', '<s>', '.', '▁Hey', '<unk>', '.', '▁▁▁▁▁▁', '▁.'] ['in', 'form', '<s>', '.', '▁Hey', '<unk>', '.', '▁▁▁▁▁▁', '▁.'] [0.0008409023284912109, 0.0008909702301025391, 0.00882411003112793, 0.10214710235595703, 1.187899112701416] * well what do we have * nit * be BC with non legacy * unrelated change for clippy * fix test * splitting is a must for word_ids * fmt and lint * Fixing everything (hopefully better). * Fixing node. * Including yarn.lock * Lint. * Stubs. * revert to use split * fix merge issues * fix tests * finish fixing tests * ruff --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
114 lines
3.0 KiB
JSON
114 lines
3.0 KiB
JSON
{
  "name": "tokenizers",
  "version": "0.15.3-dev0",
  "repository": {
    "type": "git",
    "url": "git+https://github.com/huggingface/tokenizers.git"
  },
  "bugs": {
    "url": "https://github.com/huggingface/tokenizers/issues"
  },
  "homepage": "https://github.com/huggingface/tokenizers/tree/master/bindings/node",
  "author": "Anthony MOI <m.anthony.moi@gmail.com>",
  "license": "Apache-2.0",
  "description": "Provides an implementation of today's most used tokenizers, with a focus on performances and versatility.",
  "files": [
    "index.d.ts",
    "index.js"
  ],
  "napi": {
    "name": "tokenizers",
    "triples": {
      "defaults": true,
      "additional": [
        "x86_64-unknown-linux-musl",
        "aarch64-unknown-linux-gnu",
        "i686-pc-windows-msvc",
        "armv7-unknown-linux-gnueabihf",
        "aarch64-apple-darwin",
        "aarch64-linux-android",
        "x86_64-unknown-freebsd",
        "aarch64-unknown-linux-musl",
        "aarch64-pc-windows-msvc",
        "armv7-linux-androideabi"
      ]
    }
  },
  "engines": {
    "node": ">= 10"
  },
  "publishConfig": {
    "registry": "https://registry.npmjs.org/",
    "access": "public"
  },
  "scripts": {
    "artifacts": "napi artifacts",
    "bench": "node -r @swc-node/register benchmark/bench.ts",
    "build": "napi build --platform --release --pipe \"prettier -w\"",
    "build:debug": "napi build --platform --pipe \"prettier -w\"",
    "format": "run-p format:prettier format:rs format:toml",
    "format:prettier": "prettier . -w",
    "format:toml": "taplo format",
    "format:rs": "cargo fmt",
    "lint": "eslint . -c ./.eslintrc.yml",
    "prepublishOnly": "napi prepublish -t npm",
    "test": "jest",
    "version": "napi version"
  },
  "devDependencies": {
    "@napi-rs/cli": "^2.14.6",
    "@swc-node/register": "^1.5.5",
    "@swc/core": "^1.3.32",
    "@taplo/cli": "^0.5.2",
    "@types/jest": "^29.5.1",
    "@typescript-eslint/eslint-plugin": "^5.50.0",
    "@typescript-eslint/parser": "^5.50.0",
    "ava": "^5.1.1",
    "benny": "^3.7.1",
    "chalk": "^5.2.0",
    "eslint": "^8.33.0",
    "eslint-config-prettier": "^8.6.0",
    "eslint-plugin-import": "^2.27.5",
    "eslint-plugin-prettier": "^4.2.1",
    "husky": "^8.0.3",
    "jest": "^29.5.0",
    "lint-staged": "^13.1.0",
    "npm-run-all": "^4.1.5",
    "prettier": "^2.8.3",
    "ts-jest": "^29.1.0",
    "typescript": "^5.0.0"
  },
  "lint-staged": {
    "*.@(js|ts|tsx)": [
      "eslint -c .eslintrc.yml --fix"
    ],
    "*.@(js|ts|tsx|yml|yaml|md|json)": [
      "prettier --write"
    ],
    "*.toml": [
      "taplo format"
    ]
  },
  "ava": {
    "require": [
      "@swc-node/register"
    ],
    "extensions": [
      "ts"
    ],
    "timeout": "2m",
    "workerThreads": false,
    "environmentVariables": {
      "TS_NODE_PROJECT": "./tsconfig.json"
    }
  },
  "prettier": {
    "printWidth": 120,
    "semi": false,
    "trailingComma": "all",
    "singleQuote": true,
    "arrowParens": "always"
  },
  "packageManager": "yarn@3.5.1"
}