node: version 0.4.0

This commit is contained in:
Pierric Cistac
2020-02-05 17:38:59 -05:00
parent cb8585bc4e
commit 3df188dc27
5 changed files with 29 additions and 7 deletions

View File

@ -0,0 +1,21 @@
# [0.4.0](https://github.com/huggingface/tokenizers/compare/node-v0.3.1...node-v0.4.0) (2020-02-05)
### BREAKING CHANGES
- `getOverflowing()` method on `Encoding` now returns all the overflowing `Encoding`s at once ([#77](https://github.com/huggingface/tokenizers/pull/77)) ([0094393](https://github.com/huggingface/tokenizers/commit/0094393610623bafc269790cd1be81fd1474583a))
### Features
- Add `setTruncation`, `disableTruncation`, `setPadding` and `disablePadding` methods in `Tokenizer` and `BaseTokenizer` ([#109](https://github.com/huggingface/tokenizers/pull/109)) ([78e2690](https://github.com/huggingface/tokenizers/commit/78e26905a735e14e67590cb09ddb42ed141c455b))
- Expose tokenizer / truncation / padding configuration in `BaseTokenizer` ([#126](https://github.com/huggingface/tokenizers/pull/126)) ([cb8585b](https://github.com/huggingface/tokenizers/commit/cb8585bc4eb8037c52049da677e4791857231f03))
- Expose `addTokens`, `addSpecialTokens`, `idToToken` and `tokenToId` in `BaseTokenizer` ([7051480](https://github.com/huggingface/tokenizers/commit/7051480c333f88bef80aa6846b66032a2d47383c))
- Add `getOriginalString()` method on `Encoding` ([a14c633](https://github.com/huggingface/tokenizers/commit/a14c63343b217a2c501359bec52baf717e3a05ef))
- Add `charDelimiterSplitPreTokenizer`: a new `PreTokenizer` that allows splitting sequences on the given delimiter (works like `.split(delimiter)`) ([#114](https://github.com/huggingface/tokenizers/pull/114)) ([6165910](https://github.com/huggingface/tokenizers/commit/6165910ca66b6bfd9fd996aa38c4c0b2b6505953))
- Add `robertaProcessing` as a new `PostProcessor` ([#111](https://github.com/huggingface/tokenizers/pull/111)) ([6524f09](https://github.com/huggingface/tokenizers/commit/6524f09e991c3a52c839d8eb01bfa41e81fde1d1))
### Bug Fixes
- Correctly truncate with `OnlyFirst` and `OnlySecond` strategies ([#108](https://github.com/huggingface/tokenizers/issues/108)) ([6d532fe](https://github.com/huggingface/tokenizers/commit/6d532fedb1d3626328828304a5c39807733d2fa1))
- Fix default special tokens in `BertWordPieceTokenizer` ([10e2d28](https://github.com/huggingface/tokenizers/commit/10e2d286caf517f0977c04cf8e1924aed90403c9))
- Fix return type of `getSpecialTokensMask` on `Encoding` ([9770be5](https://github.com/huggingface/tokenizers/commit/9770be566175dc9c44dd7dcaa00a57d0e4ca632b))
- Actually add special tokens in tokenizer implementations ([acef252](https://github.com/huggingface/tokenizers/commit/acef252dacc43adc414175cfc325668ad1488753))

View File

@ -48,6 +48,7 @@ console.log(wpEncoded.getIds());
console.log(wpEncoded.getAttentionMask());
console.log(wpEncoded.getOffsets());
console.log(wpEncoded.getOverflowing());
console.log(wpEncoded.getOriginalString());
console.log(wpEncoded.getSpecialTokensMask());
console.log(wpEncoded.getTypeIds());
```

View File

@ -170,7 +170,7 @@ dependencies = [
[[package]]
name = "indicatif"
version = "0.13.0"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"console 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -251,7 +251,7 @@ version = "0.1.0"
dependencies = [
"neon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"neon-build 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"tokenizers 0.6.1",
"tokenizers 0.7.0",
]
[[package]]
@ -430,10 +430,10 @@ dependencies = [
[[package]]
name = "tokenizers"
version = "0.6.1"
version = "0.7.0"
dependencies = [
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"indicatif 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -513,7 +513,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum encode_unicode 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772"
"checksum indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8572bccfb0665e70b7faf44ee28841b8e0823450cd4ad562a76b5a3c4bf48487"
"checksum indicatif 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a68371cf417889c9d7f98235b7102ea7c54fc59bcbd22f3dea785be9d27e40"
"checksum itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
"checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558"

View File

@ -1,6 +1,6 @@
{
"name": "tokenizers",
"version": "0.3.1",
"version": "0.4.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "tokenizers",
"version": "0.3.1",
"version": "0.4.0",
"description": "",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",