mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Preparing for 0.12.1 (#978)
* Preparing for 0.12.1 * Updated the changelog.
This commit is contained in:
@ -1,9 +1,13 @@
|
|||||||
## [0.12.0]
|
## [0.12.1]
|
||||||
|
|
||||||
|
- [#938] **Reverted breaking change**. https://github.com/huggingface/transformers/issues/16520
|
||||||
|
|
||||||
|
## [0.12.0] YANKED
|
||||||
|
|
||||||
Bump minor version because of a breaking change.
|
Bump minor version because of a breaking change.
|
||||||
Using `0.12` to match other bindings.
|
Using `0.12` to match other bindings.
|
||||||
|
|
||||||
- [#938] **Breaking change**. Decoder trait is modified to be composable. This is only breaking if you are using decoders on their own. tokenizers should be error free.
|
- [#938] [REVERTED IN 0.12.1] **Breaking change**. Decoder trait is modified to be composable. This is only breaking if you are using decoders on their own. tokenizers should be error free.
|
||||||
- [#939] Making the regex in `ByteLevel` pre_tokenizer optional (necessary for BigScience)
|
- [#939] Making the regex in `ByteLevel` pre_tokenizer optional (necessary for BigScience)
|
||||||
|
|
||||||
- [#952] Fixed the vocabulary size of UnigramTrainer output (to respect added tokens)
|
- [#952] Fixed the vocabulary size of UnigramTrainer output (to respect added tokens)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "tokenizers",
|
"name": "tokenizers",
|
||||||
"version": "0.12.0",
|
"version": "0.12.1",
|
||||||
"description": "",
|
"description": "",
|
||||||
"main": "./dist/index.js",
|
"main": "./dist/index.js",
|
||||||
"types": "./dist/index.d.ts",
|
"types": "./dist/index.d.ts",
|
||||||
|
@ -4,11 +4,15 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## [0.12.0]
|
## [0.12.1]
|
||||||
|
|
||||||
|
- [#938] **Reverted breaking change**. https://github.com/huggingface/transformers/issues/16520
|
||||||
|
|
||||||
|
## [0.12.0] YANKED
|
||||||
|
|
||||||
Bump minor version because of a breaking change.
|
Bump minor version because of a breaking change.
|
||||||
|
|
||||||
- [#938] **Breaking change**. Decoder trait is modified to be composable. This is only breaking if you are using decoders on their own. tokenizers should be error free.
|
- [#938] [REVERTED IN 0.12.1] **Breaking change**. Decoder trait is modified to be composable. This is only breaking if you are using decoders on their own. tokenizers should be error free.
|
||||||
- [#939] Making the regex in `ByteLevel` pre_tokenizer optional (necessary for BigScience)
|
- [#939] Making the regex in `ByteLevel` pre_tokenizer optional (necessary for BigScience)
|
||||||
|
|
||||||
- [#952] Fixed the vocabulary size of UnigramTrainer output (to respect added tokens)
|
- [#952] Fixed the vocabulary size of UnigramTrainer output (to respect added tokens)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__version__ = "0.12.0"
|
__version__ = "0.12.1"
|
||||||
|
|
||||||
from typing import Tuple, Union, Tuple, List
|
from typing import Tuple, Union, Tuple, List
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
@ -7,7 +7,7 @@ extras["docs"] = ["sphinx", "sphinx_rtd_theme", "setuptools_rust"]
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="tokenizers",
|
name="tokenizers",
|
||||||
version="0.12.0",
|
version="0.12.1",
|
||||||
description="Fast and Customizable Tokenizers",
|
description="Fast and Customizable Tokenizers",
|
||||||
long_description=open("README.md", "r", encoding="utf-8").read(),
|
long_description=open("README.md", "r", encoding="utf-8").read(),
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
@ -4,11 +4,15 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## [0.12.0]
|
## [0.12.1]
|
||||||
|
|
||||||
|
- [#938] **Reverted breaking change**. https://github.com/huggingface/transformers/issues/16520
|
||||||
|
|
||||||
|
## [0.12.0] YANKED
|
||||||
|
|
||||||
Bump minor version because of a breaking change.
|
Bump minor version because of a breaking change.
|
||||||
|
|
||||||
- [#938] **Breaking change**. Decoder trait is modified to be composable. This is only breaking if you are using decoders on their own. tokenizers should be error free.
|
- [#938] [REVERTED IN 0.12.1] **Breaking change**. Decoder trait is modified to be composable. This is only breaking if you are using decoders on their own. tokenizers should be error free.
|
||||||
- [#939] Making the regex in `ByteLevel` pre_tokenizer optional (necessary for BigScience)
|
- [#939] Making the regex in `ByteLevel` pre_tokenizer optional (necessary for BigScience)
|
||||||
|
|
||||||
- [#952] Fixed the vocabulary size of UnigramTrainer output (to respect added tokens)
|
- [#952] Fixed the vocabulary size of UnigramTrainer output (to respect added tokens)
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
name = "tokenizers"
|
name = "tokenizers"
|
||||||
version = "0.12.0"
|
version = "0.12.1"
|
||||||
homepage = "https://github.com/huggingface/tokenizers"
|
homepage = "https://github.com/huggingface/tokenizers"
|
||||||
repository = "https://github.com/huggingface/tokenizers"
|
repository = "https://github.com/huggingface/tokenizers"
|
||||||
documentation = "https://docs.rs/tokenizers/"
|
documentation = "https://docs.rs/tokenizers/"
|
||||||
|
Reference in New Issue
Block a user