diff --git a/bindings/python/CHANGELOG.md b/bindings/python/CHANGELOG.md
index d22dab62..e58906cc 100644
--- a/bindings/python/CHANGELOG.md
+++ b/bindings/python/CHANGELOG.md
@@ -4,15 +4,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.9.3]
+
+### Fixed
+- [#470]: Fix hanging error when training with custom component
+- [#476]: TemplateProcessing serialization is now deterministic
+- [#481]: Fix SentencePieceBPETokenizer.from_files
+
+### Added
+- [#477]: UnicodeScripts PreTokenizer to avoid merges between various scripts
+- [#480]: Unigram now accepts an `initial_alphabet` and handles `special_tokens` correctly
+
 ## [0.9.2]
 
 ### Fixed
-- [#464] Fix a problem with RobertaProcessing being deserialized as BertProcessing
+- [#464]: Fix a problem with RobertaProcessing being deserialized as BertProcessing
 
 ## [0.9.1]
 
 ### Fixed
-- [#459] Fix a problem with deserialization
+- [#459]: Fix a problem with deserialization
 
 ## [0.9.0]
 
@@ -248,6 +259,11 @@ delimiter (Works like `.split(delimiter)`)
 - Fix a bug with the IDs associated with added tokens.
 - Fix a bug that was causing crashes in Python 3.5
 
+[#481]: https://github.com/huggingface/tokenizers/pull/481
+[#480]: https://github.com/huggingface/tokenizers/pull/480
+[#477]: https://github.com/huggingface/tokenizers/pull/477
+[#476]: https://github.com/huggingface/tokenizers/pull/476
+[#470]: https://github.com/huggingface/tokenizers/pull/470
 [#464]: https://github.com/huggingface/tokenizers/pull/464
 [#459]: https://github.com/huggingface/tokenizers/pull/459
 [#420]: https://github.com/huggingface/tokenizers/pull/420
diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock
index e09b8d97..6be62b09 100644
--- a/bindings/python/Cargo.lock
+++ b/bindings/python/Cargo.lock
@@ -893,7 +893,7 @@ dependencies = [
 
 [[package]]
 name = "tokenizers-python"
-version = "0.9.2"
+version = "0.9.3"
 dependencies = [
  "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)",
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 5acae788..f3a1a1ef 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tokenizers-python"
-version = "0.9.2"
+version = "0.9.3"
 authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
 edition = "2018"
diff --git a/bindings/python/py_src/tokenizers/__init__.py b/bindings/python/py_src/tokenizers/__init__.py
index 6f972a6b..8b62e769 100644
--- a/bindings/python/py_src/tokenizers/__init__.py
+++ b/bindings/python/py_src/tokenizers/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.9.2"
+__version__ = "0.9.3"
 
 from typing import Tuple, Union, Tuple, List
 from enum import Enum
diff --git a/bindings/python/setup.py b/bindings/python/setup.py
index d424412e..5713bb6e 100644
--- a/bindings/python/setup.py
+++ b/bindings/python/setup.py
@@ -6,7 +6,7 @@ extras["testing"] = ["pytest"]
 
 setup(
     name="tokenizers",
-    version="0.9.2",
+    version="0.9.3",
     description="Fast and Customizable Tokenizers",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
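
For context on the two `Added` entries in the changelog, here is a minimal sketch of how [#477]'s `UnicodeScripts` pre-tokenizer and [#480]'s new `UnigramTrainer` options fit together. The vocab size, initial alphabet, special tokens, and corpus path are illustrative placeholders, and the `train(trainer, files)` argument order follows the 0.9-era API.

```python
from tokenizers import Tokenizer
from tokenizers.models import Unigram
from tokenizers.pre_tokenizers import UnicodeScripts
from tokenizers.trainers import UnigramTrainer

# Build an empty Unigram tokenizer to train from scratch
tokenizer = Tokenizer(Unigram())

# [#477]: split on Unicode script boundaries, so merges never
# combine pieces from different scripts (e.g. Latin + Han)
tokenizer.pre_tokenizer = UnicodeScripts()

# [#480]: the Unigram trainer now accepts an initial alphabet
# and handles special tokens correctly
trainer = UnigramTrainer(
    vocab_size=8000,                    # illustrative size
    initial_alphabet=["a", "b", "c"],   # characters guaranteed in the vocab
    special_tokens=["<unk>", "<pad>"],  # kept intact, never split
)

# "corpus.txt" is a placeholder training file
tokenizer.train(trainer, ["corpus.txt"])
```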
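And a short sketch of the constructor repaired by [#481], loading a pretrained SentencePiece-style BPE from its vocab/merges pair; the file names are placeholders.

```python
from tokenizers import SentencePieceBPETokenizer

# [#481]: from_files works again for loading a saved vocab/merges pair
tokenizer = SentencePieceBPETokenizer.from_files("vocab.json", "merges.txt")
print(tokenizer.encode("Hello world").tokens)
```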