diff --git a/bindings/python/CHANGELOG.md b/bindings/python/CHANGELOG.md index 6117c465..e210c732 100644 --- a/bindings/python/CHANGELOG.md +++ b/bindings/python/CHANGELOG.md @@ -4,19 +4,24 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.9.0-dev4] +## [0.9.0-rc1] ### Fixed - [#362]: Fix training deadlock with Python components. - [#363]: Fix a crash when calling `.train` with some non-existent files - [#355]: Remove a lot of possible crashes +- [#389]: Improve truncation (crash and consistency) ### Added - [#379]: Add the ability to call `encode`/`encode_batch` with numpy arrays - [#292]: Support for the Unigram algorithm +- [#378], [#394], [#416], [#417]: Many new Normalizers and PreTokenizers - [#403]: Add `TemplateProcessing` `PostProcessor`. +- [#420]: Ability to fuse the "unk" token in BPE. ### Changed +- [#360]: Lots of improvements related to words/alignment tracking +- [#426]: Improvements on error messages thanks to PyO3 0.12 ## [0.8.1] @@ -233,10 +238,18 @@ delimiter (Works like `.split(delimiter)`) - Fix a bug with the IDs associated with added tokens. 
- Fix a bug that was causing crashes in Python 3.5 +[#426]: https://github.com/huggingface/tokenizers/pull/426 +[#420]: https://github.com/huggingface/tokenizers/pull/420 +[#417]: https://github.com/huggingface/tokenizers/pull/417 +[#416]: https://github.com/huggingface/tokenizers/pull/416 [#403]: https://github.com/huggingface/tokenizers/pull/403 +[#394]: https://github.com/huggingface/tokenizers/pull/394 +[#389]: https://github.com/huggingface/tokenizers/pull/389 [#379]: https://github.com/huggingface/tokenizers/pull/379 +[#378]: https://github.com/huggingface/tokenizers/pull/378 [#363]: https://github.com/huggingface/tokenizers/pull/363 [#362]: https://github.com/huggingface/tokenizers/pull/362 +[#360]: https://github.com/huggingface/tokenizers/pull/360 [#355]: https://github.com/huggingface/tokenizers/pull/355 [#333]: https://github.com/huggingface/tokenizers/pull/333 [#330]: https://github.com/huggingface/tokenizers/pull/330 diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 175a2346..61fb0f11 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -890,7 +890,7 @@ dependencies = [ [[package]] name = "tokenizers-python" -version = "0.9.0-dev4" +version = "0.9.0-rc1" dependencies = [ "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.77 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 86b26bbb..fe95accd 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tokenizers-python" -version = "0.9.0-dev4" +version = "0.9.0-rc1" authors = ["Anthony MOI "] edition = "2018" diff --git a/bindings/python/py_src/tokenizers/__init__.py b/bindings/python/py_src/tokenizers/__init__.py index 5776263d..0c770458 100644 --- a/bindings/python/py_src/tokenizers/__init__.py +++ b/bindings/python/py_src/tokenizers/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.9.0.dev4" +__version__ = "0.9.0.rc1" from typing import Tuple, Union, 
Tuple, List from enum import Enum diff --git a/bindings/python/setup.py b/bindings/python/setup.py index c852d7d8..a8df1132 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -6,7 +6,7 @@ extras["testing"] = ["pytest"] setup( name="tokenizers", - version="0.9.0.dev4", + version="0.9.0.rc1", description="Fast and Customizable Tokenizers", long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown",