From 719bea76b92a34ee03dc024315922c50a3a9ea40 Mon Sep 17 00:00:00 2001 From: Anthony MOI Date: Tue, 12 Jan 2021 16:18:53 -0500 Subject: [PATCH] Python - Prepare for release 0.10.0 --- bindings/python/CHANGELOG.md | 6 +++++- bindings/python/Cargo.lock | 2 +- bindings/python/Cargo.toml | 2 +- bindings/python/py_src/tokenizers/__init__.py | 2 +- bindings/python/setup.py | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/bindings/python/CHANGELOG.md b/bindings/python/CHANGELOG.md index e066b2fa..7ad22788 100644 --- a/bindings/python/CHANGELOG.md +++ b/bindings/python/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.10.0rc1] +## [0.10.0] ### Added - [#508]: Add a Visualizer for notebooks to help understand how the tokenizers work @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#533]: Add support for conda builds - [#542]: Add Split pre-tokenizer to easily split using a pattern - [#544]: Ability to train from memory. 
This also improves the integration with `datasets` +- [#590]: Add getters/setters for components on BaseTokenizer +- [#574]: Add `fuse_unk` option to SentencePieceBPETokenizer ### Changed - [#509]: Automatically stubbing the `.pyi` files @@ -291,6 +293,8 @@ delimiter (Works like `.split(delimiter)`) - Fix a bug that was causing crashes in Python 3.5 +[#590]: https://github.com/huggingface/tokenizers/pull/590 +[#574]: https://github.com/huggingface/tokenizers/pull/574 [#544]: https://github.com/huggingface/tokenizers/pull/544 [#542]: https://github.com/huggingface/tokenizers/pull/542 [#539]: https://github.com/huggingface/tokenizers/pull/539 diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index b8537d6d..742d81e7 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -993,7 +993,7 @@ dependencies = [ [[package]] name = "tokenizers-python" -version = "0.10.0-rc1" +version = "0.10.0" dependencies = [ "env_logger", "itertools 0.9.0", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 09deaee6..f1ebf82f 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tokenizers-python" -version = "0.10.0-rc1" +version = "0.10.0" authors = ["Anthony MOI "] edition = "2018" diff --git a/bindings/python/py_src/tokenizers/__init__.py b/bindings/python/py_src/tokenizers/__init__.py index d2592ff5..aeb1215b 100644 --- a/bindings/python/py_src/tokenizers/__init__.py +++ b/bindings/python/py_src/tokenizers/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.10.0rc1" +__version__ = "0.10.0" from typing import Tuple, Union, Tuple, List from enum import Enum diff --git a/bindings/python/setup.py b/bindings/python/setup.py index 2ea3c405..e5dc17ed 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -6,7 +6,7 @@ extras["testing"] = ["pytest"] setup( name="tokenizers", - version="0.10.0rc1", + version="0.10.0", description="Fast and Customizable Tokenizers", 
long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown",