Mirror of https://github.com/mii443/tokenizers.git (synced 2025-08-22 16:25:30 +00:00)
Python - Update CHANGELOG and bump to 0.9.3 for release
@@ -4,15 +4,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.9.3]
+
+### Fixed
+- [#470]: Fix hanging error when training with custom component
+- [#476]: TemplateProcessing serialization is now deterministic
+- [#481]: Fix SentencePieceBPETokenizer.from_files
+
+### Added
+- [#477]: UnicodeScripts PreTokenizer to avoid merges between various scripts
+- [#480]: Unigram now accepts an `initial_alphabet` and handles `special_tokens` correctly
+
 ## [0.9.2]
 
 ### Fixed
-- [#464] Fix a problem with RobertaProcessing being deserialized as BertProcessing
+- [#464]: Fix a problem with RobertaProcessing being deserialized as BertProcessing
 
 ## [0.9.1]
 
 ### Fixed
-- [#459] Fix a problem with deserialization
+- [#459]: Fix a problem with deserialization
 
 ## [0.9.0]
 
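The two 0.9.3 "Added" entries in the hunk above are API-level additions to the Python bindings. Below is a minimal sketch of how they might be combined, assuming a hypothetical local `corpus.txt` and default settings elsewhere; note that the train-call argument order shown is the 0.9.x one and has changed in later releases.

```python
from tokenizers import Tokenizer
from tokenizers.models import Unigram
from tokenizers.pre_tokenizers import UnicodeScripts
from tokenizers.trainers import UnigramTrainer

# Start from an empty Unigram model; the trainer fills in the vocabulary.
tokenizer = Tokenizer(Unigram())

# [#477]: pre-tokenize on Unicode script boundaries, so pieces from different
# scripts (e.g. Latin and Han) are never merged together.
tokenizer.pre_tokenizer = UnicodeScripts()

# [#480]: the Unigram trainer now accepts an initial_alphabet and handles
# special_tokens correctly.
trainer = UnigramTrainer(
    vocab_size=8000,
    special_tokens=["<unk>", "<pad>"],
    initial_alphabet=["e", "é", "漢"],
)

tokenizer.train(trainer, ["corpus.txt"])  # corpus.txt is a placeholder path
```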
@@ -248,6 +259,11 @@ delimiter (Works like `.split(delimiter)`)
 - Fix a bug with the IDs associated with added tokens.
 - Fix a bug that was causing crashes in Python 3.5
 
+[#481]: https://github.com/huggingface/tokenizers/pull/481
+[#480]: https://github.com/huggingface/tokenizers/pull/480
+[#477]: https://github.com/huggingface/tokenizers/pull/477
+[#476]: https://github.com/huggingface/tokenizers/pull/476
+[#470]: https://github.com/huggingface/tokenizers/pull/470
 [#464]: https://github.com/huggingface/tokenizers/pull/464
 [#459]: https://github.com/huggingface/tokenizers/pull/459
 [#420]: https://github.com/huggingface/tokenizers/pull/420
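Of the 0.9.3 fixes listed above, [#476] concerns behaviour that is easy to exercise from Python. A short sketch, assuming an empty BPE model purely to host the post-processor:

```python
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.processors import TemplateProcessing

# Any model works here; an empty BPE model keeps the example self-contained.
tokenizer = Tokenizer(BPE())
tokenizer.post_processor = TemplateProcessing(
    single="[CLS] $A [SEP]",
    pair="[CLS] $A [SEP] $B:1 [SEP]",
    special_tokens=[("[CLS]", 1), ("[SEP]", 2)],
)

# [#476]: serializing the tokenizer is now deterministic, so the same
# configuration always produces the same JSON across runs.
serialized = tokenizer.to_str()
print(serialized[:80])
```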
bindings/python/Cargo.lock (generated; 2 lines changed)
@@ -893,7 +893,7 @@ dependencies = [
 
 [[package]]
 name = "tokenizers-python"
-version = "0.9.2"
+version = "0.9.3"
 dependencies = [
  "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1,6 +1,6 @@
 [package]
 name = "tokenizers-python"
-version = "0.9.2"
+version = "0.9.3"
 authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
 edition = "2018"
@@ -1,4 +1,4 @@
-__version__ = "0.9.2"
+__version__ = "0.9.3"
 
 from typing import Tuple, Union, Tuple, List
 from enum import Enum
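The hunk above bumps the `__version__` string exposed by the Python module. After installing the release, a quick sanity check (nothing beyond the public attribute is assumed):

```python
import tokenizers

# The installed package should report the version bumped in this commit.
print(tokenizers.__version__)  # expected: "0.9.3"
```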
@@ -6,7 +6,7 @@ extras["testing"] = ["pytest"]
 
 setup(
     name="tokenizers",
-    version="0.9.2",
+    version="0.9.3",
     description="Fast and Customizable Tokenizers",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",