mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Python - Update CHANGELOG and bump to 0.9.0.rc1
This commit is contained in:
@ -4,19 +4,24 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.9.0-dev4]
|
||||
## [0.9.0-rc1]
|
||||
|
||||
### Fixed
|
||||
- [#362]: Fix training deadlock with Python components.
|
||||
- [#363]: Fix a crash when calling `.train` with some non-existent files
|
||||
- [#355]: Remove a lot of possible crashes
|
||||
- [#389]: Improve truncation (crash and consistency)
|
||||
|
||||
### Added
|
||||
- [#379]: Add the ability to call `encode`/`encode_batch` with numpy arrays
|
||||
- [#292]: Support for the Unigram algorithm
|
||||
- [#378], [#394], [#416], [#417]: Many new Normalizers and PreTokenizers
|
||||
- [#403]: Add `TemplateProcessing` `PostProcessor`.
|
||||
- [#420]: Ability to fuse the "unk" token in BPE.
|
||||
|
||||
### Changed
|
||||
- [#360]: Lots of improvements related to words/alignment tracking
|
||||
- [#426]: Improvements on error messages thanks to PyO3 0.12
|
||||
|
||||
## [0.8.1]
|
||||
|
||||
@ -233,10 +238,17 @@ delimiter (Works like `.split(delimiter)`)
|
||||
- Fix a bug with the IDs associated with added tokens.
|
||||
- Fix a bug that was causing crashes in Python 3.5
|
||||
|
||||
[#420]: https://github.com/huggingface/tokenizers/pull/420
|
||||
[#417]: https://github.com/huggingface/tokenizers/pull/417
|
||||
[#416]: https://github.com/huggingface/tokenizers/pull/416
|
||||
[#403]: https://github.com/huggingface/tokenizers/pull/403
|
||||
[#394]: https://github.com/huggingface/tokenizers/pull/394
|
||||
[#389]: https://github.com/huggingface/tokenizers/pull/389
|
||||
[#379]: https://github.com/huggingface/tokenizers/pull/379
|
||||
[#378]: https://github.com/huggingface/tokenizers/pull/378
|
||||
[#363]: https://github.com/huggingface/tokenizers/pull/363
|
||||
[#362]: https://github.com/huggingface/tokenizers/pull/362
|
||||
[#360]: https://github.com/huggingface/tokenizers/pull/360
|
||||
[#355]: https://github.com/huggingface/tokenizers/pull/355
|
||||
[#333]: https://github.com/huggingface/tokenizers/pull/333
|
||||
[#330]: https://github.com/huggingface/tokenizers/pull/330
|
||||
|
2
bindings/python/Cargo.lock
generated
2
bindings/python/Cargo.lock
generated
@ -890,7 +890,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokenizers-python"
|
||||
version = "0.9.0-dev4"
|
||||
version = "0.9.0-rc1"
|
||||
dependencies = [
|
||||
"env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tokenizers-python"
|
||||
version = "0.9.0-dev4"
|
||||
version = "0.9.0-rc1"
|
||||
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
__version__ = "0.9.0.dev4"
|
||||
__version__ = "0.9.0.rc1"
|
||||
|
||||
from typing import Tuple, Union, Tuple, List
|
||||
from enum import Enum
|
||||
|
@ -6,7 +6,7 @@ extras["testing"] = ["pytest"]
|
||||
|
||||
setup(
|
||||
name="tokenizers",
|
||||
version="0.9.0.dev4",
|
||||
version="0.9.0.rc1",
|
||||
description="Fast and Customizable Tokenizers",
|
||||
long_description=open("README.md", "r", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
|
Reference in New Issue
Block a user