Mirror of https://github.com/mii443/tokenizers.git (synced 2025-08-22 16:25:30 +00:00)
Python - Prepare for release 0.10.0
bindings/python/CHANGELOG.md

@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.10.0rc1]
+## [0.10.0]
 
 ### Added
 - [#508]: Add a Visualizer for notebooks to help understand how the tokenizers work
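For context on the [#508] entry above: the visualizer ships under `tokenizers.tools`. A minimal sketch, not part of this commit; the tokenizer file path and the sample sentence are placeholders:

```python
from tokenizers import Tokenizer
from tokenizers.tools import EncodingVisualizer

# "tokenizer.json" is a placeholder path to any serialized tokenizer.
tokenizer = Tokenizer.from_file("tokenizer.json")

# In a Jupyter notebook, calling the visualizer renders the input text
# with one highlighted span per token produced by the tokenizer.
viz = EncodingVisualizer(tokenizer)
viz("The quick brown fox jumps over the lazy dog")
```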
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#533]: Add support for conda builds
 - [#542]: Add Split pre-tokenizer to easily split using a pattern
 - [#544]: Ability to train from memory. This also improves the integration with `datasets`
+- [#590]: Add getters/setters for components on BaseTokenizer
+- [#574]: Add `fuse_unk` option to SentencePieceBPETokenizer
 
 ### Changed
 - [#509]: Automatically stubbing the `.pyi` files
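To make the [#542] entry concrete, here is a small sketch of the Split pre-tokenizer; the pattern, the behavior choice, and the sample string are illustrative:

```python
from tokenizers.pre_tokenizers import Split

# Split on a literal "-"; behavior="removed" drops the delimiter itself.
# Other behaviors include "isolated", "merged_with_previous",
# "merged_with_next", and "contiguous".
pre_tok = Split(pattern="-", behavior="removed")

print(pre_tok.pre_tokenize_str("tokenizers-are-fast"))
# Expected, roughly:
# [('tokenizers', (0, 10)), ('are', (11, 14)), ('fast', (15, 19))]
```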
@@ -291,6 +293,8 @@ delimiter (Works like `.split(delimiter)`)
 - Fix a bug that was causing crashes in Python 3.5
 
 
+[#590]: https://github.com/huggingface/tokenizers/pull/590
+[#574]: https://github.com/huggingface/tokenizers/pull/574
 [#544]: https://github.com/huggingface/tokenizers/pull/544
 [#542]: https://github.com/huggingface/tokenizers/pull/542
 [#539]: https://github.com/huggingface/tokenizers/pull/539
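The [#544] entry is the headline feature of this release: training straight from an in-memory iterator of strings instead of files. A minimal sketch, with an arbitrary toy corpus and trainer settings:

```python
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer

# Any iterator of strings works: a list, a generator, or batches
# pulled from a `datasets` dataset.
corpus = ["a first example sentence", "a second example sentence"]

tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
trainer = BpeTrainer(vocab_size=100, special_tokens=["[UNK]"])
tokenizer.train_from_iterator(corpus, trainer=trainer)

print(tokenizer.encode("a first example").tokens)
```

Per the PRs referenced above, [#574]'s `fuse_unk` option makes SentencePieceBPETokenizer fuse runs of consecutive unknown pieces into a single unk token, and [#590] exposes the wrapped components (such as the normalizer and pre-tokenizer) as getters/setters on the BaseTokenizer implementations.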
bindings/python/Cargo.lock (generated)
@@ -993,7 +993,7 @@ dependencies = [
 
 [[package]]
 name = "tokenizers-python"
-version = "0.10.0-rc1"
+version = "0.10.0"
 dependencies = [
  "env_logger",
  "itertools 0.9.0",
bindings/python/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tokenizers-python"
-version = "0.10.0-rc1"
+version = "0.10.0"
 authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
 edition = "2018"
 
bindings/python/py_src/tokenizers/__init__.py

@@ -1,4 +1,4 @@
-__version__ = "0.10.0rc1"
+__version__ = "0.10.0"
 
 from typing import Tuple, Union, Tuple, List
 from enum import Enum
bindings/python/setup.py

@@ -6,7 +6,7 @@ extras["testing"] = ["pytest"]
 
 setup(
     name="tokenizers",
-    version="0.10.0rc1",
+    version="0.10.0",
     description="Fast and Customizable Tokenizers",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",