Add python 3.11 to manylinux buildwheels (#1096)

* Add python 3.11 to manylinux buildwheels

* Fixing clippy.

* Node clippy.

* Python clippy.

* Changelog + version number update.

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
Author: Cameron
Date: 2022-11-07 17:45:04 +10:00
Committed by: GitHub
Parent: 96a9e5715c
Commit: 11bb2e00f2
20 changed files with 39 additions and 28 deletions


@@ -14,10 +14,9 @@ jobs:
strategy:
matrix:
os: [windows-latest, macos-latest]
python: ["3.7", "3.8", "3.9"]
# 3.10 Not yet available on Conda.
# python: ["3.7", "3.8", "3.9", "3.10"]
python: ["3.7", "3.8", "3.9", "3.10"]
# 3.11 Not yet available on Conda.
# python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
steps:
- name: Checkout repository
uses: actions/checkout@v2


@@ -34,7 +34,7 @@ jobs:
runs-on: windows-latest
strategy:
matrix:
python: ["3.7", "3.8", "3.9", "3.10"]
python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
steps:
- name: Checkout repository
uses: actions/checkout@v2


@@ -1,3 +1,7 @@
+## [0.13.2]
+- Python only changes.
## [0.13.1]
- [#1072] Fixing Roberta type ids.


@@ -995,7 +995,7 @@ dependencies = [
[[package]]
name = "node"
version = "0.8.0"
version = "0.13.2"
dependencies = [
"neon",
"neon-build",
@@ -1668,7 +1668,7 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokenizers"
version = "0.12.1"
version = "0.13.2"
dependencies = [
"aho-corasick",
"cached-path",


@@ -227,7 +227,7 @@ fn sequence(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
match pretokenizer.downcast::<JsPreTokenizer>().or_throw(&mut cx) {
Ok(pretokenizer) => {
let guard = cx.lock();
-let pretok = (*pretokenizer.borrow(&guard)).pretok.clone();
+let pretok = pretokenizer.borrow(&guard).pretok.clone();
if let Some(pretokenizer) = pretok {
match pretokenizer {
JsPreTokenizerWrapper::Sequence(seq) => sequence.extend(seq),
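
The node clippy fix above drops an explicit dereference that the dot operator already performs through Deref. A minimal sketch of the pattern, with a hypothetical Guard type standing in for the guard-scoped value Neon's borrow(&guard) returns (illustrative only, not the binding's actual types):

use std::ops::Deref;

struct Inner {
    pretok: Option<String>,
}

// Stand-in for a guard-scoped reference; it derefs to the wrapped data.
struct Guard<'a>(&'a Inner);

impl<'a> Deref for Guard<'a> {
    type Target = Inner;
    fn deref(&self) -> &Inner {
        self.0
    }
}

fn main() {
    let inner = Inner { pretok: Some("whitespace".to_string()) };
    let guard = Guard(&inner);

    // Before: explicit deref, the form clippy (explicit_auto_deref) warns about.
    let a = (*guard).pretok.clone();
    // After: field access auto-derefs through the guard and reads the same.
    let b = guard.pretok.clone();

    assert_eq!(a, b);
}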


@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.13.2]
+- [#1096] Python 3.11 support
## [0.13.1]
- [#1072] Fixing Roberta type ids.
@@ -389,7 +393,7 @@ delimiter (Works like `.split(delimiter)`)
- Fix a bug with the IDs associated with added tokens.
- Fix a bug that was causing crashes in Python 3.5
+[#1096]: https://github.com/huggingface/tokenizers/pull/1096
[#1072]: https://github.com/huggingface/tokenizers/pull/1072
[#956]: https://github.com/huggingface/tokenizers/pull/956
[#1008]: https://github.com/huggingface/tokenizers/pull/1008


@@ -1720,7 +1720,7 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokenizers"
version = "0.13.1"
version = "0.13.2"
dependencies = [
"aho-corasick",
"cached-path",


@@ -1,6 +1,6 @@
[package]
name = "tokenizers-python"
version = "0.13.1"
version = "0.13.2"
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
edition = "2021"


@@ -8,7 +8,7 @@ fi
export PATH="$HOME/.cargo/bin:$PATH"
-for PYBIN in /opt/python/cp{37,38,39,310}*/bin; do
+for PYBIN in /opt/python/cp{37,38,39,310,311}*/bin; do
export PYTHON_SYS_EXECUTABLE="$PYBIN/python"
"${PYBIN}/pip" install -U setuptools-rust setuptools wheel


@@ -1,4 +1,4 @@
-__version__ = "0.13.2.dev0"
+__version__ = "0.13.2"
from enum import Enum
from typing import List, Tuple, Union


@@ -9,7 +9,7 @@ extras["dev"] = extras["testing"]
setup(
name="tokenizers",
version="0.13.2.dev0",
version="0.13.2",
description="Fast and Customizable Tokenizers",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",


@@ -353,7 +353,7 @@ impl PySequenceDecoder {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[PyList::empty(py)])
PyTuple::new(py, [PyList::empty(py)])
}
}
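
These __getnewargs__ hunks, together with the later Vec::extend and AhoCorasickBuilder::build changes, all silence the same clippy warning: borrowing an array where a generic IntoIterator parameter can simply take it by value (PyTuple::new accepts any IntoIterator of elements). A standalone sketch of the pattern, using a hypothetical generic function rather than the PyO3 API itself:

// A generic sink that accepts anything iterable, like PyTuple::new or Vec::extend.
fn lengths<I>(items: I) -> Vec<usize>
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    items.into_iter().map(|s| s.as_ref().len()).collect()
}

fn main() {
    // Flagged by clippy (needless_borrow): the reference adds nothing here.
    let borrowed = lengths(&["hello", "world"]);
    // Preferred: arrays implement IntoIterator by value, so pass them directly.
    let owned = lengths(["hello", "world"]);
    assert_eq!(borrowed, owned);
}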


@@ -360,7 +360,7 @@ impl PySequence {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[PyList::empty(py)])
PyTuple::new(py, [PyList::empty(py)])
}
fn __len__(&self) -> usize {


@@ -355,7 +355,7 @@ impl PySplit {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[" ", "removed"])
PyTuple::new(py, [" ", "removed"])
}
}
@@ -387,7 +387,7 @@ impl PyCharDelimiterSplit {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[" "])
PyTuple::new(py, [" "])
}
}
@@ -450,7 +450,7 @@ impl PySequence {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[PyList::empty(py)])
PyTuple::new(py, [PyList::empty(py)])
}
}


@@ -167,7 +167,7 @@ impl PyBertProcessing {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[("", 0), ("", 0)])
PyTuple::new(py, [("", 0), ("", 0)])
}
}
@@ -218,7 +218,7 @@ impl PyRobertaProcessing {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[("", 0), ("", 0)])
PyTuple::new(py, [("", 0), ("", 0)])
}
}
@@ -441,7 +441,7 @@ impl PySequence {
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, &[PyList::empty(py)])
PyTuple::new(py, [PyList::empty(py)])
}
}


@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.13.2]
+- Python only changes
## [0.13.1]
- [#1072] Fixing Roberta type ids.


@@ -99,7 +99,7 @@ impl BertNormalizer {
let mut new_chars: Vec<(char, isize)> = vec![];
normalized.for_each(|c| {
if is_chinese_char(c) {
new_chars.extend(&[(' ', 0), (c, 1), (' ', 1)]);
new_chars.extend([(' ', 0), (c, 1), (' ', 1)]);
} else {
new_chars.push((c, 0));
}


@@ -135,7 +135,7 @@ impl PreTokenizer for ByteLevel {
bytes
.iter()
.enumerate()
-.map(|(i, b)| (BYTES_CHAR[b], if i > 0 { 1 } else { 0 })),
+.map(|(i, b)| (BYTES_CHAR[b], isize::from(i > 0))),
);
}
normalized.transform(transformations.into_iter(), 0);
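
The if i > 0 { 1 } else { 0 } rewrites in this hunk and in the lowercase/uppercase hunks below lean on the standard From<bool> impls for integer types, which is what clippy's bool_to_int_with_if lint suggests (assuming that lint is the trigger here). A small sketch of the equivalence:

fn main() {
    for i in 0..3usize {
        // The conditional form clippy flags...
        let with_if: isize = if i > 0 { 1 } else { 0 };
        // ...and the suggested conversion: true -> 1, false -> 0.
        let with_from = isize::from(i > 0);
        assert_eq!(with_if, with_from);
    }
}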


@@ -167,10 +167,10 @@ impl AddedVocabulary {
pub fn new() -> Self {
let trie = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostLongest)
.build::<_, &&[u8]>(&[]);
.build::<_, &&[u8]>([]);
let normalized_trie = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostLongest)
.build::<_, &&[u8]>(&[]);
.build::<_, &&[u8]>([]);
Self {
added_tokens_map: HashMap::new(),
added_tokens_map_r: HashMap::new(),


@@ -546,7 +546,7 @@ impl NormalizedString {
let mut new_chars: Vec<(char, isize)> = vec![];
self.for_each(|c| {
c.to_lowercase().enumerate().for_each(|(index, c)| {
-new_chars.push((c, if index > 0 { 1 } else { 0 }));
+new_chars.push((c, isize::from(index > 0)));
})
});
self.transform(new_chars.into_iter(), 0);
@@ -558,7 +558,7 @@ impl NormalizedString {
let mut new_chars: Vec<(char, isize)> = vec![];
self.for_each(|c| {
c.to_uppercase().enumerate().for_each(|(index, c)| {
-new_chars.push((c, if index > 0 { 1 } else { 0 }));
+new_chars.push((c, isize::from(index > 0)));
})
});
self.transform(new_chars.into_iter(), 0);