Fixing the documentation for ByteLevel in Python (#982)

* Fixing the documentation for `ByteLevel` in Python
* Python stub.py (after rebuilding, of course)
@@ -100,6 +100,9 @@ class ByteLevel(PreTokenizer):
         add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether to add a space to the first word if there isn't already one. This
             lets us treat `hello` exactly like `say hello`.
+        use_regex (:obj:`bool`, `optional`, defaults to :obj:`True`):
+            Set this to :obj:`False` to prevent this `pre_tokenizer` from using
+            the GPT2 specific regexp for splitting on whitespace.
     """

     def __init__(self, add_prefix_space=True, use_regex=True):
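For context, the `add_prefix_space` option documented above can be exercised directly from Python. The following is a minimal usage sketch against the `tokenizers` Python API; the outputs in the comments are illustrative, not quoted from the library.

from tokenizers.pre_tokenizers import ByteLevel

# With add_prefix_space=True (the default), a missing leading space is added to
# the first word, so a bare "hello" is byte-level mapped the same way as the
# "hello" inside "say hello" (ByteLevel renders spaces as the "Ġ" byte).
pre_tok = ByteLevel(add_prefix_space=True, use_regex=True)

print(pre_tok.pre_tokenize_str("hello"))      # e.g. [('Ġhello', (0, 5))]
print(pre_tok.pre_tokenize_str("say hello"))  # e.g. [('Ġsay', (0, 3)), ('Ġhello', (3, 9))]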
@@ -228,6 +228,9 @@ macro_rules! setter {
 /// add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`):
 ///     Whether to add a space to the first word if there isn't already one. This
 ///     lets us treat `hello` exactly like `say hello`.
+/// use_regex (:obj:`bool`, `optional`, defaults to :obj:`True`):
+///     Set this to :obj:`False` to prevent this `pre_tokenizer` from using
+///     the GPT2 specific regexp for splitting on whitespace.
 #[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=ByteLevel)]
 #[text_signature = "(self, add_prefix_space=True, use_regex=True)"]
 pub struct PyByteLevel {}
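The Rust-side doc comment above describes the same `use_regex` option exposed to Python. Below is a short sketch of the difference it controls, again assuming the `tokenizers` Python API; the exact pieces shown in the comments are illustrative.

from tokenizers.pre_tokenizers import ByteLevel

text = "Hello, world!"

# use_regex=True (the default) first splits the text with the GPT-2 regular
# expression (roughly: words, punctuation, whitespace runs), then maps each
# piece to its byte-level representation.
with_regex = ByteLevel(use_regex=True)
print(with_regex.pre_tokenize_str(text))     # several pieces, e.g. 'Hello', ',', 'Ġworld', '!'

# use_regex=False skips that regex split, so the whole input is expected to
# remain a single byte-level-mapped piece.
without_regex = ByteLevel(use_regex=False)
print(without_regex.pre_tokenize_str(text))  # one piece covering the full string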