# Pre-tokenizers
<tokenizerslangcontent>
<python>

## BertPreTokenizer

[[autodoc]] tokenizers.pre_tokenizers.BertPreTokenizer
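
A minimal usage sketch (assuming only the `tokenizers` package); `pre_tokenize_str` returns `(piece, (start, end))` offset pairs:

```python
from tokenizers.pre_tokenizers import BertPreTokenizer

# Splits on whitespace and isolates each punctuation mark,
# mirroring the original BERT pre-tokenization
pre_tokenizer = BertPreTokenizer()
print(pre_tokenizer.pre_tokenize_str("Hello, y'all!"))
# e.g. [('Hello', (0, 5)), (',', (5, 6)), ('y', (7, 8)), ("'", (8, 9)), ('all', (9, 12)), ('!', (12, 13))]
```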
## ByteLevel

[[autodoc]] tokenizers.pre_tokenizers.ByteLevel
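
A short sketch of byte-level splitting; the leading `Ġ` is the printable stand-in for a space byte (the input string is just illustrative):

```python
from tokenizers.pre_tokenizers import ByteLevel

# Splits with the GPT-2 regex, then remaps every byte to a
# printable unicode character (spaces become "Ġ")
pre_tokenizer = ByteLevel(add_prefix_space=True)
print(pre_tokenizer.pre_tokenize_str("Hello world"))
# e.g. [('ĠHello', (0, 5)), ('Ġworld', (5, 11))]
```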
## CharDelimiterSplit

[[autodoc]] tokenizers.pre_tokenizers.CharDelimiterSplit
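
For example, splitting on a single delimiter character (the `"|"` delimiter here is just an illustration):

```python
from tokenizers.pre_tokenizers import CharDelimiterSplit

# Behaves like str.split(delimiter): the delimiter itself is dropped
pre_tokenizer = CharDelimiterSplit("|")
print(pre_tokenizer.pre_tokenize_str("foo|bar|baz"))
# e.g. [('foo', (0, 3)), ('bar', (4, 7)), ('baz', (8, 11))]
```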
## Digits

[[autodoc]] tokenizers.pre_tokenizers.Digits
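
A quick sketch showing the effect of `individual_digits`:

```python
from tokenizers.pre_tokenizers import Digits

# individual_digits=True splits every digit apart;
# with False (the default), runs of digits stay together
pre_tokenizer = Digits(individual_digits=True)
print(pre_tokenizer.pre_tokenize_str("Call 911"))
# e.g. [('Call ', (0, 5)), ('9', (5, 6)), ('1', (6, 7)), ('1', (7, 8))]
```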
## Metaspace

[[autodoc]] tokenizers.pre_tokenizers.Metaspace
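
A minimal sketch, using the default `▁` (U+2581) replacement character:

```python
from tokenizers.pre_tokenizers import Metaspace

# SentencePiece-style: whitespace is replaced by the "▁" marker
# so the original spacing can be recovered when decoding
pre_tokenizer = Metaspace()
print(pre_tokenizer.pre_tokenize_str("Hello there friend"))
# e.g. [('▁Hello', (0, 5)), ('▁there', (5, 11)), ('▁friend', (11, 18))]
```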
## PreTokenizer

[[autodoc]] tokenizers.pre_tokenizers.PreTokenizer
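
`PreTokenizer` is the base class of all the pre-tokenizers on this page; any concrete implementation can be assigned to a `Tokenizer`. A minimal sketch:

```python
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.pre_tokenizers import Whitespace

tokenizer = Tokenizer(BPE())
# Any pre-tokenizer from this page plugs in the same way
tokenizer.pre_tokenizer = Whitespace()
```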
## Punctuation

[[autodoc]] tokenizers.pre_tokenizers.Punctuation
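
For example, with the default `"isolated"` behavior (the other accepted values are `"removed"`, `"merged_with_previous"`, `"merged_with_next"` and `"contiguous"`):

```python
from tokenizers.pre_tokenizers import Punctuation

# "isolated" turns each punctuation mark into its own piece
pre_tokenizer = Punctuation(behavior="isolated")
print(pre_tokenizer.pre_tokenize_str("Really?!"))
# e.g. [('Really', (0, 6)), ('?', (6, 7)), ('!', (7, 8))]
```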
## Sequence

[[autodoc]] tokenizers.pre_tokenizers.Sequence
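
A small sketch chaining two pre-tokenizers; each one further splits the pieces produced by the previous one:

```python
from tokenizers.pre_tokenizers import Digits, Sequence, Whitespace

pre_tokenizer = Sequence([Whitespace(), Digits(individual_digits=True)])
print(pre_tokenizer.pre_tokenize_str("10 downing street"))
# e.g. [('1', (0, 1)), ('0', (1, 2)), ('downing', (3, 10)), ('street', (11, 17))]
```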
## Split

[[autodoc]] tokenizers.pre_tokenizers.Split
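
A minimal sketch; the pattern may be a plain string or a `tokenizers.Regex`, and `behavior` takes the same values as for `Punctuation`:

```python
from tokenizers import Regex
from tokenizers.pre_tokenizers import Split

# "removed" drops the matched delimiter from the output
pre_tokenizer = Split(pattern=Regex(r"\s+"), behavior="removed")
print(pre_tokenizer.pre_tokenize_str("Hello   world"))
# e.g. [('Hello', (0, 5)), ('world', (8, 13))]
```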
## UnicodeScripts

[[autodoc]] tokenizers.pre_tokenizers.UnicodeScripts
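
A quick sketch; pieces are split wherever the Unicode script changes (the mixed Latin/Han input is just an illustration):

```python
from tokenizers.pre_tokenizers import UnicodeScripts

# Splits between characters belonging to different Unicode scripts,
# useful for text in languages written without spaces
pre_tokenizer = UnicodeScripts()
print(pre_tokenizer.pre_tokenize_str("tokenizers 分词器"))
```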
## Whitespace

[[autodoc]] tokenizers.pre_tokenizers.Whitespace
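
For example (the split pattern is `\w+|[^\w\s]+`, so punctuation is separated from words):

```python
from tokenizers.pre_tokenizers import Whitespace

pre_tokenizer = Whitespace()
print(pre_tokenizer.pre_tokenize_str("Hello, world!"))
# [('Hello', (0, 5)), (',', (5, 6)), ('world', (7, 12)), ('!', (12, 13))]
```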
## WhitespaceSplit

[[autodoc]] tokenizers.pre_tokenizers.WhitespaceSplit
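
By contrast with `Whitespace` above, this one splits on whitespace only, so punctuation stays attached to words:

```python
from tokenizers.pre_tokenizers import WhitespaceSplit

pre_tokenizer = WhitespaceSplit()
print(pre_tokenizer.pre_tokenize_str("Hello, world!"))
# [('Hello,', (0, 6)), ('world!', (7, 13))]
```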
</python>
<rust>
The Rust API Reference is available directly on the [Docs.rs](https://docs.rs/tokenizers/latest/tokenizers/) website.
</rust>
<node>
The node API has not been documented yet.
</node>
</tokenizerslangcontent>