mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-08 13:48:19 +00:00
Add SplitDelimiterBehavior to Punctuation constructor (#657)
Resolves: #642
This commit is contained in:
@@ -308,10 +308,16 @@ class Metaspace(PreTokenizer):
|
||||
|
||||
class Punctuation(PreTokenizer):
|
||||
"""
|
||||
This pre-tokenizer simply splits on punctuation as individual characters.`
|
||||
This pre-tokenizer simply splits on punctuation as individual characters.
|
||||
|
||||
Args:
|
||||
behavior (:class:`~tokenizers.SplitDelimiterBehavior`):
|
||||
The behavior to use when splitting.
|
||||
Choices: "removed", "isolated" (default), "merged_with_previous", "merged_with_next",
|
||||
"contiguous"
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, behavior="isolated"):
|
||||
pass
|
||||
def pre_tokenize(self, pretok):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user