Add SplitDelimiterBehavior to Punctuation constructor (#657)

Resolves: #642
This commit is contained in:
Vlad Artamonov
2021-08-13 09:19:23 -04:00
committed by GitHub
parent c1100dcbe3
commit e2bf8daa3a
10 changed files with 69 additions and 17 deletions

View File

@@ -308,10 +308,16 @@ class Metaspace(PreTokenizer):
class Punctuation(PreTokenizer):
"""
This pre-tokenizer simply splits on punctuation as individual characters.`
This pre-tokenizer simply splits on punctuation as individual characters.
Args:
behavior (:class:`~tokenizers.SplitDelimiterBehavior`):
The behavior to use when splitting.
Choices: "removed", "isolated" (default), "merged_with_previous", "merged_with_next",
"contiguous"
"""
def __init__(self):
def __init__(self, behavior="isolated"):
pass
def pre_tokenize(self, pretok):
"""