Mirror of https://github.com/mii443/tokenizers.git, synced 2025-12-12 05:18:39 +00:00
Throw a more meaningful error when provided python input is None.
@@ -143,6 +143,10 @@ class BaseTokenizer:
         Returns:
             An Encoding
         """
+        if sequence is None:
+            raise ValueError(
+                "Input is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers."
+            )
         return self._tokenizer.encode(sequence, pair, add_special_tokens)

     def encode_batch(
@@ -161,6 +165,12 @@ class BaseTokenizer:
         Returns:
             A list of Encoding
         """
+
+        if sequences is None:
+            raise ValueError(
+                "Input is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers."
+            )
+
         return self._tokenizer.encode_batch(sequences, add_special_tokens)

     def decode(self, ids: List[int], skip_special_tokens: Optional[bool] = True) -> str:
@@ -176,6 +186,10 @@ class BaseTokenizer:
         Returns:
             The decoded string
         """
+        if ids is None:
+            raise ValueError(
+                "Input is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers."
+            )
         return self._tokenizer.decode(ids, skip_special_tokens=skip_special_tokens)

     def decode_batch(
@@ -193,6 +207,10 @@ class BaseTokenizer:
         Returns:
             A list of decoded strings
         """
+        if sequences is None:
+            raise ValueError(
+                "Input is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers."
+            )
         return self._tokenizer.decode_batch(sequences, skip_special_tokens=skip_special_tokens)

     def token_to_id(self, token: str) -> Optional[int]:
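For illustration only, not part of the commit: a minimal sketch of how the new guard surfaces to callers, assuming the `tokenizers` Python package is installed and using ByteLevelBPETokenizer, one of the BaseTokenizer subclasses, as a stand-in.

# Minimal sketch, not from the commit: assumes the `tokenizers` package is installed
# and that ByteLevelBPETokenizer (a BaseTokenizer subclass) is available.
from tokenizers import ByteLevelBPETokenizer

tokenizer = ByteLevelBPETokenizer()  # an untrained instance is enough to exercise the guard

try:
    tokenizer.encode(None)  # None input previously failed with a less meaningful error
except ValueError as err:
    # With this change, the Python wrapper raises a descriptive ValueError
    # before the call reaches self._tokenizer.
    print(err)
    # Input is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers.

The same None check is applied to encode_batch, decode, and decode_batch in the diff above.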