mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
fix imports
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -15,3 +15,5 @@ pip-wheel-metadata
|
|||||||
*.so
|
*.so
|
||||||
/bindings/python/build
|
/bindings/python/build
|
||||||
/bindings/python/dist
|
/bindings/python/dist
|
||||||
|
|
||||||
|
.vscode
|
@@ -1,6 +1,6 @@
|
|||||||
from .tokenizers import Tokenizer
|
from ..tokenizers import Tokenizer, Encoding
|
||||||
|
|
||||||
from typing import List, Union, Tuple
|
from typing import List, Union, Tuple, Optional
|
||||||
|
|
||||||
class BaseTokenizer:
|
class BaseTokenizer:
|
||||||
_tokenizer: Tokenizer
|
_tokenizer: Tokenizer
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
from tokenizers import Tokenizer, pre_tokenizers, decoders
|
from tokenizers import Tokenizer, pre_tokenizers, decoders
|
||||||
from tokenizers.tokenizers import BaseTokenizer
|
|
||||||
from tokenizers.models import BPE
|
from tokenizers.models import BPE
|
||||||
from tokenizers.normalizers import NFKC
|
from tokenizers.normalizers import NFKC
|
||||||
|
from .base_tokenizer import BaseTokenizer
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@@ -14,7 +14,7 @@ class ByteLevelBpe(BaseTokenizer):
|
|||||||
def __init__(self,
|
def __init__(self,
|
||||||
vocab_file: Optional[str]=None,
|
vocab_file: Optional[str]=None,
|
||||||
merges_file: Optional[str]=None,
|
merges_file: Optional[str]=None,
|
||||||
add_prefix_space: boolean=False):
|
add_prefix_space: bool=False):
|
||||||
if vocab_file is not None and merges_file is not None:
|
if vocab_file is not None and merges_file is not None:
|
||||||
tokenizer = Tokenizer(BPE.from_files(vocab_file, merges_file))
|
tokenizer = Tokenizer(BPE.from_files(vocab_file, merges_file))
|
||||||
else:
|
else:
|
||||||
|
Reference in New Issue
Block a user