mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
* Fixing a bug where long tokenizer files would be incorrectly deserialized - Add a bunch of tests to check deserialization behaviour - One test also confirms current Single deserialization of Sequence. * Better test locations for Windows + no file dependency in Python binding Rust side. * Addressing @n1t0 comments.
11 lines
393 B
Python
11 lines
393 B
Python
from tokenizers import Tokenizer, models, normalizers
|
|
from .utils import data_dir, albert_base
|
|
|
|
|
|
class TestSerialization:
    """Regression tests around (de)serialization of tokenizer files."""

    def test_full_serialization_albert(self, albert_base):
        """Loading a large serialized tokenizer file must succeed.

        Regression check: deserialization used to fail when the file was
        larger than the BufReader's buffer capacity on the Rust side.
        """
        loaded = Tokenizer.from_file(albert_base)