Mirror of https://github.com/mii443/tokenizers.git (synced 2025-12-05 20:28:22 +00:00).
Python - Add to/from str and files for Tokenizer
This commit is contained in:
@@ -203,6 +203,32 @@ impl Tokenizer {
|
||||
}
|
||||
}
|
||||
|
||||
#[staticmethod]
|
||||
fn from_str(s: &str) -> PyResult<Self> {
|
||||
let tokenizer: PyResult<tk::tokenizer::Tokenizer> = ToPyResult(s.parse()).into();
|
||||
Ok(Self {
|
||||
tokenizer: tokenizer?,
|
||||
})
|
||||
}
|
||||
|
||||
#[staticmethod]
|
||||
fn from_file(path: &str) -> PyResult<Self> {
|
||||
let tokenizer: PyResult<_> = ToPyResult(tk::tokenizer::Tokenizer::from_file(path)).into();
|
||||
Ok(Self {
|
||||
tokenizer: tokenizer?,
|
||||
})
|
||||
}
|
||||
|
||||
#[args(pretty = false)]
|
||||
fn to_str(&self, pretty: bool) -> PyResult<String> {
|
||||
ToPyResult(self.tokenizer.to_string(pretty)).into()
|
||||
}
|
||||
|
||||
#[args(pretty = false)]
|
||||
fn save(&self, path: &str, pretty: bool) -> PyResult<()> {
|
||||
ToPyResult(self.tokenizer.save(path, pretty)).into()
|
||||
}
|
||||
|
||||
fn num_special_tokens_to_add(&self, is_pair: bool) -> PyResult<usize> {
|
||||
Ok(self
|
||||
.tokenizer
|
||||
@@ -604,9 +630,4 @@ impl Tokenizer {
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[args(pretty = false)]
|
||||
fn save(&self, path: &str, pretty: bool) -> PyResult<()> {
|
||||
ToPyResult(self.tokenizer.save(path, pretty)).into()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -252,6 +252,49 @@ class Tokenizer:
|
||||
Tokenizer
|
||||
"""
|
||||
pass
|
||||
@staticmethod
def from_str(s: str) -> Tokenizer:
    """Create a new Tokenizer from a serialized JSON string.

    Args:
        s: str:
            The JSON string representation of a Tokenizer.

    Returns:
        Tokenizer
    """
    pass
|
||||
@staticmethod
def from_file(path: str) -> Tokenizer:
    """Create a new Tokenizer from the serialized file at `path`.

    Args:
        path: str:
            Path to a file containing a serialized Tokenizer.

    Returns:
        Tokenizer
    """
    pass
|
||||
def to_str(self, pretty: bool = False) -> str:
    """Serialize this Tokenizer to a JSON string.

    Args:
        pretty: bool:
            When True, indent the JSON output for readability.

    Returns:
        str
    """
    pass
|
||||
def save(self, path: str, pretty: bool = False):
    """Write this Tokenizer as JSON to the given path.

    Args:
        path: str:
            Destination file path.
        pretty: bool:
            When True, indent the JSON output for readability.
    """
    pass
|
||||
@property
def model(self) -> Model:
    """The Model currently in use by this Tokenizer."""
|
||||
|
||||
@@ -295,7 +295,7 @@ class BaseTokenizer:
|
||||
"""
|
||||
return self._tokenizer.model.save(directory, name=name)
|
||||
|
||||
def save(self, path: str, pretty: bool = False):
    """Save the current Tokenizer at the given path.

    Args:
        path: str:
            Destination file path.
        pretty: bool:
            When True, indent the JSON output for readability.
    """
    # Forward to the underlying (Rust-backed) tokenizer implementation.
    return self._tokenizer.save(path, pretty)
|
||||
|
||||
def to_str(self, pretty: bool = False):
    """Serialize this Tokenizer to a JSON string.

    Args:
        pretty: bool:
            When True, indent the JSON output for readability.

    Returns:
        str
    """
    # Forward to the underlying (Rust-backed) tokenizer implementation.
    result = self._tokenizer.to_str(pretty)
    return result
|
||||
|
||||
def post_process(
|
||||
self, encoding: Encoding, pair: Optional[Encoding] = None, add_special_tokens: bool = True
|
||||
) -> Encoding:
|
||||
|
||||
Reference in New Issue
Block a user