Python - Add bindings to Tokenizer.from_pretrained

This commit is contained in:
Anthony Moi
2021-08-19 11:45:23 +02:00
committed by Anthony MOI
parent e71e5be64f
commit e44fdee4a1
4 changed files with 1022 additions and 1 deletions

View File

@@ -544,6 +544,43 @@ impl PyTokenizer {
Ok(Self { tokenizer })
}
/// Build a :class:`~tokenizers.Tokenizer` from a tokenizer file hosted on the
/// Hugging Face Hub.
///
/// Args:
///     identifier (:obj:`str`):
///         Identifier of a Model on the Hugging Face Hub whose repository
///         contains a tokenizer.json file
///     revision (:obj:`str`, defaults to `main`):
///         The branch or commit id to use
///     auth_token (:obj:`str`, `optional`, defaults to `None`):
///         Optional auth token, used to access private repositories on the
///         Hugging Face Hub
///
/// Returns:
///     :class:`~tokenizers.Tokenizer`: The new tokenizer
#[staticmethod]
#[args(revision = "String::from(\"main\")", auth_token = "None")]
#[text_signature = "(identifier, revision=\"main\", auth_token=None)"]
fn from_pretrained(
    identifier: &str,
    revision: String,
    auth_token: Option<String>,
) -> PyResult<Self> {
    // Key/value pairs sent as the user agent: they tag the request as coming
    // from the Python bindings and carry this crate's version.
    let user_agent = [("bindings", "Python"), ("version", crate::VERSION)]
        .iter()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect();

    let parameters = tk::utils::from_pretrained::FromPretrainedParameters {
        revision,
        auth_token,
        user_agent,
    };

    // `ToPyResult` converts the library result into a `PyResult`, so a failure
    // is propagated to the caller by `?`.
    let loaded = Into::<PyResult<_>>::into(ToPyResult(Tokenizer::from_pretrained(
        identifier,
        Some(parameters),
    )))?;

    Ok(Self::new(loaded))
}
/// Gets a serialized string representing this :class:`~tokenizers.Tokenizer`.
///
/// Args: