Python - Improve AddedToken interface

This commit is contained in:
Anthony MOI
2020-06-19 17:53:46 -04:00
parent a14cd7b219
commit c02d4e2202
5 changed files with 125 additions and 87 deletions

View File

@@ -36,12 +36,12 @@ impl BpeTrainer {
.into_iter()
.map(|token| {
if let Ok(content) = token.extract::<String>() {
Ok(tk::tokenizer::AddedToken {
content,
..Default::default()
})
} else if let Ok(token) = token.extract::<PyRef<AddedToken>>() {
Ok(token.token.clone())
Ok(AddedToken::from(content, Some(true)).get_token())
} else if let Ok(mut token) =
token.extract::<PyRefMut<AddedToken>>()
{
token.is_special_token = true;
Ok(token.get_token())
} else {
Err(exceptions::Exception::py_err(
"special_tokens must be a List[Union[str, AddedToken]]",
@@ -105,12 +105,12 @@ impl WordPieceTrainer {
.into_iter()
.map(|token| {
if let Ok(content) = token.extract::<String>() {
Ok(tk::tokenizer::AddedToken {
content,
..Default::default()
})
} else if let Ok(token) = token.extract::<PyRef<AddedToken>>() {
Ok(token.token.clone())
Ok(AddedToken::from(content, Some(true)).get_token())
} else if let Ok(mut token) =
token.extract::<PyRefMut<AddedToken>>()
{
token.is_special_token = true;
Ok(token.get_token())
} else {
Err(exceptions::Exception::py_err(
"special_tokens must be a List[Union[str, AddedToken]]",