Python - add Metaspace decoder

This commit is contained in:
Anthony MOI
2020-01-07 18:40:18 -05:00
parent 43acdcfacf
commit cbdd2cf423
6 changed files with 98 additions and 5 deletions

View File

@ -57,6 +57,41 @@ impl WordPiece {
}
}
/// Python-facing entry point for the Metaspace decoder.
///
/// The struct carries no state of its own; `Metaspace.new(...)` builds and
/// returns a `Decoder` that owns a `tk::decoder::metaspace::Metaspace`.
#[pyclass]
pub struct Metaspace {}

#[pymethods]
impl Metaspace {
    /// Build a `Decoder` backed by a Metaspace decoder.
    ///
    /// Recognized keyword arguments:
    /// - `replacement` (str): only its first character is used; any further
    ///   characters are ignored. Defaults to `▁`.
    /// - `add_prefix_space` (bool): forwarded unchanged to the underlying
    ///   decoder. Defaults to `true`.
    ///
    /// Any other keyword is reported on stdout and otherwise ignored.
    ///
    /// # Errors
    ///
    /// Returns a Python exception when a key is not a string, when a value
    /// cannot be extracted to the expected type, or when `replacement` is an
    /// empty string.
    #[staticmethod]
    #[args(kwargs = "**")]
    fn new(kwargs: Option<&PyDict>) -> PyResult<Decoder> {
        let mut replacement = '▁';
        let mut add_prefix_space = true;

        if let Some(kwargs) = kwargs {
            for (key, value) in kwargs {
                let key: &str = key.extract()?;
                match key {
                    "replacement" => {
                        let s: &str = value.extract()?;
                        // `ok_or_else` defers building the Python exception to the
                        // failure path; `ok_or` would construct it on every call.
                        replacement = s.chars().next().ok_or_else(|| {
                            exceptions::Exception::py_err("replacement must be a character")
                        })?;
                    }
                    "add_prefix_space" => add_prefix_space = value.extract()?,
                    _ => println!("Ignored unknown kwarg option {}", key),
                }
            }
        }

        Ok(Decoder {
            decoder: Container::Owned(Box::new(tk::decoder::metaspace::Metaspace::new(
                replacement,
                add_prefix_space,
            ))),
        })
    }
}
/// Private wrapper around a single Python object stored in `class`.
// NOTE(review): presumably `class` is a decoder implemented on the Python
// side — confirm against the impls that use this struct.
struct PyDecoder {
class: PyObject,
}

View File

@ -49,6 +49,7 @@ fn decoders(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<decoders::Decoder>()?;
m.add_class::<decoders::ByteLevel>()?;
m.add_class::<decoders::WordPiece>()?;
m.add_class::<decoders::Metaspace>()?;
Ok(())
}

View File

@ -3,3 +3,4 @@ from .. import decoders
# Re-export the decoder classes of the imported `decoders` module so they are
# reachable directly as attributes of this module.
Decoder = decoders.Decoder
ByteLevel = decoders.ByteLevel
WordPiece = decoders.WordPiece
Metaspace = decoders.Metaspace

View File

@ -31,3 +31,22 @@ class WordPiece:
The prefix to use for subwords that are not a beginning-of-word
"""
pass
class Metaspace:
    """ Metaspace decoder """

    @staticmethod
    def new(replacement: str = "▁",
            add_prefix_space: bool = True) -> Decoder:
        """ Instantiate a new Metaspace

        Args:
            replacement: str:
                The replacement character. Must be exactly one character. By default we
                use the `▁` (U+2581) meta symbol (Same as in SentencePiece).

            add_prefix_space: boolean:
                Whether to add a space to the first word if there isn't already one. This
                lets us treat `hello` exactly like `say hello`.

        Returns:
            Decoder:
                The newly created decoder.
        """
        # Stub only: the declared default for `replacement` must stay a single
        # character so the signature agrees with the documented contract above.
        pass