Add CTC Decoder for Wave2Vec models (#693)
* Rust - add a CTCDecoder as a separate mod
* Adding bindings to Node + Python.
* Clippy update.
* Stub.
* Fixing roberta.json URLs.
* Moving test files to hf.co.
* Update cargo check and clippy to 1.52.
* Inner ':' actually is used for domains in sphinx. Making `domain` work correctly was just too much work, so I went the easy way and have global roles for the custom rust extension.
* Update struct naming and docs
* Update changelog

Co-authored-by: Thomaub <github.thomaub@gmail.com>
Co-authored-by: Anthony MOI <m.anthony.moi@gmail.com>
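The second-to-last point is what this diff records: Sphinx reserves ':' in role names for its domain system (as in `:py:meth:`), so a role named `rust:struct` would only work inside a full `Domain` subclass. Renaming the roles to `rust_struct` etc. lets them be registered as ordinary global roles. A minimal sketch of that approach (not the actual `RustRef` implementation; the link-building logic and docs.rs URL are illustrative assumptions):

    from docutils import nodes

    def rust_ref(name, rawtext, text, lineno, inliner, options={}, content=[]):
        # "rust_struct" -> "struct"; a ':' here would collide with Sphinx domains
        doctype = name.split("_")[1]
        # Sphinx-style "~" prefix: render only the last path component
        display = text.lstrip("~").split("::")[-1]
        # Illustrative target only; RustRef's real URL scheme is not shown in this hunk
        url = "https://docs.rs/tokenizers/latest/tokenizers/?search=" + display
        return [nodes.reference(rawtext, display, refuri=url)], []

    def setup(app):
        for doctype in ("struct", "func", "meth", "trait"):
            app.add_role("rust_" + doctype, rust_ref)  # global, domain-free roles
        return {"version": "0.1"}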
@@ -10,7 +10,7 @@ logger = sphinx.util.logging.getLogger(__name__)
 
 class RustRef:
     def __call__(self, name, rawtext, text, lineno, inliner, options={}, content=[]):
-        doctype = name.split(":")[1]
+        doctype = name.split("_")[1]
         parts = text.split("::")
 
         if text.startswith("~"):
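The one-character change above is the whole lookup fix: the doctype is now recovered from the underscore in the role name instead of the colon, which Sphinx treats as a domain separator rather than part of the name. Roughly (values are hypothetical):

    name = "rust_meth"
    text = "~tokenizers::tokenizer::Tokenizer::train"

    name.split("_")[1]  # -> "meth" (the old name.split(":")[1] relied on the ':')
    text.split("::")    # -> ["~tokenizers", "tokenizer", "Tokenizer", "train"]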
@@ -87,10 +87,10 @@ class RustRef:
 
 
 def setup(app):
-    app.add_role("rust:struct", RustRef())
-    app.add_role("rust:func", RustRef())
-    app.add_role("rust:meth", RustRef())
-    app.add_role("rust:trait", RustRef())
+    app.add_role("rust_struct", RustRef())
+    app.add_role("rust_func", RustRef())
+    app.add_role("rust_meth", RustRef())
+    app.add_role("rust_trait", RustRef())
 
     return {
         "version": "0.1",
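With the roles registered globally in `setup`, any docs page can now use them without a domain prefix:

    :rust_struct:`~tokenizers::tokenizer::Tokenizer`

which is exactly the form the entities files below switch to.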
@@ -58,47 +58,47 @@
     classmethod
         static method
     Tokenizer
-        :rust:struct:`~tokenizers::tokenizer::Tokenizer`
+        :rust_struct:`~tokenizers::tokenizer::Tokenizer`
     Tokenizer.train
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::train`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::train`
     Tokenizer.save
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::save`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::save`
     Tokenizer.from_file
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::from_file`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::from_file`
     Tokenizer.encode
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::encode`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::encode`
     Tokenizer.encode_batch
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::encode_batch`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::encode_batch`
     Tokenizer.decode
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::decode`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::decode`
     Tokenizer.decode_batch
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::decode_batch`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::decode_batch`
     Tokenizer.token_to_id
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::token_to_id`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::token_to_id`
     Tokenizer.enable_padding
-        :rust:meth:`~tokenizers::tokenizer::Tokenizer::enable_padding`
+        :rust_meth:`~tokenizers::tokenizer::Tokenizer::enable_padding`
     Encoding
-        :rust:struct:`~tokenizers::tokenizer::Encoding`
+        :rust_struct:`~tokenizers::tokenizer::Encoding`
     TemplateProcessing
-        :rust:struct:`~tokenizers::processors::template::TemplateProcessing`
+        :rust_struct:`~tokenizers::processors::template::TemplateProcessing`
     Normalizer
-        :rust:trait:`~tokenizers::tokenizer::Normalizer`
+        :rust_trait:`~tokenizers::tokenizer::Normalizer`
     normalizers.Sequence
-        :rust:struct:`~tokenizers::normalizers::utils::Sequence`
+        :rust_struct:`~tokenizers::normalizers::utils::Sequence`
     pre_tokenizers.Whitespace
-        :rust:struct:`~tokenizers::normalizers::whitespace::Whitespace`
+        :rust_struct:`~tokenizers::normalizers::whitespace::Whitespace`
     PreTokenizer
-        :rust:trait:`~tokenizers::tokenizer::PreTokenizer`
+        :rust_trait:`~tokenizers::tokenizer::PreTokenizer`
     models.BPE
-        :rust:struct:`~tokenizers::models::bpe::BPE`
+        :rust_struct:`~tokenizers::models::bpe::BPE`
     models.Unigram
-        :rust:struct:`~tokenizers::models::unigram::Unigram`
+        :rust_struct:`~tokenizers::models::unigram::Unigram`
     models.WordLevel
-        :rust:struct:`~tokenizers::models::wordlevel::WordLevel`
+        :rust_struct:`~tokenizers::models::wordlevel::WordLevel`
     models.WordPiece
-        :rust:struct:`~tokenizers::models::wordpiece::WordPiece`
+        :rust_struct:`~tokenizers::models::wordpiece::WordPiece`
     Decoder
-        :rust:trait:`~tokenizers::tokenizer::Decoder`
+        :rust_trait:`~tokenizers::tokenizer::Decoder`
 
 .. entities:: node
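Every target above keeps its leading `~`. Assuming `RustRef` follows the convention of Sphinx's built-in cross-reference roles (the `if text.startswith("~"):` branch in the first hunk points that way), the prefix means only the last path component is rendered as link text. A sketch of that behavior:

    def display_text(target):
        # "~tokenizers::tokenizer::Tokenizer" -> "Tokenizer";
        # without the "~", the full path would be shown
        if target.startswith("~"):
            return target[1:].split("::")[-1]
        return target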
@@ -44,7 +44,7 @@ Training the tokenizer
 .. entities:: rust
 
     BpeTrainer
-        :rust:struct:`~tokenizers::models::bpe::BpeTrainer`
+        :rust_struct:`~tokenizers::models::bpe::BpeTrainer`
     vocab_size
         :obj:`vocab_size`
     min_frequency