Doc - Backbone for API Reference

Anthony MOI
2020-10-06 14:31:16 -04:00
committed by Anthony MOI
parent 7eb0a9255e
commit 3ee54766e3
7 changed files with 69 additions and 16 deletions


@ -602,15 +602,31 @@ impl PyTokenizer {
})
}
/// Input can be:
/// encode("A single sequence")
/// encode("A sequence", "And its pair")
/// encode([ "A", "pre", "tokenized", "sequence" ], is_pretokenized=True)
/// encode(
/// [ "A", "pre", "tokenized", "sequence" ], [ "And", "its", "pair" ],
/// is_pretokenized=True
/// )
/// Encode the given sequence and pair. This method can process raw text sequences
/// as well as already pre-tokenized sequences.
///
/// Example:
/// Here are some examples of the inputs that are accepted::
///
///     encode("A single sequence")
///     encode("A sequence", "And its pair")
///     encode([ "A", "pre", "tokenized", "sequence" ], is_pretokenized=True)
///     encode(
///         [ "A", "pre", "tokenized", "sequence" ], [ "And", "its", "pair" ],
///         is_pretokenized=True
///     )
///
/// Args:
///     sequence (:obj:`~tokenizers.InputSequence`): The main input sequence
///     pair (:obj:`~tokenizers.InputSequence`): An optional input sequence
///     is_pretokenized (:obj:`bool`): Whether the input is already pre-tokenized
///     add_special_tokens (:obj:`bool`): Whether to add the special tokens
///
/// Returns:
///     :class:`~tokenizers.Encoding`: The encoded result
///
#[args(pair = "None", is_pretokenized = "false", add_special_tokens = "true")]
#[text_signature = "($self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True, /)"]
fn encode(
&self,
sequence: &PyAny,
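
For reference, a minimal sketch of how these signatures are used from Python (the tokenizer file path is a placeholder and assumes an already trained tokenizer):

from tokenizers import Tokenizer

# Load an existing tokenizer; "tokenizer.json" is an illustrative path.
tokenizer = Tokenizer.from_file("tokenizer.json")

# Raw text, alone or with its pair:
encoding = tokenizer.encode("A single sequence")
encoding = tokenizer.encode("A sequence", "And its pair")

# Pre-tokenized input must be flagged explicitly:
encoding = tokenizer.encode(
    ["A", "pre", "tokenized", "sequence"], ["And", "its", "pair"],
    is_pretokenized=True,
)
print(encoding.tokens)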
@ -643,14 +659,29 @@ impl PyTokenizer {
.into()
}
/// Input can be:
/// encode_batch([
/// "A single sequence",
/// ("A tuple with a sequence", "And its pair"),
/// [ "A", "pre", "tokenized", "sequence" ],
/// ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
/// ])
/// Encode the given batch of inputs. This method accepts both raw text sequences
/// and already pre-tokenized sequences.
///
/// Example:
///     Here are some examples of the inputs that are accepted::
///
///         encode_batch([
///             "A single sequence",
///             ("A tuple with a sequence", "And its pair"),
///             [ "A", "pre", "tokenized", "sequence" ],
///             ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
///         ])
///
/// Args:
///     input (:obj:`~tokenizers.EncodeInput`): The batch inputs
///     is_pretokenized (:obj:`bool`): Whether the input is already pre-tokenized
///     add_special_tokens (:obj:`bool`): Whether to add the special tokens
///
/// Returns:
///     A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch
///
#[args(is_pretokenized = "false", add_special_tokens = "true")]
#[text_signature = "($self, input, is_pretokenized=False, add_special_tokens=True, /)"]
fn encode_batch(
&self,
input: Vec<&PyAny>,
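
Likewise, a hedged sketch of encode_batch usage from Python, reusing the tokenizer loaded in the previous example:

# Raw text batch: plain strings and (sequence, pair) tuples can be mixed.
encodings = tokenizer.encode_batch([
    "A single sequence",
    ("A tuple with a sequence", "And its pair"),
])

# Pre-tokenized batch: is_pretokenized applies to the whole batch.
encodings = tokenizer.encode_batch(
    [["A", "pre", "tokenized", "sequence"]],
    is_pretokenized=True,
)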


@ -0,0 +1 @@
The node API has not been documented yet.


@ -0,0 +1,2 @@
.. autoclass:: tokenizers.Tokenizer
   :members:


@ -0,0 +1,14 @@
API Reference
====================================================================================================
.. only:: python

   .. include:: python_reference.inc

.. only:: rust

   .. include:: rust_reference.inc

.. only:: node

   .. include:: node_reference.inc
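
The `only` directives above are driven by Sphinx tags, which would be supplied at build time (for example via sphinx-build's -t option). A small, assumed sketch of conf.py logic that defaults the tag when none is given (`tags` is the object Sphinx exposes inside conf.py; defaulting to python is an assumption, not part of this commit):

# conf.py: fall back to the Python reference when no language tag is set.
if not any(tags.has(lang) for lang in ("python", "rust", "node")):
    tags.add("python")  # assumed default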


@ -0,0 +1 @@
The Rust API has not been documented yet.


@ -30,7 +30,7 @@ release = "0.9.0"
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = []
extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
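
With autodoc and napoleon enabled, Google-style sections such as the Args:/Returns: blocks above are parsed into structured fields when the API reference is built. A stand-in example of the docstring shape napoleon expects (the function itself is illustrative, not part of the tokenizers API):

def example(sequence, pair=None):
    """Encode the given sequence and pair.

    Args:
        sequence (str): The main input sequence
        pair (str): An optional input sequence

    Returns:
        str: The encoded result
    """
    return sequence if pair is None else sequence + " " + pair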


@ -34,6 +34,10 @@ Components:
pipeline
components
.. toctree::

   api/reference
Load an existing tokenizer:
----------------------------------------------------------------------------------------------------