Doc - Backbone for API Reference
@@ -602,15 +602,31 @@ impl PyTokenizer {
         })
     }
 
-    /// Input can be:
-    /// encode("A single sequence")
-    /// encode("A sequence", "And its pair")
-    /// encode([ "A", "pre", "tokenized", "sequence" ], is_pretokenized=True)
-    /// encode(
-    ///     [ "A", "pre", "tokenized", "sequence" ], [ "And", "its", "pair" ],
-    ///     is_pretokenized=True
-    /// )
+    /// Encode the given sequence and pair. This method can process raw text sequences
+    /// as well as already pre-tokenized sequences.
+    ///
+    /// Example:
+    ///     Here are some examples of the inputs that are accepted::
+    ///
+    ///         encode("A single sequence")
+    ///         encode("A sequence", "And its pair")
+    ///         encode([ "A", "pre", "tokenized", "sequence" ], is_pretokenized=True)
+    ///         encode(
+    ///             [ "A", "pre", "tokenized", "sequence" ], [ "And", "its", "pair" ],
+    ///             is_pretokenized=True
+    ///         )
+    ///
+    /// Args:
+    ///     sequence (:obj:`~tokenizers.InputSequence`): The main input sequence
+    ///     pair (:obj:`~tokenizers.InputSequence`): An optional input sequence
+    ///     is_pretokenized (:obj:`bool`): Whether the input is already pre-tokenized
+    ///     add_special_tokens (:obj:`bool`): Whether to add the special tokens
+    ///
+    /// Returns:
+    ///     :class:`~tokenizers.Encoding`: The encoded result
+    ///
     #[args(pair = "None", is_pretokenized = "false", add_special_tokens = "true")]
     #[text_signature = "($self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True, /)"]
     fn encode(
         &self,
         sequence: &PyAny,
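As a rough illustration of the signature documented above (a sketch, not part of the commit: the file name "tokenizer.json" is a placeholder for any serialized tokenizer), the accepted input shapes map onto Python calls like this:

    from tokenizers import Tokenizer

    # "tokenizer.json" is a placeholder path; any serialized tokenizer works.
    tokenizer = Tokenizer.from_file("tokenizer.json")

    # Raw text, with or without a pair
    encoding = tokenizer.encode("A single sequence")
    encoding = tokenizer.encode("A sequence", "And its pair")

    # Already pre-tokenized input, with or without a pair
    encoding = tokenizer.encode(["A", "pre", "tokenized", "sequence"], is_pretokenized=True)
    encoding = tokenizer.encode(
        ["A", "pre", "tokenized", "sequence"], ["And", "its", "pair"],
        is_pretokenized=True,
    )

    # The result is a single Encoding
    print(encoding.tokens, encoding.ids)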
@@ -643,14 +659,29 @@ impl PyTokenizer {
             .into()
     }
 
-    /// Input can be:
-    /// encode_batch([
-    ///     "A single sequence",
-    ///     ("A tuple with a sequence", "And its pair"),
-    ///     [ "A", "pre", "tokenized", "sequence" ],
-    ///     ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
-    /// ])
+    /// Encode the given batch of inputs. This method accepts raw text sequences
+    /// as well as already pre-tokenized sequences.
+    ///
+    /// Example:
+    ///     Here are some examples of the inputs that are accepted::
+    ///
+    ///         encode_batch([
+    ///             "A single sequence",
+    ///             ("A tuple with a sequence", "And its pair"),
+    ///             [ "A", "pre", "tokenized", "sequence" ],
+    ///             ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
+    ///         ])
+    ///
+    /// Args:
+    ///     input (:obj:`~tokenizers.EncodeInput`): The batch inputs
+    ///     is_pretokenized (:obj:`bool`): Whether the input is already pre-tokenized
+    ///     add_special_tokens (:obj:`bool`): Whether to add the special tokens
+    ///
+    /// Returns:
+    ///     A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch
+    ///
     #[args(is_pretokenized = "false", add_special_tokens = "true")]
     #[text_signature = "($self, input, is_pretokenized=False, add_special_tokens=True, /)"]
     fn encode_batch(
         &self,
         input: Vec<&PyAny>,
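The batch variant accepts the same shapes, one item per entry. A sketch (again assuming the placeholder "tokenizer.json"; raw and pre-tokenized items are split into separate calls here because the is_pretokenized flag applies to the whole batch):

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

    # Raw text items: plain strings or (sequence, pair) tuples
    encodings = tokenizer.encode_batch([
        "A single sequence",
        ("A tuple with a sequence", "And its pair"),
    ])

    # Pre-tokenized items: word lists, optionally paired
    encodings = tokenizer.encode_batch(
        [
            ["A", "pre", "tokenized", "sequence"],
            (["A", "pre", "tokenized", "sequence"], ["And", "its", "pair"]),
        ],
        is_pretokenized=True,
    )

    for enc in encodings:
        print(enc.tokens)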
docs/source/api/node_reference.inc (new file, 1 line)
@@ -0,0 +1 @@
+The node API has not been documented yet.
docs/source/api/python_reference.inc (new file, 2 lines)
@@ -0,0 +1,2 @@
+.. autoclass:: tokenizers.Tokenizer
+    :members:
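Since `.. autoclass::` pulls everything from the installed package, a quick way to preview what autodoc will render (assuming a `tokenizers` wheel or local build is importable) is to inspect the docstrings that the Rust `///` comments and `#[text_signature]` attributes produce:

    import tokenizers

    # These are the strings sphinx.ext.autodoc reads when building the reference.
    print(tokenizers.Tokenizer.__doc__)
    print(tokenizers.Tokenizer.encode.__doc__)
    print(tokenizers.Tokenizer.encode_batch.__doc__)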
docs/source/api/reference.rst (new file, 14 lines)
@@ -0,0 +1,14 @@
+API Reference
+====================================================================================================
+
+.. only:: python
+
+    .. include:: python_reference.inc
+
+.. only:: rust
+
+    .. include:: rust_reference.inc
+
+.. only:: node
+
+    .. include:: node_reference.inc
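Which of the three `.. only::` branches survives the build is controlled by Sphinx tags. A minimal conf.py-side sketch (the `tags` object is injected by Sphinx while conf.py runs, so this is not a standalone script; the DOCS_LANG environment variable is a hypothetical selector, and passing `-t python` to sphinx-build achieves the same thing):

    # Fragment meant for conf.py; `tags` is provided by Sphinx at build time.
    import os

    docs_lang = os.environ.get("DOCS_LANG", "python")  # hypothetical selector
    if docs_lang in ("python", "rust", "node"):
        tags.add(docs_lang)  # keeps the matching `.. only::` block in reference.rst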
docs/source/api/rust_reference.inc (new file, 1 line)
@@ -0,0 +1 @@
+The Rust API has not been documented yet.
@@ -30,7 +30,7 @@ release = "0.9.0"
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = []
+extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
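The new extensions matter because the docstrings added above use Google-style Args:/Returns: sections: sphinx.ext.napoleon rewrites those into reST before sphinx.ext.autodoc renders them. A hypothetical plain-Python function with the same layout, shown only to illustrate the format napoleon expects:

    def encode_stub(sequence, pair=None, is_pretokenized=False, add_special_tokens=True):
        """Encode the given sequence and pair.

        Args:
            sequence (str): The main input sequence
            pair (str, optional): An optional input sequence
            is_pretokenized (bool): Whether the input is already pre-tokenized
            add_special_tokens (bool): Whether to add the special tokens

        Returns:
            list: A placeholder result, present only to illustrate the layout.
        """
        return [sequence, pair, is_pretokenized, add_special_tokens]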
@@ -34,6 +34,10 @@ Components:
     pipeline
     components
 
+.. toctree::
+
+    api/reference
+
 Load an existing tokenizer:
 ----------------------------------------------------------------------------------------------------
 