mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 16:49:27 +00:00
Add an Encoding.sequences to allow masking
This commit is contained in:
@ -11,6 +11,7 @@ export class Encoding {
|
||||
private _tokens?: string[];
|
||||
private _typeIds?: number[];
|
||||
private _wordIndexes?: (number | undefined)[];
|
||||
private _sequenceIndexes?: (number | undefined)[];
|
||||
|
||||
/**
 * Wraps a raw encoding. The parameter property stores it as the private
 * `_rawEncoding` member, which the getters on this class read from.
 */
constructor(private _rawEncoding: RawEncoding) {}
|
||||
|
||||
@ -151,6 +152,14 @@ export class Encoding {
|
||||
return (this._wordIndexes = this._rawEncoding.getWords());
|
||||
}
|
||||
|
||||
/**
 * Sequence indexes for this encoding.
 *
 * The value is read from the raw encoding on first access and then kept in
 * `_sequenceIndexes`, so later reads reuse the stored array.
 */
get sequenceIndexes(): (number | undefined)[] {
  const cached = this._sequenceIndexes;
  if (cached) {
    return cached;
  }

  // Nothing cached yet: ask the raw encoding and remember the answer.
  const fetched = this._rawEncoding.getSequences();
  this._sequenceIndexes = fetched;
  return fetched;
}
|
||||
|
||||
/**
|
||||
* Get the encoded tokens corresponding to the word at the given index in one of the input
|
||||
* sequences, with the form [startToken, endToken+1]
|
||||
|
Reference in New Issue
Block a user