Add an Encoding.sequences to allow masking

This commit is contained in:
Anthony MOI
2020-11-05 13:12:15 -05:00
committed by Anthony MOI
parent 385d25720a
commit 57d162b269
8 changed files with 74 additions and 1 deletion

View File

@ -11,6 +11,7 @@ export class Encoding {
// Private backing fields for values read from the raw encoding.
// NOTE(review): `_tokens` and `_typeIds` are presumably filled lazily by their
// accessors in the same way as the two index caches below — confirm against the
// parts of the class not shown here.
private _tokens?: string[];
private _typeIds?: number[];
// Stores the result of `_rawEncoding.getWords()` once it has been fetched.
private _wordIndexes?: (number | undefined)[];
// Stores the result of `_rawEncoding.getSequences()`; left unset until the
// `sequenceIndexes` getter is read for the first time.
private _sequenceIndexes?: (number | undefined)[];
// The parameter property keeps the raw encoding as a private member; the body is
// intentionally empty, so no cache field is populated at construction time.
constructor(private _rawEncoding: RawEncoding) {}
@ -151,6 +152,14 @@ export class Encoding {
return (this._wordIndexes = this._rawEncoding.getWords());
}
/**
 * Sequence indexes of this encoding, as reported by the raw encoding's
 * `getSequences()`.
 *
 * The array is fetched on first access and stored in `_sequenceIndexes`;
 * once stored, later reads return it without calling the raw encoding again.
 *
 * @returns The stored array, whose entries are each a number or `undefined`.
 */
get sequenceIndexes(): (number | undefined)[] {
  // Any array (even an empty one) is truthy, so this only fires while unset.
  if (!this._sequenceIndexes) {
    this._sequenceIndexes = this._rawEncoding.getSequences();
  }
  return this._sequenceIndexes;
}
/**
* Get the encoded tokens corresponding to the word at the given index in one of the input
* sequences, with the form [startToken, endToken+1]