mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-31 04:29:21 +00:00
Node - Merge encodings
This commit is contained in:
committed by
Anthony MOI
parent
4341c79d85
commit
0408567f23
@ -1,4 +1,5 @@
|
||||
import { PaddingOptions, RawEncoding } from "../bindings/raw-encoding";
|
||||
import { mergeEncodings } from "../bindings/utils";
|
||||
|
||||
export class Encoding {
|
||||
private _attentionMask?: number[];
|
||||
@ -13,6 +14,20 @@ export class Encoding {
|
||||
|
||||
// Wraps a native RawEncoding from the bindings layer; stored as a private
// parameter property so the raw handle is not exposed to callers.
constructor(private rawEncoding: RawEncoding) {}
|
||||
|
||||
/**
|
||||
* Merge a list of Encoding into one final Encoding
|
||||
* @param encodings The list of encodings to merge
|
||||
* @param [growingOffsets=false] Whether the offsets should accumulate while merging
|
||||
*/
|
||||
static merge(encodings: Encoding[], growingOffsets?: boolean): Encoding {
|
||||
const mergedRaw = mergeEncodings(
|
||||
encodings.map(e => e.rawEncoding),
|
||||
growingOffsets
|
||||
);
|
||||
|
||||
return new Encoding(mergedRaw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attention mask
|
||||
*/
|
||||
|
Reference in New Issue
Block a user