mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
29 lines
912 B
TypeScript
29 lines
912 B
TypeScript
import { promisify } from "util";
|
|
import { Encoding, Tokenizer } from "../bindings/tokenizer";
|
|
|
|
/**
 * Promise-based wrapper around a callback-style `Tokenizer` binding.
 *
 * Each method promisifies the matching method of the wrapped tokenizer, so the
 * returned promise resolves with the value passed to the callback and rejects
 * with the error passed to the callback.
 */
export class BaseTokenizer {
  constructor(private readonly tokenizer: Tokenizer) {}

  /**
   * Encode the given sequence
   *
   * @param {string} sequence The sequence to encode
   * @param {string} [pair] The optional pair sequence. When omitted, `null` is
   * forwarded to the underlying tokenizer.
   * @returns {Promise<Encoding>} The encoding reported by the underlying tokenizer
   */
  async encode(sequence: string, pair?: string): Promise<Encoding> {
    // Bind before promisifying: passing the bare method reference would detach
    // it from `this.tokenizer`, and the method would run without its receiver.
    const encode = promisify(this.tokenizer.encode.bind(this.tokenizer));
    return encode(sequence, pair ?? null);
  }

  /**
   * Encode the given sequences or pair of sequences
   *
   * @param {((string | [string, string])[])} sequences A list of sequences or pair of sequences.
   * The list can contain both at the same time.
   * @returns {Promise<Encoding[]>} The encodings reported by the underlying tokenizer
   */
  async encodeBatch(sequences: (string | [string, string])[]): Promise<Encoding[]> {
    // Same receiver-preserving bind as in `encode`.
    const encodeBatch = promisify(this.tokenizer.encodeBatch.bind(this.tokenizer));
    return encodeBatch(sequences);
  }
}
|