mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
28 lines
849 B
JavaScript
28 lines
849 B
JavaScript
/*eslint-disable no-undef*/
|
|
const tokenizers = require("..");
|
|
const { promisify } = require("util");
|
|
|
|
describe("loadExample", () => {
  /**
   * Loads the tokenizer serialized in "data/roberta.json", then checks that
   * encoding the sample sentence produces the expected ids and tokens, and
   * that decoding those ids gives the original sentence back.
   */
  it("loads a tokenizer from a file and round-trips encode/decode", async () => {
    const example = "This is an example";
    const ids = [713, 16, 41, 1246];
    const tokens = ["This", "Ġis", "Ġan", "Ġexample"];

    // NOTE(review): the START/END markers below look like snippet delimiters
    // for external tooling — confirm before renaming or moving them.
    // START load_tokenizer
    const tokenizer = tokenizers.Tokenizer.fromFile("data/roberta.json");
    // END load_tokenizer

    // Wrap encode/decode with promisify so they can be awaited; bind keeps
    // `this` pointing at the tokenizer once the methods are detached.
    // You could also use regular callbacks
    const encode = promisify(tokenizer.encode.bind(tokenizer));
    const decode = promisify(tokenizer.decode.bind(tokenizer));

    const encoded = await encode(example);
    expect(encoded.getIds()).toEqual(ids);
    expect(encoded.getTokens()).toEqual(tokens);

    const decoded = await decode(ids);
    expect(decoded).toEqual(example);
  });
});
|