mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-07 21:28:19 +00:00
Doc - Update Normalizer part of the Pipeline page
This commit is contained in:
30
bindings/node/examples/documentation/pipeline.test.ts
Normal file
30
bindings/node/examples/documentation/pipeline.test.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
// Keep a handle on the real `require` under another name: the test suite below
// declares its own local `require` helper that shadows it and delegates here.
const globRequire = require;
|
||||
|
||||
/**
 * Executable version of the code shown on the "Pipeline" documentation page.
 *
 * The `// START <name>` / `// END <name>` comment pairs delimit named snippets;
 * keep the markers intact and avoid adding extra comments between them.
 */
describe("pipelineExample", () => {
  // This is a hack to let us require using path similar to what the user has to use
  //
  // Specifiers starting with "tokenizers/" are redirected to the local build in
  // "../../lib/". Anything else is passed to the real `require` unchanged instead
  // of having its first characters blindly sliced off.
  function require(mod: string) {
    const packagePrefix = "tokenizers/";
    if (!mod.startsWith(packagePrefix)) {
      return globRequire(mod);
    }
    return globRequire("../../lib/" + mod.slice(packagePrefix.length));
  }

  it("builds a normalizer and plugs it into a reloaded tokenizer", () => {
    // START reload_tokenizer
    const { Tokenizer } = require("tokenizers/bindings/tokenizer");

    const tokenizer = Tokenizer.fromFile("data/tokenizer-wiki.json");
    // END reload_tokenizer
    // START setup_normalizer
    const { sequenceNormalizer, nfdNormalizer, stripAccentsNormalizer } = require("tokenizers/bindings/normalizers");

    const normalizer = sequenceNormalizer([nfdNormalizer(), stripAccentsNormalizer()]);
    // END setup_normalizer
    // START test_normalizer
    const normalized = normalizer.normalizeStr("Héllò hôw are ü?");
    // "Hello how are u?"
    // END test_normalizer
    expect(normalized).toEqual("Hello how are u?");
    // START replace_normalizer
    tokenizer.setNormalizer(normalizer);
    // END replace_normalizer
  });
});
|
||||
Reference in New Issue
Block a user