Doc - Update Normalizer part of the Pipeline page

This commit is contained in:
Anthony MOI
2020-10-27 18:45:16 -04:00
committed by Anthony MOI
parent ab7bae466a
commit 13a80050f0
6 changed files with 207 additions and 21 deletions

View File

@@ -0,0 +1,30 @@
const globRequire = require;
describe("pipelineExample", () => {
// This is a hack to let us require using path similar to what the user has to use
function require(mod: string) {
let path = mod.slice("tokenizers/".length);
return globRequire("../../lib/" + path);
}
it("", async () => {
// START reload_tokenizer
const { Tokenizer } = require("tokenizers/bindings/tokenizer");
const tokenizer = Tokenizer.fromFile("data/tokenizer-wiki.json");
// END reload_tokenizer
// START setup_normalizer
const { sequenceNormalizer, nfdNormalizer, stripAccentsNormalizer } = require("tokenizers/bindings/normalizers");
const normalizer = sequenceNormalizer([nfdNormalizer(), stripAccentsNormalizer()]);
// END setup_normalizer
// START test_normalizer
let normalized = normalizer.normalizeStr("Héllò hôw are ü?")
// "Hello how are u?"
// END test_normalizer
expect(normalized).toEqual("Hello how are u?");
// START replace_normalizer
tokenizer.setNormalizer(normalizer)
// END replace_normalizer
});
});