mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Creating `normalizers.Prepend` (To be used instead of `Metaspace`). (#1194)
* Creating `normalizers.Prepend` (To be used instead of `Metaspace`).
* Linting + stub.
* Fixing pickling/unpickling by setting a default.
* Black.
This commit is contained in:
6
bindings/node/lib/bindings/normalizers.d.ts
vendored
6
bindings/node/lib/bindings/normalizers.d.ts
vendored
@ -78,6 +78,12 @@ export function lowercaseNormalizer(): Normalizer;
|
||||
*/
|
||||
export function stripNormalizer(left?: boolean, right?: boolean): Normalizer;
|
||||
|
||||
/**
|
||||
* Returns a new Prepend Normalizer
|
||||
* @param prepend The string to prepend
|
||||
*/
|
||||
export function prependNormalizer(prepend: string): Normalizer;
|
||||
|
||||
/**
|
||||
* Returns a new StripAccents Normalizer
|
||||
*/
|
||||
|
@ -9,6 +9,7 @@ module.exports = {
|
||||
sequenceNormalizer: native.normalizers_Sequence,
|
||||
lowercaseNormalizer: native.normalizers_Lowercase,
|
||||
stripNormalizer: native.normalizers_Strip,
|
||||
prependNormalizer: native.normalizers_Prepend,
|
||||
stripAccentsNormalizer: native.normalizers_StripAccents,
|
||||
nmtNormalizer: native.normalizers_Nmt,
|
||||
precompiledNormalizer: native.normalizers_Precompiled,
|
||||
|
@ -1,4 +1,8 @@
|
||||
import { stripAccentsNormalizer, stripNormalizer } from "./normalizers";
|
||||
import {
|
||||
prependNormalizer,
|
||||
stripAccentsNormalizer,
|
||||
stripNormalizer,
|
||||
} from "./normalizers";
|
||||
|
||||
describe("stripNormalizer", () => {
|
||||
it("instantiates with no parameters", () => {
|
||||
@ -24,6 +28,12 @@ describe("stripNormalizer", () => {
|
||||
expect(normalizer.constructor.name).toEqual("Normalizer");
|
||||
});
|
||||
|
||||
it("prepend instantiates with one parameter", () => {
|
||||
const normalizer = prependNormalizer("_");
|
||||
expect(normalizer.constructor.name).toEqual("Normalizer");
|
||||
expect(normalizer.normalizeString("Hello")).toEqual("_Hello");
|
||||
});
|
||||
|
||||
it("can normalize strings", () => {
|
||||
const normalizer = stripNormalizer();
|
||||
expect(normalizer.normalizeString(" Hello there ")).toEqual("Hello there");
|
||||
|
@ -175,6 +175,18 @@ fn strip(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
|
||||
|
||||
Ok(normalizer)
|
||||
}
|
||||
|
||||
/// prepend(prepend: string)
|
||||
fn prepend(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
|
||||
let prepend: String = cx.extract::<String>(0)?;
|
||||
|
||||
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
|
||||
let guard = cx.lock();
|
||||
normalizer.borrow_mut(&guard).normalizer =
|
||||
Some(tk::normalizers::prepend::Prepend::new(prepend).into());
|
||||
|
||||
Ok(normalizer)
|
||||
}
|
||||
/// strip_accents()
|
||||
fn strip_accents(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
|
||||
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
|
||||
@ -267,6 +279,7 @@ pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
|
||||
m.export_function(&format!("{}_Sequence", prefix), sequence)?;
|
||||
m.export_function(&format!("{}_Lowercase", prefix), lowercase)?;
|
||||
m.export_function(&format!("{}_Strip", prefix), strip)?;
|
||||
m.export_function(&format!("{}_Prepend", prefix), prepend)?;
|
||||
m.export_function(&format!("{}_StripAccents", prefix), strip_accents)?;
|
||||
m.export_function(&format!("{}_Nmt", prefix), nmt)?;
|
||||
m.export_function(&format!("{}_Precompiled", prefix), precompiled)?;
|
||||
|
Reference in New Issue
Block a user