mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-03 11:18:29 +00:00
Node - Fix encoding pad
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import { promisify } from "util";
|
||||
|
||||
import { PaddingDirection } from "./enums";
|
||||
import { Model, WordPiece, WordPieceOptions } from "./models";
|
||||
import { whitespacePreTokenizer } from "./pre-tokenizers";
|
||||
import { RawEncoding } from "./raw-encoding";
|
||||
@@ -137,4 +138,31 @@ describe("RawEncoding", () => {
|
||||
expect(index).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("pad", () => {
|
||||
it("works correctly with only one parameter", () => {
|
||||
encoding.pad(10);
|
||||
expect(encoding.getTokens()).toHaveLength(10);
|
||||
});
|
||||
|
||||
it("accepts `undefined` as second parameter", () => {
|
||||
encoding.pad(10, undefined);
|
||||
expect(encoding.getTokens()).toHaveLength(10);
|
||||
});
|
||||
|
||||
it("accepts options as second parameter", () => {
|
||||
encoding.pad(10, {
|
||||
direction: PaddingDirection.Left,
|
||||
padToken: "[PA]",
|
||||
padTypeId: 10,
|
||||
padId: 400
|
||||
});
|
||||
|
||||
const tokens = encoding.getTokens();
|
||||
expect(tokens).toHaveLength(10);
|
||||
expect(tokens[0]).toBe("[PA]");
|
||||
expect(encoding.getTypeIds()[0]).toBe(10);
|
||||
expect(encoding.getIds()[0]).toBe(400);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -257,7 +257,7 @@ declare_types! {
|
||||
// padToken?: string = "[PAD]"
|
||||
// }
|
||||
let length = cx.extract::<usize>(0)?;
|
||||
- let params = cx.extract_opt::<PaddingParams>(0)?
|
||||
+ let params = cx.extract_opt::<PaddingParams>(1)?
|
||||
.map_or_else(tk::PaddingParams::default, |p| p.0);
|
||||
|
||||
let mut this = cx.this();
|
||||
|
||||
Reference in New Issue
Block a user