diff --git a/bindings/node/lib/bindings/decoders.js b/bindings/node/lib/bindings/decoders.js index 66482638..b7eed529 100644 --- a/bindings/node/lib/bindings/decoders.js +++ b/bindings/node/lib/bindings/decoders.js @@ -4,5 +4,5 @@ module.exports = { byteLevelDecoder: native.decoders_ByteLevel, wordPieceDecoder: native.decoders_WordPiece, metaspaceDecoder: native.decoders_Metaspace, - bpeDecoder: native.decoders_BPEDecoder + bpeDecoder: native.decoders_BPEDecoder, }; diff --git a/bindings/node/lib/bindings/enums.ts b/bindings/node/lib/bindings/enums.ts index 3ff95caf..38c71eed 100644 --- a/bindings/node/lib/bindings/enums.ts +++ b/bindings/node/lib/bindings/enums.ts @@ -1,10 +1,10 @@ export enum TruncationStrategy { LongestFirst = "longest_first", OnlyFirst = "only_first", - OnlySecond = "only_second" + OnlySecond = "only_second", } export enum PaddingDirection { Left = "left", - Right = "right" + Right = "right", } diff --git a/bindings/node/lib/bindings/models.js b/bindings/node/lib/bindings/models.js index 87177db5..8a8b0dd5 100644 --- a/bindings/node/lib/bindings/models.js +++ b/bindings/node/lib/bindings/models.js @@ -3,10 +3,10 @@ const native = require("./native"); module.exports = { BPE: { fromFiles: native.models_BPE_from_files, - empty: native.models_BPE_empty + empty: native.models_BPE_empty, }, WordPiece: { fromFiles: native.models_WordPiece_from_files, - empty: native.models_WordPiece_empty - } + empty: native.models_WordPiece_empty, + }, }; diff --git a/bindings/node/lib/bindings/models.test.ts b/bindings/node/lib/bindings/models.test.ts index 4adf2b7e..e5b1a95d 100644 --- a/bindings/node/lib/bindings/models.test.ts +++ b/bindings/node/lib/bindings/models.test.ts @@ -23,7 +23,7 @@ describe("WordPiece", () => { }); it("has its callback called with the loaded model", () => { - return new Promise(done => { + return new Promise((done) => { WordPiece.fromFiles(`${MOCKS_DIR}/vocab.txt`, (err, model) => { expect(model).toBeDefined(); done(); @@ -40,7 +40,7 @@ describe("WordPiece", () => { }); it("has its callback called with the loaded model", () => { - return new Promise(done => { + return new Promise((done) => { WordPiece.fromFiles(`${MOCKS_DIR}/vocab.txt`, {}, (err, model) => { expect(model).toBeDefined(); done(); @@ -72,7 +72,7 @@ describe("BPE", () => { }); it("has its callback called with the loaded model", () => { - return new Promise(done => { + return new Promise((done) => { BPE.fromFiles( `${MOCKS_DIR}/vocab.json`, `${MOCKS_DIR}/merges.txt`, @@ -93,7 +93,7 @@ describe("BPE", () => { }); it("has its callback called with the loaded model", () => { - return new Promise(done => { + return new Promise((done) => { BPE.fromFiles( `${MOCKS_DIR}/vocab.json`, `${MOCKS_DIR}/merges.txt`, diff --git a/bindings/node/lib/bindings/normalizers.js b/bindings/node/lib/bindings/normalizers.js index de357e23..6ab0831b 100644 --- a/bindings/node/lib/bindings/normalizers.js +++ b/bindings/node/lib/bindings/normalizers.js @@ -8,5 +8,5 @@ module.exports = { nfkcNormalizer: native.normalizers_NFKC, nfkdNormalizer: native.normalizers_NFKD, sequenceNormalizer: native.normalizers_Sequence, - stripNormalizer: native.normalizers_Strip + stripNormalizer: native.normalizers_Strip, }; diff --git a/bindings/node/lib/bindings/post-processors.js b/bindings/node/lib/bindings/post-processors.js index 42fa3728..717c5c9e 100644 --- a/bindings/node/lib/bindings/post-processors.js +++ b/bindings/node/lib/bindings/post-processors.js @@ -3,5 +3,5 @@ const native = require("./native"); module.exports = { 
bertProcessing: native.processors_BertProcessing, byteLevelProcessing: native.processors_ByteLevel, - robertaProcessing: native.processors_RobertaProcessing + robertaProcessing: native.processors_RobertaProcessing, }; diff --git a/bindings/node/lib/bindings/post-processors.test.ts b/bindings/node/lib/bindings/post-processors.test.ts index 7099d252..8eaf73d8 100644 --- a/bindings/node/lib/bindings/post-processors.test.ts +++ b/bindings/node/lib/bindings/post-processors.test.ts @@ -3,7 +3,7 @@ import { bertProcessing, byteLevelProcessing, - robertaProcessing + robertaProcessing, } from "./post-processors"; describe("bertProcessing", () => { diff --git a/bindings/node/lib/bindings/pre-tokenizers.js b/bindings/node/lib/bindings/pre-tokenizers.js index a4026733..b231c1f4 100644 --- a/bindings/node/lib/bindings/pre-tokenizers.js +++ b/bindings/node/lib/bindings/pre-tokenizers.js @@ -7,5 +7,5 @@ module.exports = { whitespaceSplitPreTokenizer: native.pre_tokenizers_WhitespaceSplit, bertPreTokenizer: native.pre_tokenizers_BertPreTokenizer, metaspacePreTokenizer: native.pre_tokenizers_Metaspace, - charDelimiterSplitPreTokenizer: native.pre_tokenizers_CharDelimiterSplit + charDelimiterSplitPreTokenizer: native.pre_tokenizers_CharDelimiterSplit, }; diff --git a/bindings/node/lib/bindings/raw-encoding.test.ts b/bindings/node/lib/bindings/raw-encoding.test.ts index 580b66f5..0db2c624 100644 --- a/bindings/node/lib/bindings/raw-encoding.test.ts +++ b/bindings/node/lib/bindings/raw-encoding.test.ts @@ -21,7 +21,7 @@ describe("RawEncoding", () => { const model = await promisify(WordPiece.fromFiles)( `${MOCKS_DIR}/vocab.txt`, { - continuingSubwordPrefix: "##" + continuingSubwordPrefix: "##", } ); @@ -155,7 +155,7 @@ describe("RawEncoding", () => { direction: PaddingDirection.Left, padToken: "[PA]", padTypeId: 10, - padId: 400 + padId: 400, }); const tokens = encoding.getTokens(); diff --git a/bindings/node/lib/bindings/tokenizer.js b/bindings/node/lib/bindings/tokenizer.js index e2340eb5..2477366f 100644 --- a/bindings/node/lib/bindings/tokenizer.js +++ b/bindings/node/lib/bindings/tokenizer.js @@ -7,5 +7,5 @@ class Tokenizer extends native.tokenizer_Tokenizer { module.exports = { AddedToken: native.tokenizer_AddedToken, - Tokenizer + Tokenizer, }; diff --git a/bindings/node/lib/bindings/tokenizer.test.ts b/bindings/node/lib/bindings/tokenizer.test.ts index 7f45c870..4333f07a 100644 --- a/bindings/node/lib/bindings/tokenizer.test.ts +++ b/bindings/node/lib/bindings/tokenizer.test.ts @@ -14,7 +14,7 @@ import { InputSequence, PaddingConfiguration, Tokenizer, - TruncationConfiguration + TruncationConfiguration, } from "./tokenizer"; // jest.mock('../bindings/tokenizer'); @@ -45,7 +45,7 @@ describe("AddedToken", () => { const addToken = new AddedToken("test", false, { leftStrip: true, rightStrip: true, - singleWord: true + singleWord: true, }); expect(addToken.constructor.name).toEqual("AddedToken"); }); @@ -156,7 +156,7 @@ describe("Tokenizer", () => { it("accepts a pre-tokenized string as parameter", async () => { const encoding = await encode(["my", "name", "is", "john"], undefined, { - isPretokenized: true + isPretokenized: true, }); expect(encoding).toBeDefined(); }); @@ -170,7 +170,7 @@ describe("Tokenizer", () => { it("accepts a pre-tokenized input in encodeBatch", async () => { const encoding = await encodeBatch([["my", "name", "is", "john"]], { - isPretokenized: true + isPretokenized: true, }); expect(encoding).toBeDefined(); }); @@ -198,7 +198,7 @@ describe("Tokenizer", () => { [3, 7], [8, 10], [11, 
15], - [0, 4] + [0, 4], ]); expect(encoding.getOverflowing()).toEqual([]); expect(encoding.getSpecialTokensMask()).toEqual([0, 0, 0, 0, 0]); @@ -243,7 +243,7 @@ describe("Tokenizer", () => { "name", "[PAD]", "[PAD]", - "[PAD]" + "[PAD]", ]); const pairEncoding = await encode("my name", "pair"); @@ -252,7 +252,7 @@ describe("Tokenizer", () => { "name", "pair", "[PAD]", - "[PAD]" + "[PAD]", ]); }); @@ -304,7 +304,7 @@ describe("Tokenizer", () => { const decodeBatch = promisify(tokenizer.decodeBatch.bind(tokenizer)); await expect(decodeBatch([[0, 1, 2, 3], [4]], true)).resolves.toEqual([ "my name is john", - "pair" + "pair", ]); }); }); @@ -326,7 +326,7 @@ describe("Tokenizer", () => { my: 0, name: 1, is: 2, - john: 3 + john: 3, }); }); }); @@ -349,7 +349,7 @@ describe("Tokenizer", () => { const expectedConfig: TruncationConfiguration = { maxLength: 2, strategy: TruncationStrategy.LongestFirst, - stride: 0 + stride: 0, }; expect(truncation).toEqual(expectedConfig); }); @@ -365,7 +365,7 @@ describe("Tokenizer", () => { direction: PaddingDirection.Right, padId: 0, padToken: "[PAD]", - padTypeId: 0 + padTypeId: 0, }; expect(padding).toEqual(expectedConfig); }); diff --git a/bindings/node/lib/bindings/trainers.js b/bindings/node/lib/bindings/trainers.js index 866aac98..20f72c9b 100644 --- a/bindings/node/lib/bindings/trainers.js +++ b/bindings/node/lib/bindings/trainers.js @@ -2,5 +2,5 @@ const native = require("./native"); module.exports = { bpeTrainer: native.trainers_BPETrainer, - wordPieceTrainer: native.trainers_WordPieceTrainer + wordPieceTrainer: native.trainers_WordPieceTrainer, }; diff --git a/bindings/node/lib/bindings/utils.js b/bindings/node/lib/bindings/utils.js index 599daa1a..4159365f 100644 --- a/bindings/node/lib/bindings/utils.js +++ b/bindings/node/lib/bindings/utils.js @@ -2,5 +2,5 @@ const native = require("./native"); module.exports = { mergeEncodings: native.utils_mergeEncodings, - slice: native.utils_slice + slice: native.utils_slice, }; diff --git a/bindings/node/lib/bindings/utils.test.ts b/bindings/node/lib/bindings/utils.test.ts index 83d6aa01..5f8f1bbe 100644 --- a/bindings/node/lib/bindings/utils.test.ts +++ b/bindings/node/lib/bindings/utils.test.ts @@ -141,7 +141,7 @@ describe("mergeEncodings", () => { [0, 2], [3, 7], [8, 10], - [0, 4] + [0, 4], ]); }); @@ -155,7 +155,7 @@ describe("mergeEncodings", () => { [0, 2], [3, 7], [8, 10], - [0, 4] + [0, 4], ]); }); @@ -169,7 +169,7 @@ describe("mergeEncodings", () => { [0, 2], [3, 7], [8, 10], - [10, 14] + [10, 14], ]); }); }); diff --git a/bindings/node/lib/implementations/encoding.test.ts b/bindings/node/lib/implementations/encoding.test.ts index aa3873cd..1215e6fe 100644 --- a/bindings/node/lib/implementations/encoding.test.ts +++ b/bindings/node/lib/implementations/encoding.test.ts @@ -10,7 +10,7 @@ describe("Encoding", () => { describe("ids", () => { const getIdsMock = jest.fn(() => [3]); const m = rawEncodingMock.mockImplementation(() => ({ - getIds: getIdsMock + getIds: getIdsMock, })); encoding = new Encoding(m() as RawEncoding); @@ -36,7 +36,7 @@ describe("Encoding", () => { const getIdsMock = jest.fn(() => [4]); const m = rawEncodingMock.mockImplementation(() => ({ getIds: getIdsMock, - pad: jest.fn() + pad: jest.fn(), })); encoding = new Encoding(m() as RawEncoding); @@ -55,7 +55,7 @@ describe("Encoding", () => { const getIdsMock = jest.fn(() => [4]); const m = rawEncodingMock.mockImplementation(() => ({ getIds: getIdsMock, - truncate: jest.fn() + truncate: jest.fn(), })); encoding = new Encoding(m() as 
RawEncoding); diff --git a/bindings/node/lib/implementations/encoding.ts b/bindings/node/lib/implementations/encoding.ts index 7c561e1a..4a80ddb6 100644 --- a/bindings/node/lib/implementations/encoding.ts +++ b/bindings/node/lib/implementations/encoding.ts @@ -21,7 +21,7 @@ export class Encoding { */ static merge(encodings: Encoding[], growingOffsets?: boolean): Encoding { const mergedRaw = mergeEncodings( - encodings.map(e => e.rawEncoding), + encodings.map((e) => e.rawEncoding), growingOffsets ); @@ -82,7 +82,7 @@ export class Encoding { return (this._overflowing = this._rawEncoding .getOverflowing() - .map(e => new Encoding(e))); + .map((e) => new Encoding(e))); } /** @@ -218,7 +218,7 @@ export class Encoding { "_specialTokensMask", "_tokens", "_typeIds", - "_wordIndexes" + "_wordIndexes", ]) { delete this[prop as keyof this]; } diff --git a/bindings/node/lib/implementations/tokenizers/base.tokenizer.test.ts b/bindings/node/lib/implementations/tokenizers/base.tokenizer.test.ts index 045b739b..a24539aa 100644 --- a/bindings/node/lib/implementations/tokenizers/base.tokenizer.test.ts +++ b/bindings/node/lib/implementations/tokenizers/base.tokenizer.test.ts @@ -3,12 +3,12 @@ import { BPE } from "../../bindings/models"; import { PaddingConfiguration, Tokenizer, - TruncationConfiguration + TruncationConfiguration, } from "../../bindings/tokenizer"; import { BaseTokenizer } from "./base.tokenizer"; describe("BaseTokenizer", () => { - let tokenizer: BaseTokenizer<{}>; + let tokenizer: BaseTokenizer<Record<string, unknown>>; beforeEach(() => { // Clear all instances and calls to constructor and all methods: @@ -29,7 +29,7 @@ describe("BaseTokenizer", () => { const expectedConfig: TruncationConfiguration = { maxLength: 2, strategy: TruncationStrategy.LongestFirst, - stride: 0 + stride: 0, }; expect(tokenizer.truncation).toEqual(expectedConfig); }); @@ -52,7 +52,7 @@ describe("BaseTokenizer", () => { direction: PaddingDirection.Right, padId: 0, padToken: "[PAD]", - padTypeId: 0 + padTypeId: 0, }; expect(tokenizer.padding).toEqual(expectedConfig); }); diff --git a/bindings/node/lib/implementations/tokenizers/base.tokenizer.ts b/bindings/node/lib/implementations/tokenizers/base.tokenizer.ts index 97159920..d668883f 100644 --- a/bindings/node/lib/implementations/tokenizers/base.tokenizer.ts +++ b/bindings/node/lib/implementations/tokenizers/base.tokenizer.ts @@ -10,12 +10,13 @@ import { PaddingOptions, Tokenizer, TruncationConfiguration, - TruncationOptions + TruncationOptions, } from "../../bindings/tokenizer"; import { Encoding } from "../encoding"; export type Token = string | AddedToken; +// eslint-disable-next-line @typescript-eslint/ban-types export class BaseTokenizer { private _truncation?: TruncationConfiguration; private _padding?: PaddingConfiguration; @@ -114,7 +115,7 @@ export class BaseTokenizer { ): Promise<Encoding[]> { const encodeBatch = promisify(this.tokenizer.encodeBatch.bind(this.tokenizer)); const rawEncodings = await encodeBatch(sequences, options); - return rawEncodings.map(e => new Encoding(e)); + return rawEncodings.map((e) => new Encoding(e)); } /** diff --git a/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.test.ts b/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.test.ts index 6dbd3adf..a129d3d1 100644 --- a/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.test.ts +++ b/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.test.ts @@ -14,7 +14,7 @@ describe("BertWordPieceTokenizer", () => { describe("when a vocabFile is
provided and `addSpecialTokens === true`", () => { it("throws a `sepToken error` if no `sepToken` is provided", async () => { const options: BertWordPieceOptions = { - vocabFile: MOCKS_DIR + "/bert-vocab-empty.txt" + vocabFile: MOCKS_DIR + "/bert-vocab-empty.txt", }; await expect(BertWordPieceTokenizer.fromOptions(options)).rejects.toThrow( @@ -24,7 +24,7 @@ describe("BertWordPieceTokenizer", () => { it("throws a `clsToken error` if no `clsToken` is provided", async () => { const options: BertWordPieceOptions = { - vocabFile: MOCKS_DIR + "/bert-vocab-without-cls.txt" + vocabFile: MOCKS_DIR + "/bert-vocab-without-cls.txt", }; await expect(BertWordPieceTokenizer.fromOptions(options)).rejects.toThrow( diff --git a/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.ts b/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.ts index 8edf5e5c..f4660578 100644 --- a/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.ts +++ b/bindings/node/lib/implementations/tokenizers/bert-wordpiece.tokenizer.ts @@ -104,7 +104,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer { sepToken: "[SEP]", stripAccents: true, unkToken: "[UNK]", - wordpiecesPrefix: "##" + wordpiecesPrefix: "##", }; private readonly defaultTrainOptions: Required = { @@ -114,7 +114,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer { showProgress: true, specialTokens: ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"], vocabSize: 30000, - wordpiecesPrefix: "##" + wordpiecesPrefix: "##", }; private constructor(tokenizer: Tokenizer, configuration: BertTokenizerConfig) { @@ -135,7 +135,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer { const fromFiles = promisify(WordPiece.fromFiles); model = await fromFiles(opts.vocabFile, { unkToken: getTokenContent(opts.unkToken), - continuingSubwordPrefix: opts.wordpiecesPrefix + continuingSubwordPrefix: opts.wordpiecesPrefix, }); } else { model = WordPiece.empty(); @@ -148,7 +148,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer { opts.sepToken, opts.unkToken, opts.padToken, - opts.maskToken + opts.maskToken, ]) { if (tokenizer.tokenToId(getTokenContent(token)) !== undefined) { tokenizer.addSpecialTokens([token]); diff --git a/bindings/node/lib/implementations/tokenizers/bpe.tokenizer.ts b/bindings/node/lib/implementations/tokenizers/bpe.tokenizer.ts index 61d503a5..782aede4 100644 --- a/bindings/node/lib/implementations/tokenizers/bpe.tokenizer.ts +++ b/bindings/node/lib/implementations/tokenizers/bpe.tokenizer.ts @@ -5,7 +5,7 @@ import { BPE, BPEOptions, Model } from "../../bindings/models"; import { lowercaseNormalizer, nfkcNormalizer, - sequenceNormalizer + sequenceNormalizer, } from "../../bindings/normalizers"; import { whitespaceSplitPreTokenizer } from "../../bindings/pre-tokenizers"; import { Tokenizer } from "../../bindings/tokenizer"; @@ -75,7 +75,7 @@ type BPETokenizerConfig = BPETokenizerOptions & export class BPETokenizer extends BaseTokenizer<BPETokenizerConfig> { private static readonly defaultBPEOptions: BPETokenizerConfig = { suffix: "</w>", - unkToken: "<unk>" + unkToken: "<unk>", }; private readonly defaultTrainOptions: Required = { @@ -85,7 +85,7 @@ export class BPETokenizer extends BaseTokenizer { showProgress: true, specialTokens: ["<unk>"], suffix: "</w>", - vocabSize: 30000 + vocabSize: 30000, }; private constructor(tokenizer: Tokenizer, configuration: BPETokenizerConfig) { @@ -105,7 +105,7 @@ export class BPETokenizer extends BaseTokenizer { const modelOptions: BPEOptions = { dropout: opts.dropout, endOfWordSuffix:
opts.suffix, - unkToken: unkToken + unkToken: unkToken, }; const fromFiles = promisify(BPE.fromFiles); diff --git a/bindings/node/lib/implementations/tokenizers/byte-level-bpe.tokenizer.ts b/bindings/node/lib/implementations/tokenizers/byte-level-bpe.tokenizer.ts index 17a8c908..38f4432a 100644 --- a/bindings/node/lib/implementations/tokenizers/byte-level-bpe.tokenizer.ts +++ b/bindings/node/lib/implementations/tokenizers/byte-level-bpe.tokenizer.ts @@ -5,7 +5,7 @@ import { BPE, BPEOptions, Model } from "../../bindings/models"; import { lowercaseNormalizer, nfkcNormalizer, - sequenceNormalizer + sequenceNormalizer, } from "../../bindings/normalizers"; import { byteLevelProcessing } from "../../bindings/post-processors"; import { byteLevelAlphabet, byteLevelPreTokenizer } from "../../bindings/pre-tokenizers"; @@ -72,14 +72,14 @@ type ByteLevelBPETokenizerConfig = ByteLevelBPETokenizerOptions & export class ByteLevelBPETokenizer extends BaseTokenizer<ByteLevelBPETokenizerConfig> { private static readonly defaultOptions: ByteLevelBPETokenizerConfig = { addPrefixSpace: false, - trimOffsets: false + trimOffsets: false, }; private readonly defaultTrainOptions: Required = { minFrequency: 2, showProgress: true, specialTokens: ["<|endoftext|>"], - vocabSize: 30000 + vocabSize: 30000, }; private constructor(tokenizer: Tokenizer, configuration: ByteLevelBPETokenizerConfig) { @@ -127,7 +127,7 @@ export class ByteLevelBPETokenizer extends BaseTokenizer" + unkToken: "<unk>", }; private readonly defaultTrainOptions: Required = { @@ -77,7 +77,7 @@ export class SentencePieceBPETokenizer extends BaseTokenizer< minFrequency: 2, showProgress: true, specialTokens: ["<unk>"], - vocabSize: 30000 + vocabSize: 30000, }; private constructor( @@ -97,7 +97,7 @@ export class SentencePieceBPETokenizer extends BaseTokenizer< if (opts.vocabFile && opts.mergesFile) { const modelOptions: BPEOptions = { dropout: opts.dropout, - unkToken: unkToken + unkToken: unkToken, }; const fromFiles = promisify(BPE.fromFiles); diff --git a/bindings/node/lib/index.ts b/bindings/node/lib/index.ts index 98289101..847f0a96 100644 --- a/bindings/node/lib/index.ts +++ b/bindings/node/lib/index.ts @@ -11,6 +11,6 @@ export { EncodeInput, EncodeOptions, TruncationConfiguration, - TruncationOptions + TruncationOptions, } from "./bindings/tokenizer"; export { Encoding } from "./implementations/encoding";
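The edits above are purely mechanical: trailing commas are added to multiline object and array literals, and single arrow-function parameters gain parentheses. As a rough illustration (an assumption, not something stated in this PR), this is the output Prettier 2.x produces with its defaults; an explicit config spelling out those two options might look like the hypothetical sketch below.

// prettier.config.js — hypothetical sketch; these option values are assumed, not taken from the repo
module.exports = {
  trailingComma: "es5", // add trailing commas wherever valid in ES5 (multiline objects, arrays)
  arrowParens: "always", // (done) => { ... } instead of done => { ... }
};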