Node - Fix new linting errors

Pierric Cistac
2020-07-24 15:44:34 -04:00
parent 85800e4189
commit e9a2e63a67
24 changed files with 64 additions and 63 deletions
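
The hunks below all show the same mechanical pattern: trailing commas added after the last element of multiline arrays, object literals, and import/export lists; parentheses added around single arrow-function parameters; and one bare `{}` type replaced. That pattern matches the formatting defaults introduced in Prettier 2.0 (`trailingComma: "es5"`, `arrowParens: "always"`), surfaced as lint errors when Prettier runs under the linter. A hypothetical sketch of the configuration that would drive these changes (the repo's actual config files and tool versions are not shown on this page):

// .prettierrc.js -- hypothetical sketch; with Prettier >= 2.0 these values
// are the defaults, so a version upgrade alone would surface the errors.
module.exports = {
  // emit a trailing comma wherever ES5 allows one (multiline arrays and
  // objects) and in multiline import/export specifier lists
  trailingComma: "es5",
  // always parenthesize a single arrow-function parameter: (done) => { ... }
  arrowParens: "always",
};

A sweep like `npx prettier --write .` applies the rules across the tree, which accounts for the small, uniform diffs in all 24 files.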


@@ -4,5 +4,5 @@ module.exports = {
   byteLevelDecoder: native.decoders_ByteLevel,
   wordPieceDecoder: native.decoders_WordPiece,
   metaspaceDecoder: native.decoders_Metaspace,
-  bpeDecoder: native.decoders_BPEDecoder
+  bpeDecoder: native.decoders_BPEDecoder,
 };


@@ -1,10 +1,10 @@
 export enum TruncationStrategy {
   LongestFirst = "longest_first",
   OnlyFirst = "only_first",
-  OnlySecond = "only_second"
+  OnlySecond = "only_second",
 }
 
 export enum PaddingDirection {
   Left = "left",
-  Right = "right"
+  Right = "right",
 }


@@ -3,10 +3,10 @@ const native = require("./native");
 module.exports = {
   BPE: {
     fromFiles: native.models_BPE_from_files,
-    empty: native.models_BPE_empty
+    empty: native.models_BPE_empty,
   },
   WordPiece: {
     fromFiles: native.models_WordPiece_from_files,
-    empty: native.models_WordPiece_empty
-  }
+    empty: native.models_WordPiece_empty,
+  },
 };


@@ -23,7 +23,7 @@ describe("WordPiece", () => {
     });
 
     it("has its callback called with the loaded model", () => {
-      return new Promise(done => {
+      return new Promise((done) => {
         WordPiece.fromFiles(`${MOCKS_DIR}/vocab.txt`, (err, model) => {
           expect(model).toBeDefined();
           done();
@@ -40,7 +40,7 @@ describe("WordPiece", () => {
     });
 
     it("has its callback called with the loaded model", () => {
-      return new Promise(done => {
+      return new Promise((done) => {
         WordPiece.fromFiles(`${MOCKS_DIR}/vocab.txt`, {}, (err, model) => {
           expect(model).toBeDefined();
           done();
@@ -72,7 +72,7 @@ describe("BPE", () => {
     });
 
     it("has its callback called with the loaded model", () => {
-      return new Promise(done => {
+      return new Promise((done) => {
         BPE.fromFiles(
           `${MOCKS_DIR}/vocab.json`,
           `${MOCKS_DIR}/merges.txt`,
@@ -93,7 +93,7 @@ describe("BPE", () => {
     });
 
     it("has its callback called with the loaded model", () => {
-      return new Promise(done => {
+      return new Promise((done) => {
         BPE.fromFiles(
           `${MOCKS_DIR}/vocab.json`,
           `${MOCKS_DIR}/merges.txt`,


@@ -8,5 +8,5 @@ module.exports = {
   nfkcNormalizer: native.normalizers_NFKC,
   nfkdNormalizer: native.normalizers_NFKD,
   sequenceNormalizer: native.normalizers_Sequence,
-  stripNormalizer: native.normalizers_Strip
+  stripNormalizer: native.normalizers_Strip,
 };


@@ -3,5 +3,5 @@ const native = require("./native");
 module.exports = {
   bertProcessing: native.processors_BertProcessing,
   byteLevelProcessing: native.processors_ByteLevel,
-  robertaProcessing: native.processors_RobertaProcessing
+  robertaProcessing: native.processors_RobertaProcessing,
 };


@@ -3,7 +3,7 @@
 import {
   bertProcessing,
   byteLevelProcessing,
-  robertaProcessing
+  robertaProcessing,
 } from "./post-processors";
 
 describe("bertProcessing", () => {


@@ -7,5 +7,5 @@ module.exports = {
   whitespaceSplitPreTokenizer: native.pre_tokenizers_WhitespaceSplit,
   bertPreTokenizer: native.pre_tokenizers_BertPreTokenizer,
   metaspacePreTokenizer: native.pre_tokenizers_Metaspace,
-  charDelimiterSplitPreTokenizer: native.pre_tokenizers_CharDelimiterSplit
+  charDelimiterSplitPreTokenizer: native.pre_tokenizers_CharDelimiterSplit,
 };


@@ -21,7 +21,7 @@ describe("RawEncoding", () => {
     const model = await promisify<string, WordPieceOptions, Model>(WordPiece.fromFiles)(
       `${MOCKS_DIR}/vocab.txt`,
       {
-        continuingSubwordPrefix: "##"
+        continuingSubwordPrefix: "##",
       }
     );
 
@@ -155,7 +155,7 @@ describe("RawEncoding", () => {
         direction: PaddingDirection.Left,
         padToken: "[PA]",
         padTypeId: 10,
-        padId: 400
+        padId: 400,
       });
 
       const tokens = encoding.getTokens();
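
The hunk above also shows the pattern used throughout these tests for calling the callback-style native bindings: Node's `util.promisify`, with its generic parameters pinned to the callback's argument and result types. A minimal standalone sketch of that pattern (the import path is an assumption based on the relative paths visible in other hunks):

import { promisify } from "util";

// Path assumed from the relative imports elsewhere in this diff.
import { Model, WordPiece, WordPieceOptions } from "../../bindings/models";

// WordPiece.fromFiles(vocab, options, callback) is callback-based in the
// native bindings; promisify<Arg1, Arg2, Result> types the wrapped function
// as (vocab: string, options: WordPieceOptions) => Promise<Model>.
async function loadWordPiece(vocabFile: string): Promise<Model> {
  const fromFiles = promisify<string, WordPieceOptions, Model>(WordPiece.fromFiles);
  return fromFiles(vocabFile, { continuingSubwordPrefix: "##" });
}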


@@ -7,5 +7,5 @@ class Tokenizer extends native.tokenizer_Tokenizer {
 
 module.exports = {
   AddedToken: native.tokenizer_AddedToken,
-  Tokenizer
+  Tokenizer,
 };


@@ -14,7 +14,7 @@ import {
   InputSequence,
   PaddingConfiguration,
   Tokenizer,
-  TruncationConfiguration
+  TruncationConfiguration,
 } from "./tokenizer";
 
 // jest.mock('../bindings/tokenizer');
@@ -45,7 +45,7 @@ describe("AddedToken", () => {
     const addToken = new AddedToken("test", false, {
       leftStrip: true,
       rightStrip: true,
-      singleWord: true
+      singleWord: true,
     });
     expect(addToken.constructor.name).toEqual("AddedToken");
   });
@@ -156,7 +156,7 @@ describe("Tokenizer", () => {
 
    it("accepts a pre-tokenized string as parameter", async () => {
      const encoding = await encode(["my", "name", "is", "john"], undefined, {
-       isPretokenized: true
+       isPretokenized: true,
      });
      expect(encoding).toBeDefined();
    });
@@ -170,7 +170,7 @@ describe("Tokenizer", () => {
 
    it("accepts a pre-tokenized input in encodeBatch", async () => {
      const encoding = await encodeBatch([["my", "name", "is", "john"]], {
-       isPretokenized: true
+       isPretokenized: true,
      });
      expect(encoding).toBeDefined();
    });
@@ -198,7 +198,7 @@ describe("Tokenizer", () => {
        [3, 7],
        [8, 10],
        [11, 15],
-       [0, 4]
+       [0, 4],
      ]);
      expect(encoding.getOverflowing()).toEqual([]);
      expect(encoding.getSpecialTokensMask()).toEqual([0, 0, 0, 0, 0]);
@@ -243,7 +243,7 @@ describe("Tokenizer", () => {
        "name",
        "[PAD]",
        "[PAD]",
-       "[PAD]"
+       "[PAD]",
      ]);
 
      const pairEncoding = await encode("my name", "pair");
@@ -252,7 +252,7 @@ describe("Tokenizer", () => {
        "name",
        "pair",
        "[PAD]",
-       "[PAD]"
+       "[PAD]",
      ]);
    });
 
@@ -304,7 +304,7 @@ describe("Tokenizer", () => {
      const decodeBatch = promisify(tokenizer.decodeBatch.bind(tokenizer));
      await expect(decodeBatch([[0, 1, 2, 3], [4]], true)).resolves.toEqual([
        "my name is john",
-       "pair"
+       "pair",
      ]);
    });
  });
@@ -326,7 +326,7 @@ describe("Tokenizer", () => {
        my: 0,
        name: 1,
        is: 2,
-       john: 3
+       john: 3,
      });
    });
  });
@@ -349,7 +349,7 @@ describe("Tokenizer", () => {
      const expectedConfig: TruncationConfiguration = {
        maxLength: 2,
        strategy: TruncationStrategy.LongestFirst,
-       stride: 0
+       stride: 0,
      };
      expect(truncation).toEqual(expectedConfig);
    });
@@ -365,7 +365,7 @@ describe("Tokenizer", () => {
        direction: PaddingDirection.Right,
        padId: 0,
        padToken: "[PAD]",
-       padTypeId: 0
+       padTypeId: 0,
      };
      expect(padding).toEqual(expectedConfig);
    });


@@ -2,5 +2,5 @@ const native = require("./native");
 
 module.exports = {
   bpeTrainer: native.trainers_BPETrainer,
-  wordPieceTrainer: native.trainers_WordPieceTrainer
+  wordPieceTrainer: native.trainers_WordPieceTrainer,
 };


@@ -2,5 +2,5 @@ const native = require("./native");
 
 module.exports = {
   mergeEncodings: native.utils_mergeEncodings,
-  slice: native.utils_slice
+  slice: native.utils_slice,
 };


@@ -141,7 +141,7 @@ describe("mergeEncodings", () => {
       [0, 2],
       [3, 7],
       [8, 10],
-      [0, 4]
+      [0, 4],
     ]);
   });
 
@@ -155,7 +155,7 @@ describe("mergeEncodings", () => {
       [0, 2],
       [3, 7],
       [8, 10],
-      [0, 4]
+      [0, 4],
     ]);
   });
 
@@ -169,7 +169,7 @@ describe("mergeEncodings", () => {
       [0, 2],
       [3, 7],
       [8, 10],
-      [10, 14]
+      [10, 14],
     ]);
   });
 });


@@ -10,7 +10,7 @@ describe("Encoding", () => {
   describe("ids", () => {
     const getIdsMock = jest.fn(() => [3]);
     const m = rawEncodingMock.mockImplementation(() => ({
-      getIds: getIdsMock
+      getIds: getIdsMock,
     }));
 
     encoding = new Encoding(m() as RawEncoding);
@@ -36,7 +36,7 @@ describe("Encoding", () => {
     const getIdsMock = jest.fn(() => [4]);
     const m = rawEncodingMock.mockImplementation(() => ({
       getIds: getIdsMock,
-      pad: jest.fn()
+      pad: jest.fn(),
     }));
 
     encoding = new Encoding(m() as RawEncoding);
@@ -55,7 +55,7 @@ describe("Encoding", () => {
     const getIdsMock = jest.fn(() => [4]);
     const m = rawEncodingMock.mockImplementation(() => ({
       getIds: getIdsMock,
-      truncate: jest.fn()
+      truncate: jest.fn(),
     }));
 
     encoding = new Encoding(m() as RawEncoding);


@@ -21,7 +21,7 @@ export class Encoding {
    */
   static merge(encodings: Encoding[], growingOffsets?: boolean): Encoding {
     const mergedRaw = mergeEncodings(
-      encodings.map(e => e.rawEncoding),
+      encodings.map((e) => e.rawEncoding),
       growingOffsets
     );
 
@@ -82,7 +82,7 @@ export class Encoding {
 
     return (this._overflowing = this._rawEncoding
       .getOverflowing()
-      .map(e => new Encoding(e)));
+      .map((e) => new Encoding(e)));
   }
 
   /**
@@ -218,7 +218,7 @@ export class Encoding {
       "_specialTokensMask",
       "_tokens",
       "_typeIds",
-      "_wordIndexes"
+      "_wordIndexes",
     ]) {
       delete this[prop as keyof this];
     }


@@ -3,12 +3,12 @@ import { BPE } from "../../bindings/models";
 import {
   PaddingConfiguration,
   Tokenizer,
-  TruncationConfiguration
+  TruncationConfiguration,
 } from "../../bindings/tokenizer";
 import { BaseTokenizer } from "./base.tokenizer";
 
 describe("BaseTokenizer", () => {
-  let tokenizer: BaseTokenizer<{}>;
+  let tokenizer: BaseTokenizer<Record<string, unknown>>;
 
   beforeEach(() => {
     // Clear all instances and calls to constructor and all methods:
@@ -29,7 +29,7 @@ describe("BaseTokenizer", () => {
      const expectedConfig: TruncationConfiguration = {
        maxLength: 2,
        strategy: TruncationStrategy.LongestFirst,
-       stride: 0
+       stride: 0,
      };
      expect(tokenizer.truncation).toEqual(expectedConfig);
    });
@@ -52,7 +52,7 @@ describe("BaseTokenizer", () => {
        direction: PaddingDirection.Right,
        padId: 0,
        padToken: "[PAD]",
-       padTypeId: 0
+       padTypeId: 0,
      };
      expect(tokenizer.padding).toEqual(expectedConfig);
    });


@@ -10,12 +10,13 @@ import {
   PaddingOptions,
   Tokenizer,
   TruncationConfiguration,
-  TruncationOptions
+  TruncationOptions,
 } from "../../bindings/tokenizer";
 import { Encoding } from "../encoding";
 
 export type Token = string | AddedToken;
 
+// eslint-disable-next-line @typescript-eslint/ban-types
 export class BaseTokenizer<TConfig extends object> {
   private _truncation?: TruncationConfiguration;
   private _padding?: PaddingConfiguration;
@@ -114,7 +115,7 @@ export class BaseTokenizer<TConfig extends object> {
   ): Promise<Encoding[]> {
     const encodeBatch = promisify(this.tokenizer.encodeBatch.bind(this.tokenizer));
     const rawEncodings = await encodeBatch(sequences, options);
-    return rawEncodings.map(e => new Encoding(e));
+    return rawEncodings.map((e) => new Encoding(e));
   }
 
   /**
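
The `// eslint-disable-next-line @typescript-eslint/ban-types` comment added above, and the `BaseTokenizer<{}>` to `BaseTokenizer<Record<string, unknown>>` change in the test file earlier, both answer the `@typescript-eslint/ban-types` rule, which flags the bare `{}` type: in TypeScript, `{}` means "any non-null, non-undefined value", not "an object with no properties". A short illustrative snippet (variable names are hypothetical):

// `{}` accepts nearly everything, which is rarely what a type argument means:
const loose: {} = 42; // compiles: a number is assignable to {}

// `Record<string, unknown>` actually says "object with string keys":
const strict: Record<string, unknown> = { anyKey: 42 };
// const bad: Record<string, unknown> = 42; // error, as intended

The class declaration keeps its broader `TConfig extends object` bound, so it gets a targeted disable comment instead of a rewrite.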


@@ -14,7 +14,7 @@ describe("BertWordPieceTokenizer", () => {
   describe("when a vocabFile is provided and `addSpecialTokens === true`", () => {
     it("throws a `sepToken error` if no `sepToken` is provided", async () => {
       const options: BertWordPieceOptions = {
-        vocabFile: MOCKS_DIR + "/bert-vocab-empty.txt"
+        vocabFile: MOCKS_DIR + "/bert-vocab-empty.txt",
       };
 
       await expect(BertWordPieceTokenizer.fromOptions(options)).rejects.toThrow(
@@ -24,7 +24,7 @@ describe("BertWordPieceTokenizer", () => {
 
     it("throws a `clsToken error` if no `clsToken` is provided", async () => {
       const options: BertWordPieceOptions = {
-        vocabFile: MOCKS_DIR + "/bert-vocab-without-cls.txt"
+        vocabFile: MOCKS_DIR + "/bert-vocab-without-cls.txt",
       };
 
       await expect(BertWordPieceTokenizer.fromOptions(options)).rejects.toThrow(


@@ -104,7 +104,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer<BertTokenizerConfig> {
     sepToken: "[SEP]",
     stripAccents: true,
     unkToken: "[UNK]",
-    wordpiecesPrefix: "##"
+    wordpiecesPrefix: "##",
   };
 
   private readonly defaultTrainOptions: Required<BertWordPieceTrainOptions> = {
@@ -114,7 +114,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer<BertTokenizerConfig> {
     showProgress: true,
     specialTokens: ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"],
     vocabSize: 30000,
-    wordpiecesPrefix: "##"
+    wordpiecesPrefix: "##",
   };
 
   private constructor(tokenizer: Tokenizer, configuration: BertTokenizerConfig) {
@@ -135,7 +135,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer<BertTokenizerConfig> {
       const fromFiles = promisify<string, WordPieceOptions, Model>(WordPiece.fromFiles);
       model = await fromFiles(opts.vocabFile, {
         unkToken: getTokenContent(opts.unkToken),
-        continuingSubwordPrefix: opts.wordpiecesPrefix
+        continuingSubwordPrefix: opts.wordpiecesPrefix,
       });
     } else {
       model = WordPiece.empty();
@@ -148,7 +148,7 @@ export class BertWordPieceTokenizer extends BaseTokenizer<BertTokenizerConfig> {
       opts.sepToken,
       opts.unkToken,
       opts.padToken,
-      opts.maskToken
+      opts.maskToken,
     ]) {
       if (tokenizer.tokenToId(getTokenContent(token)) !== undefined) {
         tokenizer.addSpecialTokens([token]);


@@ -5,7 +5,7 @@ import { BPE, BPEOptions, Model } from "../../bindings/models";
 import {
   lowercaseNormalizer,
   nfkcNormalizer,
-  sequenceNormalizer
+  sequenceNormalizer,
 } from "../../bindings/normalizers";
 import { whitespaceSplitPreTokenizer } from "../../bindings/pre-tokenizers";
 import { Tokenizer } from "../../bindings/tokenizer";
@@ -75,7 +75,7 @@ type BPETokenizerConfig = BPETokenizerOptions &
 export class BPETokenizer extends BaseTokenizer<BPETokenizerConfig> {
   private static readonly defaultBPEOptions: BPETokenizerConfig = {
     suffix: "</w>",
-    unkToken: "<unk>"
+    unkToken: "<unk>",
   };
 
   private readonly defaultTrainOptions: Required<BPETokenizerTrainOptions> = {
@@ -85,7 +85,7 @@ export class BPETokenizer extends BaseTokenizer<BPETokenizerConfig> {
     showProgress: true,
     specialTokens: ["<unk>"],
     suffix: "</w>",
-    vocabSize: 30000
+    vocabSize: 30000,
   };
 
   private constructor(tokenizer: Tokenizer, configuration: BPETokenizerConfig) {
@@ -105,7 +105,7 @@ export class BPETokenizer extends BaseTokenizer<BPETokenizerConfig> {
       const modelOptions: BPEOptions = {
        dropout: opts.dropout,
        endOfWordSuffix: opts.suffix,
-       unkToken: unkToken
+       unkToken: unkToken,
      };
 
      const fromFiles = promisify<string, string, BPEOptions, Model>(BPE.fromFiles);


@@ -5,7 +5,7 @@ import { BPE, BPEOptions, Model } from "../../bindings/models";
 import {
   lowercaseNormalizer,
   nfkcNormalizer,
-  sequenceNormalizer
+  sequenceNormalizer,
 } from "../../bindings/normalizers";
 import { byteLevelProcessing } from "../../bindings/post-processors";
 import { byteLevelAlphabet, byteLevelPreTokenizer } from "../../bindings/pre-tokenizers";
@@ -72,14 +72,14 @@ type ByteLevelBPETokenizerConfig = ByteLevelBPETokenizerOptions &
 export class ByteLevelBPETokenizer extends BaseTokenizer<ByteLevelBPETokenizerConfig> {
   private static readonly defaultOptions: ByteLevelBPETokenizerConfig = {
     addPrefixSpace: false,
-    trimOffsets: false
+    trimOffsets: false,
   };
 
   private readonly defaultTrainOptions: Required<ByteLevelBPETrainOptions> = {
     minFrequency: 2,
     showProgress: true,
     specialTokens: ["<unk>"],
-    vocabSize: 30000
+    vocabSize: 30000,
   };
 
   private constructor(tokenizer: Tokenizer, configuration: ByteLevelBPETokenizerConfig) {
@@ -127,7 +127,7 @@ export class ByteLevelBPETokenizer extends BaseTokenizer<ByteLevelBPETokenizerCo
     const mergedOptions = { ...this.defaultTrainOptions, ...options };
     const trainer = bpeTrainer({
       ...mergedOptions,
-      initialAlphabet: byteLevelAlphabet()
+      initialAlphabet: byteLevelAlphabet(),
     });
 
     this.tokenizer.train(trainer, files);


@@ -68,7 +68,7 @@ export class SentencePieceBPETokenizer extends BaseTokenizer<
   private static readonly defaultOptions: SentencePieceBPETokenizerConfig = {
     addPrefixSpace: true,
     replacement: "▁",
-    unkToken: "<unk>"
+    unkToken: "<unk>",
   };
 
   private readonly defaultTrainOptions: Required<SentencePieceBPETrainOptions> = {
@@ -77,7 +77,7 @@ export class SentencePieceBPETokenizer extends BaseTokenizer<
     minFrequency: 2,
     showProgress: true,
     specialTokens: ["<unk>"],
-    vocabSize: 30000
+    vocabSize: 30000,
   };
 
   private constructor(
@@ -97,7 +97,7 @@ export class SentencePieceBPETokenizer extends BaseTokenizer<
     if (opts.vocabFile && opts.mergesFile) {
       const modelOptions: BPEOptions = {
         dropout: opts.dropout,
-        unkToken: unkToken
+        unkToken: unkToken,
       };
 
       const fromFiles = promisify<string, string, BPEOptions, Model>(BPE.fromFiles);


@@ -11,6 +11,6 @@ export {
   EncodeInput,
   EncodeOptions,
   TruncationConfiguration,
-  TruncationOptions
+  TruncationOptions,
 } from "./bindings/tokenizer";
 export { Encoding } from "./implementations/encoding";