tokenizers/bindings/node/Makefile
Nicolas Patry 2e2e7558f7 Add CTC Decoder for Wave2Vec models (#693)
* Rust - add a CTCDecoder as a separate mod

* Adding bindings to Node + Python.

* Clippy update.

* Stub.

* Fixing roberta.json URLs.

* Moving test files to hf.co.

* Update cargo check and clippy to 1.52.

* Inner ':' is actually used for domains in Sphinx.

Making `domain` work correctly was just too much work, so I went the easy
way and used global roles for the custom Rust extension.

* Update struct naming and docs

* Update changelog

Co-authored-by: Thomaub <github.thomaub@gmail.com>
Co-authored-by: Anthony MOI <m.anthony.moi@gmail.com>
2021-05-20 09:30:09 -04:00

Makefile

.PHONY: style check-style test

DATA_DIR = data

dir_guard=@mkdir -p $(@D)

# Format source code automatically
style:
	npm run lint

# Check the source code is formatted correctly
check-style:
	npm run lint-check

TESTS_RESOURCES = $(DATA_DIR)/small.txt $(DATA_DIR)/roberta.json $(DATA_DIR)/tokenizer-wiki.json $(DATA_DIR)/bert-wiki.json

# Launch the test suite
test: $(TESTS_RESOURCES)
	npm run test

# Full text corpus, used only to derive the small test sample
$(DATA_DIR)/big.txt :
	$(dir_guard)
	wget https://norvig.com/big.txt -O $@

# First 100 lines of big.txt, used as a lightweight test fixture
$(DATA_DIR)/small.txt : $(DATA_DIR)/big.txt
	head -100 $(DATA_DIR)/big.txt > $@

# RoBERTa tokenizer definition used by the tests
$(DATA_DIR)/roberta.json :
	$(dir_guard)
	wget https://huggingface.co/roberta-large/raw/main/tokenizer.json -O $@

# Tokenizer file for the quicktour documentation tests
$(DATA_DIR)/tokenizer-wiki.json :
	$(dir_guard)
	wget https://s3.amazonaws.com/models.huggingface.co/bert/anthony/doc-quicktour/tokenizer.json -O $@

# Tokenizer file for the pipeline documentation tests
$(DATA_DIR)/bert-wiki.json :
	$(dir_guard)
	wget https://s3.amazonaws.com/models.huggingface.co/bert/anthony/doc-pipeline/tokenizer.json -O $@
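
A minimal usage sketch (assuming GNU make, wget, head, and npm are available, and the commands are run from tokenizers/bindings/node):

	make style        # format the source code (npm run lint)
	make check-style  # check formatting only (npm run lint-check)
	make test         # download the test resources into data/ and run the test suite (npm run test)

The data files are fetched only when missing or out of date; removing the data/ directory forces a re-download.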