diff --git a/bindings/node/examples/byte_level_bpe.js b/bindings/node/examples/byte_level_bpe.js new file mode 100644 index 00000000..0f7f8504 --- /dev/null +++ b/bindings/node/examples/byte_level_bpe.js @@ -0,0 +1,8 @@ +const tokenizers = require('..'); + +let bpe = tokenizers.models.BPE.from_files( + "../../data/gpt2-vocab.json", + "../../data/gpt2-merges.txt" +); +let tokenizer = new tokenizers.Tokenizer(bpe); +console.log(bpe, tokenizer); diff --git a/bindings/node/lib/index.js b/bindings/node/lib/index.js index 55124b2e..6f2a9212 100644 --- a/bindings/node/lib/index.js +++ b/bindings/node/lib/index.js @@ -1,8 +1,10 @@ var addon = require('../native'); -let s = "Hey man!"; -if (typeof process.argv[2] == 'string') { - s = process.argv[2]; +exports.Tokenizer = addon.tokenizer_Tokenizer; +exports.models = { + BPE: { + from_files: addon.models_create_BPE_from_files, + empty: addon.models_create_BPE_empty, + }, + WordPiece: addon.models_WordPiece, } - -console.log(addon.WhitespaceTokenizer.tokenize(s)); diff --git a/bindings/node/native/Cargo.lock b/bindings/node/native/Cargo.lock index d855d124..b6de7846 100644 --- a/bindings/node/native/Cargo.lock +++ b/bindings/node/native/Cargo.lock @@ -26,46 +26,9 @@ dependencies = [ ] [[package]] -name = "backtrace" -version = "0.3.40" +name = "autocfg" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "backtrace-sys 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "backtrace-sys" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "bindgen" -version = "0.50.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "cexpr 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "clang-sys 0.28.1 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "which 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "bitflags" @@ -73,38 +36,23 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "byteorder" -version = "1.3.2" +name = "c2-chacha" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "cc" version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "cexpr" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "clang-sys" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", - "libloading 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "clap" version = "2.33.0" @@ -119,52 +67,122 @@ dependencies = [ "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "clicolors-control" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "console" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "clicolors-control 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "encode_unicode 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "termios 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-deque" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-queue" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cslice" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "env_logger" -version = "0.6.2" +name = "either" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "getrandom" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)", - "humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", + "wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "failure" +name = "hermit-abi" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "backtrace 0.3.40 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "fxhash" -version = "0.2.1" +name = "indicatif" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "console 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "number_prefix 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "glob" -version = "0.3.0" +name = "itoa" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "humantime" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -175,28 +193,19 @@ name = "libc" version = "0.2.65" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "libloading" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "log" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "memchr" version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "memoffset" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "neon" version = "0.3.3" @@ -241,68 +250,85 @@ version = "0.1.0" dependencies = [ "neon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "neon-build 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "tokenizers-lib 0.1.0", + "tokenizers 0.0.13", ] [[package]] -name = "nom" -version = "4.2.3" +name = "num_cpus" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "onig" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", - "onig_sys 69.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "onig_sys" -version = "69.2.0" +name = "number_prefix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "ppv-lite86" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rand" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "bindgen 0.50.1 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", - "pkg-config 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "pkg-config" -version = "0.3.16" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "proc-macro2" -version = "0.4.30" +name = "rand_chacha" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "quick-error" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "quote" -version = "0.6.13" +name = "rand_core" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rayon" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rayon-core" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-queue 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -322,8 +348,21 @@ version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "rustc-demangle" -version = "0.1.16" +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ryu" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "scopeguard" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -340,8 +379,23 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "shlex" -version = "0.1.1" +name = "serde" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_json" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "smallvec" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -350,11 +404,11 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "termcolor" -version = "1.0.5" +name = "termios" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "wincolor 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -374,11 +428,27 @@ dependencies = [ ] [[package]] -name = "tokenizers-lib" -version = "0.1.0" +name = "tokenizers" +version = "0.0.13" dependencies = [ + "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", + "indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "onig 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.44 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-normalization-alignments 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode_categories 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -387,8 +457,8 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "unicode-xid" -version = "0.1.0" +name = "unicode_categories" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -397,19 +467,10 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "version_check" -version = "0.1.5" +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "which" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "winapi" version = "0.3.8" @@ -424,82 +485,70 @@ name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "winapi-util" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "wincolor" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [metadata] "checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" -"checksum backtrace 0.3.40 (registry+https://github.com/rust-lang/crates.io-index)" = "924c76597f0d9ca25d762c25a4d369d51267536465dc5064bdf0eb073ed477ea" -"checksum backtrace-sys 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6575f128516de27e3ce99689419835fce9643a9b215a14d2b5b685be018491" -"checksum bindgen 0.50.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cb0e5a5f74b2bafe0b39379f616b5975e08bcaca4e779c078d5c31324147e9ba" +"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" -"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" +"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" "checksum cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)" = "0213d356d3c4ea2c18c40b037c3be23cd639825c18f25ee670ac7813beeef99c" -"checksum cexpr 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a7fa24eb00d5ffab90eaeaf1092ac85c04c64aaf358ea6f84505b8116d24c6af" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" -"checksum clang-sys 0.28.1 (registry+https://github.com/rust-lang/crates.io-index)" = "81de550971c976f176130da4b2978d3b524eaa0fd9ac31f3ceb5ae1231fb4853" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" +"checksum clicolors-control 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90082ee5dcdd64dc4e9e0d37fbf3ee325419e39c0092191e0393df65518f741e" +"checksum console 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f5d540c2d34ac9dd0deb5f3b5f54c36c79efa78f6b3ad19106a554d07a7b5d9f" +"checksum crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3aa945d63861bfe624b55d153a39684da1e8c0bc8fba932f7ee3a3c16cea3ca" +"checksum crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5064ebdbf05ce3cb95e45c8b086f72263f4166b29b97f6baff7ef7fe047b55ac" +"checksum crossbeam-queue 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db" +"checksum crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4" "checksum cslice 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "697c714f50560202b1f4e2e09cd50a421881c83e9025db75d15f276616f04f40" -"checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" -"checksum failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "f8273f13c977665c5db7eb2b99ae520952fe5ac831ae4cd09d80c4c7042b5ed9" -"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" -"checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" +"checksum encode_unicode 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" +"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772" +"checksum indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8572bccfb0665e70b7faf44ee28841b8e0823450cd4ad562a76b5a3c4bf48487" +"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8" -"checksum libloading 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753" -"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" +"checksum memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" "checksum neon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "53b85accbbd250627f899a6fc1f220bbb4c8c2ff6dc71830dc6b752b39c2eb97" "checksum neon-build 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ae406bf1065c4399e69d328a3bd8d4f088f2a205dc3881bf68c0ac775bfef337" "checksum neon-runtime 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d8465ac4ed3f340dead85e053b75a5f639f48ac6343b3523eff90a751758eead" "checksum neon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ae4cf3871ca5a395077e68144c1754e94e9e1e3329e7f8399d999ca573ed89a" -"checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" -"checksum onig 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e4e723fc996fff1aeab8f62205f3e8528bf498bdd5eadb2784d2d31f30077947" -"checksum onig_sys 69.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0a8d4efbf5f59cece01f539305191485b651acb3785b9d5eef05749f0496514e" -"checksum peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" -"checksum pkg-config 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)" = "72d5370d90f49f70bd033c3d75e87fc529fbfff9d6f7cccef07d6170079d91ea" -"checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" -"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" -"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +"checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72" +"checksum number_prefix 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" +"checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" +"checksum rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412" +"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" +"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +"checksum rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" +"checksum rayon-core 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" "checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd" "checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716" -"checksum rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +"checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" +"checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -"checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" +"checksum serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)" = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449" +"checksum serde_json 1.0.44 (registry+https://github.com/rust-lang/crates.io-index)" = "48c575e0cc52bdd09b47f330f646cf59afc586e9c4e3ccd6fc1f625b8ea1dad7" +"checksum smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44e59e0c9fa00817912ae6e4e6e3c4fe04455e75699d06eedc7d85917ed8e8f4" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" -"checksum termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "96d6098003bde162e4277c70665bd87c326f5a0c3f3fbfb285787fa482d54e6e" +"checksum termios 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "72b620c5ea021d75a735c943269bb07d30c9b77d6ac6b236bc8b5c496ef05625" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum unicode-normalization-alignments 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" "checksum unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20" -"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" +"checksum unicode_categories 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" -"checksum which 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b57acb10231b9493c8472b20cb57317d0679a49e0bdbee44b3b803a6473af164" +"checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7168bab6e1daee33b4557efd0e95d5ca70a03706d39fa5f3fe7a236f584b03c9" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -"checksum wincolor 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "96f5016b18804d24db43cebf3c77269e7569b8954a8464501c216cc5e070eaa9" diff --git a/bindings/node/native/Cargo.toml b/bindings/node/native/Cargo.toml index f086f581..b631ee2f 100644 --- a/bindings/node/native/Cargo.toml +++ b/bindings/node/native/Cargo.toml @@ -15,4 +15,4 @@ neon-build = "0.3.3" [dependencies] neon = "0.3.3" -tokenizers-lib = { path = "../../../tokenizers" } +tokenizers = { path = "../../../tokenizers" } diff --git a/bindings/node/native/src/lib.rs b/bindings/node/native/src/lib.rs index b2a95f37..969feb12 100644 --- a/bindings/node/native/src/lib.rs +++ b/bindings/node/native/src/lib.rs @@ -1,27 +1,17 @@ -#[macro_use] extern crate neon; extern crate tokenizers as tk; +mod models; +mod tokenizer; +mod utils; + use neon::prelude::*; -fn tokenize(mut cx: FunctionContext) -> JsResult { - let s = cx.argument::(0)?.value(); - println!("Tokenizing in rust: {:?}", s); - let result = tk::WhitespaceTokenizer::tokenize(&s); - let js_array = JsArray::new(&mut cx, result.len() as u32); - for (i, token) in result.iter().enumerate() { - let n = cx.string(token); - js_array.set(&mut cx, i as u32, n)?; - } - Ok(js_array) -} +register_module!(mut m, { + // Tokenizer + tokenizer::register(&mut m, "tokenizer")?; + // Models + models::register(&mut m, "models")?; -register_module!(mut cx, { - let whitespace_tokenizer = JsObject::new(&mut cx); - let tokenize = JsFunction::new(&mut cx, tokenize).unwrap(); - whitespace_tokenizer - .set(&mut cx, "tokenize", tokenize) - .unwrap(); - cx.export_value("WhitespaceTokenizer", whitespace_tokenizer); Ok(()) }); diff --git a/bindings/node/native/src/models.rs b/bindings/node/native/src/models.rs new file mode 100644 index 00000000..68396027 --- /dev/null +++ b/bindings/node/native/src/models.rs @@ -0,0 +1,97 @@ +extern crate tokenizers as tk; + +use crate::utils::Container; +use neon::prelude::*; + +/// Model +pub struct Model { + pub model: Container, +} + +declare_types! { + pub class JsModel for Model { + init(_) { + // This should not be called from JS + Ok(Model { + model: Container::Empty + }) + } + } +} + +/// create_BPE(vocab: String, merges: String, options?: { +/// cache_capacity?: number, +/// dropout?: number, +/// unk_token?: String, +/// continuing_subword_prefix?: String, +/// end_of_word_suffix?: String +/// }) +pub fn create_bpe_from_files(mut cx: FunctionContext) -> JsResult { + let vocab = cx.argument::(0)?.value() as String; + let merges = cx.argument::(1)?.value() as String; + let options = cx.argument_opt(2); + + let mut model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?; + let mut builder = tk::models::bpe::BPE::from_files(&vocab, &merges) + .or_else(|e| cx.throw_error(format!("{}", e)))?; + + if let Some(options) = options { + if let Ok(options) = options.downcast::() { + if let Ok(cache_capacity) = options.get(&mut cx, "cache_capacity") { + let cache_capacity = cache_capacity + .downcast::() + .or_throw(&mut cx)? + .value() as usize; + builder = builder.cache_capacity(cache_capacity); + } + if let Ok(dropout) = options.get(&mut cx, "dropout") { + let dropout = dropout.downcast::().or_throw(&mut cx)?.value() as f32; + builder = builder.dropout(dropout); + } + if let Ok(unk_token) = options.get(&mut cx, "unk_token") { + let unk_token = + unk_token.downcast::().or_throw(&mut cx)?.value() as String; + builder = builder.unk_token(unk_token); + } + if let Ok(prefix) = options.get(&mut cx, "continuing_subword_prefix") { + let prefix = prefix.downcast::().or_throw(&mut cx)?.value() as String; + builder = builder.continuing_subword_prefix(prefix); + } + if let Ok(suffix) = options.get(&mut cx, "end_of_word_suffix") { + let suffix = suffix.downcast::().or_throw(&mut cx)?.value() as String; + builder = builder.end_of_word_suffix(suffix); + } + } + } + + match builder.build() { + Ok(bpe) => { + let guard = cx.lock(); + model.borrow_mut(&guard).model.to_owned(Box::new(bpe)); + } + Err(e) => return cx.throw_error(format!("{}", e)), + }; + + Ok(model) +} + +/// create_bpe_empty() +pub fn create_bpe_empty(mut cx: FunctionContext) -> JsResult { + let mut model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?; + let bpe = tk::models::bpe::BPE::default(); + + let guard = cx.lock(); + model.borrow_mut(&guard).model.to_owned(Box::new(bpe)); + + Ok(model) +} + +/// Register everything here +pub fn register(m: &mut ModuleContext, prefix: &str) -> Result<(), neon::result::Throw> { + m.export_function( + &format!("{}_create_BPE_from_files", prefix), + create_bpe_from_files, + )?; + m.export_function(&format!("{}_create_BPE_empty", prefix), create_bpe_empty)?; + Ok(()) +} diff --git a/bindings/node/native/src/tokenizer.rs b/bindings/node/native/src/tokenizer.rs new file mode 100644 index 00000000..db6c0bde --- /dev/null +++ b/bindings/node/native/src/tokenizer.rs @@ -0,0 +1,54 @@ +extern crate tokenizers as tk; + +use crate::models::*; +use neon::prelude::*; + +/// Tokenizer +pub struct Tokenizer { + tokenizer: tk::tokenizer::Tokenizer, +} + +declare_types! { + pub class JsTokenizer for Tokenizer { + // Create + init(mut cx) { + let mut model = cx.argument::(0)?; + if let Some(instance) = { + let guard = cx.lock(); + let mut model = model.borrow_mut(&guard); + model.model.to_pointer() + } { + Ok(Tokenizer { + tokenizer: tk::tokenizer::Tokenizer::new(instance) + }) + } else { + cx.throw_error("The Model is already being used in another Tokenizer") + } + } + + method with_model(mut cx) { + let mut model = cx.argument::(0)?; + if let Some(instance) = { + let guard = cx.lock(); + let mut model = model.borrow_mut(&guard); + model.model.to_pointer() + } { + let mut this = cx.this(); + { + let guard = cx.lock(); + let mut tokenizer = this.borrow_mut(&guard); + tokenizer.tokenizer.with_model(instance); + } + + Ok(cx.undefined().upcast()) + } else { + cx.throw_error("The Model is already being used in another Tokenizer") + } + } + } +} + +pub fn register(m: &mut ModuleContext, prefix: &str) -> Result<(), neon::result::Throw> { + m.export_class::(&format!("{}_Tokenizer", prefix))?; + Ok(()) +} diff --git a/bindings/node/native/src/utils.rs b/bindings/node/native/src/utils.rs new file mode 100644 index 00000000..0abf2ea9 --- /dev/null +++ b/bindings/node/native/src/utils.rs @@ -0,0 +1,77 @@ +/// A Container type +/// +/// Provides an interface to allow transfer of ownership between Python and Rust. +/// It either contains a Box with full ownership of the content, or a pointer to the content. +/// +/// The main goal here is to allow Python calling into Rust to initialize some objects. Later +/// these objects may need to be used by Rust who will expect to take ownership. Since Python +/// does not allow any sort of ownership transfer, it will keep a reference to this object +/// until it gets cleaned up by the GC. In this case, we actually give the ownership to Rust, +/// and just keep a pointer in the Python object. +pub enum Container { + Owned(Box), + Pointer(*mut T), + Empty, +} + +impl Container +where + T: ?Sized, +{ + pub fn from_ref(reference: &Box) -> Self { + let content: *const T = &**reference; + Container::Pointer(content as *mut _) + } + + /// Consumes ourself and return the Boxed element if we have the ownership, None otherwise. + pub fn take(self) -> Option> { + match self { + Container::Owned(obj) => Some(obj), + Container::Pointer(_) => None, + Container::Empty => None, + } + } + + /// Replace an empty content by the new provided owned one, otherwise do nothing + pub fn to_owned(&mut self, o: Box) { + if let Container::Empty = self { + unsafe { + let new_container = Container::Owned(o); + std::ptr::write(self, new_container); + } + } + } + + /// Return the owned T, keeping a Pointer to it if we currently own it. None otherwise + pub fn to_pointer(&mut self) -> Option> { + if let Container::Owned(_) = self { + unsafe { + let old_container = std::ptr::read(self); + let ptr = Box::into_raw(old_container.take().unwrap()); + let new_container = Container::Pointer(ptr); + std::ptr::write(self, new_container); + + Some(Box::from_raw(ptr)) + } + } else { + None + } + } + + pub fn execute(&self, closure: F) -> U + where + F: FnOnce(Option<&Box>) -> U, + { + match self { + Container::Owned(val) => closure(Some(val)), + Container::Pointer(ptr) => unsafe { + let val = Box::from_raw(*ptr); + let res = closure(Some(&val)); + // We call this to make sure we don't drop the Box + Box::into_raw(val); + res + }, + Container::Empty => closure(None), + } + } +}