Move to maturin, mimicking the move made for safetensors. + Rewritten node bindings. (#1331)

* Move to maturin, mimicking the move made for `safetensors`.

* Tmp.

* Fix sdist.

* Wat?

* Clippy 1.72

* Remove if.

* Conda sed.

* Fix doc check workflow.

* Moving to maturin AND removing the http + openssl mess (smoothing the transition to `huggingface_hub`).

* Fix dep

* Black.

* New node bindings.

* Fix docs + node cache ?

* Yarn.

* Working dir.

* Extension module.

* Put back interpreter.

* Remove cache.

* New attempt

* Multi python.

* Remove FromPretrained.

* Remove traces of `fromPretrained` (a file-based replacement is sketched below).

* Drop 3.12 for windows?

* Typo.

* Put back the default feature for ignoring links during simple test.

* Fix ?

* x86_64 -> x64.

* Remove warning for windows bindings.

* Exclude aarch64.

* Include/exclude.

* Put back workflows in correct states.
Author: Nicolas Patry
Date: 2023-08-28 16:24:14 +02:00 (committed by GitHub)
Parent: f2952020d5
Commit: d2010d5165
155 changed files with 12988 additions and 16409 deletions
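With `fromPretrained` removed from the node bindings, loading a Hub-hosted tokenizer becomes a two-step affair: fetch `tokenizer.json` yourself, then load it with `Tokenizer.fromFile` (the API shown in the rewritten README further down). A minimal TypeScript sketch, assuming Node 18+ for the global `fetch` and the usual `https://huggingface.co/<repo>/resolve/<revision>/tokenizer.json` URL layout; the helper itself is hypothetical and not part of the bindings:

```ts
import { writeFile } from "fs/promises";
import { Tokenizer } from "tokenizers";

// Hypothetical helper: download tokenizer.json from the Hub (URL layout assumed),
// write it to disk, then load it with the file-based API from this PR.
async function tokenizerFromHub(repoId: string, revision = "main"): Promise<Tokenizer> {
  const url = `https://huggingface.co/${repoId}/resolve/${revision}/tokenizer.json`;
  const res = await fetch(url); // global fetch, Node 18+
  if (!res.ok) throw new Error(`Failed to download ${url}: ${res.status}`);
  await writeFile("tokenizer.json", Buffer.from(await res.arrayBuffer()));
  return Tokenizer.fromFile("tokenizer.json");
}
```

Usage would then be `const tok = await tokenizerFromHub("bert-base-uncased")`, after which `await tok.encode(...)` works as in the README example.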

View File

@ -1,2 +1,2 @@
cd bindings\python
%PYTHON% setup.py install --prefix=%PREFIX%
%PYTHON% -m pip install . --prefix=%PREFIX%

View File

@ -1,2 +1,2 @@
cd bindings/python
$PYTHON setup.py install --prefix=$PREFIX
$PYTHON -m pip install . --prefix=$PREFIX

View File

@ -15,6 +15,7 @@ requirements:
- setuptools-rust
- pkg-config
- openssl
- maturin
run:
- python x.x

View File

@ -28,7 +28,7 @@ jobs:
- name: Build tokenizers
working-directory: ./bindings/python
run: python setup.py install
run: pip install -e .
- name: Build documentation
working-directory: ./docs

View File

@ -8,24 +8,25 @@ env:
on:
push:
tags:
- v*
- node-v*
jobs:
rust_publish:
build:
env:
MACOSX_DEPLOYMENT_TARGET: 10.11
strategy:
matrix:
os: [windows-2019, macos-latest, ubuntu-latest]
node-version: [10.x, 12.x, 13.x, 14.x, 15.x]
exclude:
# Exclude node 15 for windows
- os: windows-2019
node-version: 15.x
runs-on: ${{ matrix.os }}
settings:
- host: macos-latest
target: x86_64-apple-darwin
- host: windows-latest
target: x86_64-pc-windows-msvc
- host: ubuntu-latest
target: x86_64-unknown-linux-gnu
runs-on: ${{ matrix.settings.host }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v1
- name: Install Rust
uses: actions-rs/toolchain@v1
@ -43,79 +44,65 @@ jobs:
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.toml') }}
- name: Install Node ${{ matrix.node-version }}
uses: actions/setup-node@v1
uses: actions/setup-node@v3
with:
node-version: ${{ matrix.node-version }}
- name: Get NPM cache directory
id: npm-cache
run: |
echo "::set-output name=dir::$(npm config get cache)"
- name: Cache NPM cache
uses: actions/cache@v1
with:
path: ${{ steps.npm-cache.outputs.dir }}
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
- name: Install Python
uses: actions/setup-python@v1
with:
python-version: 2.x
node-version: latest
cache: yarn
cache-dependency-path: ./bindings/node/
- name: Install npm dependencies
working-directory: ./bindings/node
run: npm ci --ignore-scripts
run: yarn install
- name: Build and package rust
working-directory: ./bindings/node
run: node build.js --package-rust
run: |
yarn build &&
strip -x *.node
- name: Install Python
uses: actions/setup-python@v1
with:
python-version: 3.x
- name: Upload tarball
working-directory: ./bindings/node
shell: bash
run: |
pip install awscli
aws s3 sync --exact-timestamps --exclude "*" --include "*.tar.gz" --acl public-read ./bin-package "s3://tokenizers-releases/node/$(node -p -e 'require("./package.json").version')"
npm_publish:
name: Build and publish JS lib
needs: rust_publish
- name: Upload artifact
uses: actions/upload-artifact@v3
with:
name: bindings-${{ matrix.settings.target }}
path: ${{ env.APP_NAME }}bindings/node/*.node
if-no-files-found: error
publish:
name: Publish
runs-on: ubuntu-latest
needs:
- build
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Install Node 12.x
uses: actions/setup-node@v1
- uses: actions/checkout@v3
- name: Setup node
uses: actions/setup-node@v3
with:
registry-url: https://registry.npmjs.org
node-version: 12.x
- name: Get NPM cache directory
id: npm-cache
node-version: latest
check-latest: true
cache: yarn
cache-dependency-path: ./bindings/node/
- name: Install dependencies
working-directory: ./bindings/node
run: yarn install
- name: Download all artifacts
uses: actions/download-artifact@v3
with:
path: ./bindings/node/artifacts
- name: Move artifacts
working-directory: ./bindings/node
run: yarn artifacts
- name: List packages
working-directory: ./bindings/node
run: ls -R ./npm
shell: bash
- name: Publish
working-directory: ./bindings/node
run: |
echo "::set-output name=dir::$(npm config get cache)"
- name: Cache NPM cache
uses: actions/cache@v1
with:
path: ${{ steps.npm-cache.outputs.dir }}
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
- name: Install npm dependencies
working-directory: ./bindings/node
run: npm ci --ignore-scripts
- name: Build and publish on NPM
working-directory: ./bindings/node
run: node build.js --npm-publish
echo "//registry.npmjs.org/:_authToken=$NPM_TOKEN" >> ~/.npmrc
npm publish --access public --tag next
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}

View File

@ -1,5 +1,4 @@
name: Node
on:
push:
branches:
@ -16,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v1
- name: Install Rust
uses: actions-rs/toolchain@v1
@ -33,46 +32,33 @@ jobs:
path: ~/.cargo/registry
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
- name: Install Node 12.x
uses: actions/setup-node@v1
- name: Install Node
uses: actions/setup-node@v3
with:
node-version: 12.x
- name: Get NPM cache directory
id: npm-cache
run: |
echo "::set-output name=dir::$(npm config get cache)"
- name: Cache NPM cache
uses: actions/cache@v1
with:
path: ${{ steps.npm-cache.outputs.dir }}
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
- name: Install npm dependencies
node-version: latest
- name: Install dependencies
working-directory: ./bindings/node
run: npm ci --ignore-scripts
run: yarn install
- name: Build all
working-directory: ./bindings/node
run: node build.js --all
run: yarn build
- name: Lint Rust formatting
uses: actions-rs/cargo@v1
with:
command: fmt
args: --manifest-path ./bindings/node/native/Cargo.toml -- --check
args: --manifest-path ./bindings/node/Cargo.toml -- --check
- name: Lint Rust with Clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --manifest-path ./bindings/node/native/Cargo.toml --all-targets --all-features -- -D warnings
args: --manifest-path ./bindings/node/Cargo.toml --all-targets --all-features -- -D warnings
- name: Lint TS
working-directory: ./bindings/node
run: npm run lint-check
run: yarn lint
- name: Run JS tests
working-directory: ./bindings/node

View File

@ -14,8 +14,9 @@ jobs:
strategy:
matrix:
os: [windows-latest, macos-latest]
# Conda does not support 3.11 yet.
python: ["3.7", "3.8", "3.9", "3.10"]
# 3.11 not available on Conda yet.
python: ["3.8", "3.9", "3.10", "3.11"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
@ -44,7 +45,7 @@ jobs:
- name: Extract version
shell: bash -l {0}
working-directory: ./bindings/python
run: echo "TOKENIZERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV
run: echo "TOKENIZERS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV
- name: Build conda packages
shell: bash -l {0}
@ -65,14 +66,16 @@ jobs:
strategy:
fail-fast: false
matrix:
python: [37, 38, 39]
python: [38, 39, 310, 311]
include:
- python: 37
checksum: a1a7285dea0edc430b2bc7951d89bb30a2a1b32026d2a7b02aacaaa95cf69c7c
- python: 38
checksum: 935d72deb16e42739d69644977290395561b7a6db059b316958d97939e9bdf3d
checksum: e2a4438671e0e42c5bba14cb51de6ce9763938184d6ca2967340bbe972bbe7e6
- python: 39
checksum: 1ea2f885b4dbc3098662845560bc64271eb17085387a70c2ba3f29fff6f8d52f
checksum: 9829d95f639bd0053b2ed06d1204e60644617bf37dd5cc57523732e0e8d64516
- python: 310
checksum: ea5e6e8a3d5a0247b9df85382d27220fac8e59b5778fd313c5913879cd9baafc
- python: 311
checksum: 634d76df5e489c44ade4085552b97bebc786d49245ed1a830022b0b406de5817
steps:
- name: Checkout repository
@ -81,10 +84,9 @@ jobs:
- name: Install miniconda
run: |
yum install -y wget openssl-devel
export FILENAME=Miniconda3-py${{ matrix.python }}_4.10.3-Linux-x86_64.sh
export FILENAME=Miniconda3-py${{ matrix.python }}_23.5.2-0-Linux-x86_64.sh
wget https://repo.anaconda.com/miniconda/$FILENAME
sha256sum $FILENAME | awk '$1=="${{ matrix.checksum}}"{print"good to go"}'
yum remove -y openssl-devel
bash $FILENAME -b -p $HOME/miniconda
source $HOME/miniconda/bin/activate
@ -115,7 +117,7 @@ jobs:
working-directory: ./bindings/python
run: |
source $HOME/miniconda/bin/activate
echo "TOKENIZERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV
echo "TOKENIZERS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV
- name: Build conda packages
shell: bash -l {0}

View File

@ -1,120 +0,0 @@
name: Python Release extra
on:
push:
tags:
- v*
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN_DIST }}
DIST_DIR: '${{ github.sha }}_extra'
jobs:
create_wheels_manylinux_2014_ppc64le:
runs-on: ubuntu-latest
name: Create wheels for manylinux2014 - PowerPC
steps:
- uses: actions/checkout@v3
- name: Upgrade libssl
run: sudo apt-get install -y libssl-dev
- name: Pull images
run: |
docker pull multiarch/qemu-user-static
docker pull quay.io/pypa/manylinux2014_ppc64le:latest
- name: Install QEMU
run: |
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
- name: Build and audit wheels
working-directory: ./bindings/python
run: |
docker run -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION -e DIST_DIR \
--rm -v `pwd`/../..:/io quay.io/pypa/manylinux2014_ppc64le \
/bin/bash -c "yum install -y openssl-devel && cd /io/bindings/python; sh build-wheels.sh"
create_wheels_manylinux_2014_aarch64:
runs-on: ubuntu-latest
name: Create wheels for manylinux2014 - Aarch64
steps:
- uses: actions/checkout@v2
- name: Upgrade libssl
run: sudo apt-get install -y libssl-dev
- name: Pull images
run: |
docker pull multiarch/qemu-user-static
docker pull quay.io/pypa/manylinux2014_aarch64:latest
- name: Install QEMU
run: |
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
- name: Build and audit wheels
working-directory: ./bindings/python
run: |
docker run -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION -e DIST_DIR \
--rm -v `pwd`/../..:/io quay.io/pypa/manylinux2014_aarch64 \
/bin/bash -c "yum install -y openssl-devel && cd /io/bindings/python; sh build-wheels.sh"
create_wheels_manylinux_2014_x390x:
runs-on: ubuntu-latest
name: Create wheels for manylinux2014 - S390X
steps:
- uses: actions/checkout@v2
- name: Upgrade libssl
run: sudo apt-get install -y libssl-dev
- name: Pull images
run: |
docker pull multiarch/qemu-user-static
docker pull quay.io/pypa/manylinux2014_s390x:latest
- name: Install QEMU
run: |
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
- name: Build and audit wheels
working-directory: ./bindings/python
run: |
docker run -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION -e DIST_DIR \
--rm -v `pwd`/../..:/io quay.io/pypa/manylinux2014_s390x \
/bin/bash -c "yum install -y openssl-devel && cd /io/bindings/python; sh build-wheels.sh"
upload_package:
name: Upload package to PyPi
runs-on: ubuntu-latest
needs:
- create_wheels_manylinux_2014_ppc64le
- create_wheels_manylinux_2014_aarch64
- create_wheels_manylinux_2014_x390x
steps:
- uses: actions/checkout@v2
- name: Install Python
uses: actions/setup-python@v1
- name: Retrieve all wheels
shell: bash
run: |
pip install awscli
aws s3 sync "s3://tokenizers-releases/python/$DIST_DIR" ./bindings/python/dist
- name: Install dependencies
run: |
pip install setuptools wheel setuptools-rust
- name: Upload to PyPi
working-directory: ./bindings/python
run: |
pip install twine
twine upload dist/* -u __token__ -p "$PYPI_TOKEN"

View File

@ -21,67 +21,108 @@ jobs:
run: cat Cargo.lock
working-directory: ./bindings/python
create_wheels_manylinux:
runs-on: ubuntu-latest
build:
name: build on ${{ matrix.platform || matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }})
# only run on push to main and on release
needs: [lock_exists]
name: Create wheels for manylinux2014
container: quay.io/pypa/manylinux2014_x86_64
if: startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build')
strategy:
fail-fast: false
matrix:
os: [ubuntu, macos, windows]
target: [x86_64, aarch64]
manylinux: [auto]
include:
- os: ubuntu
platform: linux
- os: windows
ls: dir
interpreter: 3.7 3.8 3.9 3.10 3.11 3.12 pypy3.8 pypy3.9 pypy3.10
- os: windows
ls: dir
target: x86_64
python-architecture: x64
interpreter: 3.7 3.8 3.9 3.10 3.11
- os: windows
ls: dir
target: i686
python-architecture: x86
interpreter: 3.7 3.8 3.9 3.10 3.11
# - os: windows
# ls: dir
# target: aarch64
# interpreter: 3.11 3.12
- os: macos
target: aarch64
interpreter: 3.7 3.8 3.9 3.10 3.11 3.12 pypy3.8 pypy3.9 pypy3.10
- os: ubuntu
platform: linux
target: i686
- os: ubuntu
platform: linux
target: aarch64
- os: ubuntu
platform: linux
target: armv7
interpreter: 3.7 3.8 3.9 3.10 3.11 3.12
# musllinux
- os: ubuntu
platform: linux
target: x86_64
manylinux: musllinux_1_1
- os: ubuntu
platform: linux
target: aarch64
manylinux: musllinux_1_1
- os: ubuntu
platform: linux
target: ppc64le
interpreter: 3.7 3.8 3.9 3.10 3.11 3.12
- os: ubuntu
platform: linux
target: s390x
interpreter: 3.7 3.8 3.9 3.10 3.11 3.12
exclude:
- os: windows
target: aarch64
# # Optimized PGO builds for x86_64 manylinux and windows follow a different matrix,
# # maybe in future maturin-action can support this automatically
# - os: ubuntu
# target: x86_64
# manylinux: auto
# - os: windows
# target: x86_64
# Windows on arm64 only supports Python 3.11+
runs-on: ${{ matrix.os }}-latest
steps:
- uses: actions/checkout@v3
- name: Install dependencies
run: yum install -y openssl-devel
- name: Build and audit wheels
working-directory: ./bindings/python
run: sh build-wheels.sh
create_wheels_windows:
name: Windows
runs-on: windows-latest
needs: [lock_exists]
strategy:
matrix:
python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
bits: ["32", "64"]
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install Rust 32bits
if: ${{ matrix.os == '32' }}
uses: actions-rs/toolchain@v1
with:
toolchain: stable-i686-pc-windows-msvc
override: true
- name: Install Rust 64bits
if: ${{ matrix.os == '32' }}
uses: actions-rs/toolchain@v1
with:
toolchain: stable-i686-pc-windows-msvc
override: true
- name: Override toolchain
if: ${{ matrix.os == '32' }}
shell: bash
working-directory: ./bindings/python
run: echo "stable-i686-pc-windows-msvc" > rust-toolchain
- name: Install Python
- name: set up python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python }}
architecture: x86
python-version: "3.11"
architecture: ${{ matrix.python-architecture || 'x64' }}
- name: Install dependencies
run: |
# On old versions of python there is an old version of setuptools already installed
pip install setuptools wheel setuptools-rust --ignore-installed --force-reinstall
- run: pip install -U twine
- name: Build wheel
- name: build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
working-directory: ./bindings/python
manylinux: ${{ matrix.manylinux || 'auto' }}
container: ${{ matrix.container }}
args: --release --out dist --interpreter ${{ matrix.interpreter || '3.7 3.8 3.9 3.10 3.11 3.12 pypy3.7 pypy3.8 pypy3.9 pypy3.10' }} ${{ matrix.extra-build-args }}
rust-toolchain: stable
docker-options: -e CI
- run: ${{ matrix.ls || 'ls -lh' }} dist/
working-directory: ./bindings/python
- run: twine check --strict dist/*
working-directory: ./bindings/python
run: python setup.py bdist_wheel
- name: Upload wheels
shell: bash
@ -89,96 +130,23 @@ jobs:
pip install awscli
aws s3 sync --exact-timestamps ./bindings/python/dist "s3://tokenizers-releases/python/$DIST_DIR"
create_wheels_macos_conda:
name: MacOS - Conda
runs-on: ${{ matrix.os }}
# - uses: actions/upload-artifact@v3
# working-directory: ./bindings/python/
# with:
# name: pypi_files
# path: dist
build-sdist:
name: build sdist
needs: [lock_exists]
strategy:
matrix:
os: [macos-latest]
# 3.11 not available on Conda yet.
python: ["3.7", "3.8", "3.9", "3.10"]
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install miniconda
uses: conda-incubator/setup-miniconda@v2
- uses: actions/checkout@v3
- uses: PyO3/maturin-action@v1
with:
auto-update-conda: true
python-version: ${{ matrix.python }}
- name: Conda info
shell: bash -l {0}
run: conda info
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
- name: Setup conda env
shell: bash -l {0}
run: |
conda install setuptools-rust
conda install -c defaults anaconda-client conda-build
- name: Extract version
shell: bash -l {0}
working-directory: ./bindings/python
run: echo "TOKENIZERS_VERSION=`python setup.py --version`" >> $GITHUB_ENV
- name: Build conda packages
shell: bash -l {0}
working-directory: ./bindings/python
run: |
MACOSX_DEPLOYMENT_TARGET=10.11 python setup.py bdist_wheel
- name: Upload wheels
shell: bash
run: |
pip install awscli
aws s3 sync --exact-timestamps ./bindings/python/dist "s3://tokenizers-releases/python/$DIST_DIR"
create_wheels_macos:
name: MacOS
runs-on: ${{ matrix.os.os }}
needs: [lock_exists]
strategy:
matrix:
python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
# os: [{os: "macos-11", target: "11.0"}, {os: "macos-12"}, {os: "macos-13"}, {os: "macos-13", target: "14.0"}]
os: [{os: "macos-11", target: "11.0"}, {os: "macos-12"}, {os: "macos-13"}]
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python }}
- name: Install dependencies
run: |
# On old versions of python there is an old version of setuptools already installed
pip install setuptools wheel setuptools-rust --ignore-installed --force-reinstall
- name: Override target
if: ${{ matrix.os.target }}
working-directory: ./bindings/python
run: echo "MACOSX_DEPLOYMENT_TARGET=${{ matrix.os.target }}" >> $GITHUB_ENV
- name: Build wheel
working-directory: ./bindings/python
run: echo $MACOX_DEPLOYMENT_TARGET && python setup.py bdist_wheel
command: sdist
args: --out dist
rust-toolchain: stable
- name: Upload wheels
shell: bash
run: |
@ -186,45 +154,10 @@ jobs:
aws s3 sync --exact-timestamps ./bindings/python/dist "s3://tokenizers-releases/python/$DIST_DIR"
create_wheels_macos_arm64:
name: MacOS M1
runs-on: macos-arm64
needs: [lock_exists]
strategy:
matrix:
python: ["3.8.16", "3.9.13", "3.10.6", "3.11.0"]
# target: ["12.0", "13.0", "14.0"]
target: ["12.0", "13.0"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install Python
shell: bash
run: |
echo $HOME
export PYENV_ROOT="$HOME/.pyenv"
command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"
eval "$(pyenv init -)"
pyenv shell ${{ matrix.python }}
which pyenv
which python
pip install -U setuptools wheel setuptools-rust awscli
cd ./bindings/python
MACOSX_DEPLOYMENT_TARGET=${{ matrix.target }} python setup.py bdist_wheel
cd ../../
aws s3 sync --exact-timestamps ./bindings/python/dist "s3://tokenizers-releases/python/$DIST_DIR"
Upload_package:
upload_package:
name: Upload package to PyPi
runs-on: ubuntu-latest
needs: [create_wheels_manylinux, create_wheels_windows, create_wheels_macos, create_wheels_macos_arm64, create_wheels_macos_conda]
needs: [build, build-sdist]
steps:
- uses: actions/checkout@v3
@ -241,14 +174,6 @@ jobs:
pip install awscli
aws s3 sync "s3://tokenizers-releases/python/$DIST_DIR" ./bindings/python/dist
- name: Install dependencies
run: |
pip install setuptools wheel setuptools-rust
- name: Create source distribution
working-directory: ./bindings/python
run: sh build-sdist.sh
- name: Upload to PyPi
working-directory: ./bindings/python
run: |

View File

@ -64,9 +64,9 @@ jobs:
components: rustfmt, clippy
- name: Install Python
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
python-version: 3.11
architecture: "x64"

View File

@ -0,0 +1,3 @@
[target.aarch64-unknown-linux-musl]
linker = "aarch64-linux-musl-gcc"
rustflags = ["-C", "target-feature=-crt-static"]

View File

@ -0,0 +1,15 @@
# EditorConfig helps developers define and maintain consistent
# coding styles between different editors or IDEs
# http://editorconfig.org
root = true
[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.md]
trim_trailing_whitespace = false

View File

@ -1,3 +0,0 @@
node_modules
dist
coverage

View File

@ -1,40 +0,0 @@
{
"root": true,
"env": {
"es6": true,
"node": true
},
"extends": [
"eslint:recommended",
"plugin:prettier/recommended"
],
"globals": {
"Atomics": "readonly",
"SharedArrayBuffer": "readonly"
},
"parser": "@typescript-eslint/parser",
"parserOptions": {
"ecmaVersion": 2019,
"sourceType": "module"
},
"plugins": ["@typescript-eslint", "jest", "prettier", "simple-import-sort"],
"rules": {
"@typescript-eslint/no-use-before-define": ["error", { "functions": false }],
"simple-import-sort/sort": "error"
},
"overrides": [
{
"files": "**/*.ts",
"plugins": ["jsdoc"],
"extends": [
"plugin:@typescript-eslint/recommended",
"plugin:jest/recommended",
"plugin:jest/style",
"prettier/@typescript-eslint"
],
"rules": {
"jsdoc/no-types": "error"
}
}
]
}

bindings/node/.eslintrc.yml (new file, 169 additions)
View File

@ -0,0 +1,169 @@
parser: '@typescript-eslint/parser'
parserOptions:
ecmaFeatures:
jsx: true
ecmaVersion: latest
sourceType: module
project: ./tsconfig.json
env:
browser: true
es6: true
node: true
jest: true
ignorePatterns: ['index.js', 'target/']
plugins:
- import
- '@typescript-eslint'
extends:
- eslint:recommended
- plugin:prettier/recommended
rules:
# 0 = off, 1 = warn, 2 = error
'space-before-function-paren': 0
'no-useless-constructor': 0
'no-undef': 2
'no-console': [2, { allow: ['error', 'warn', 'info', 'assert'] }]
'comma-dangle': ['error', 'only-multiline']
'no-unused-vars': 0
'no-var': 2
'one-var-declaration-per-line': 2
'prefer-const': 2
'no-const-assign': 2
'no-duplicate-imports': 2
'no-use-before-define': [2, { 'functions': false, 'classes': false }]
'eqeqeq': [2, 'always', { 'null': 'ignore' }]
'no-case-declarations': 0
'no-restricted-syntax':
[
2,
{
'selector': 'BinaryExpression[operator=/(==|===|!=|!==)/][left.raw=true], BinaryExpression[operator=/(==|===|!=|!==)/][right.raw=true]',
'message': Don't compare for equality against boolean literals,
},
]
# https://github.com/benmosher/eslint-plugin-import/pull/334
'import/no-duplicates': 2
'import/first': 2
'import/newline-after-import': 2
'import/order':
[
2,
{
'newlines-between': 'always',
'alphabetize': { 'order': 'asc' },
'groups': ['builtin', 'external', 'internal', 'parent', 'sibling', 'index'],
},
]
overrides:
- files:
- ./**/*{.ts,.tsx}
rules:
'no-unused-vars': [2, { varsIgnorePattern: '^_', argsIgnorePattern: '^_', ignoreRestSiblings: true }]
'no-undef': 0
# TypeScript declare merge
'no-redeclare': 0
'no-useless-constructor': 0
'no-dupe-class-members': 0
'no-case-declarations': 0
'no-duplicate-imports': 0
# TypeScript Interface and Type
'no-use-before-define': 0
'@typescript-eslint/adjacent-overload-signatures': 2
'@typescript-eslint/await-thenable': 2
'@typescript-eslint/consistent-type-assertions': 2
'@typescript-eslint/ban-types':
[
'error',
{
'types':
{
'String': { 'message': 'Use string instead', 'fixWith': 'string' },
'Number': { 'message': 'Use number instead', 'fixWith': 'number' },
'Boolean': { 'message': 'Use boolean instead', 'fixWith': 'boolean' },
'Function': { 'message': 'Use explicit type instead' },
},
},
]
'@typescript-eslint/explicit-member-accessibility':
[
'error',
{
accessibility: 'explicit',
overrides:
{
accessors: 'no-public',
constructors: 'no-public',
methods: 'no-public',
properties: 'no-public',
parameterProperties: 'explicit',
},
},
]
'@typescript-eslint/method-signature-style': 2
'@typescript-eslint/no-floating-promises': 2
'@typescript-eslint/no-implied-eval': 2
'@typescript-eslint/no-for-in-array': 2
'@typescript-eslint/no-inferrable-types': 2
'@typescript-eslint/no-invalid-void-type': 2
'@typescript-eslint/no-misused-new': 2
'@typescript-eslint/no-misused-promises': 2
'@typescript-eslint/no-namespace': 2
'@typescript-eslint/no-non-null-asserted-optional-chain': 2
'@typescript-eslint/no-throw-literal': 2
'@typescript-eslint/no-unnecessary-boolean-literal-compare': 2
'@typescript-eslint/prefer-for-of': 2
'@typescript-eslint/prefer-nullish-coalescing': 2
'@typescript-eslint/switch-exhaustiveness-check': 2
'@typescript-eslint/prefer-optional-chain': 2
'@typescript-eslint/prefer-readonly': 2
'@typescript-eslint/prefer-string-starts-ends-with': 0
'@typescript-eslint/no-array-constructor': 2
'@typescript-eslint/require-await': 2
'@typescript-eslint/return-await': 2
'@typescript-eslint/ban-ts-comment':
[2, { 'ts-expect-error': false, 'ts-ignore': true, 'ts-nocheck': true, 'ts-check': false }]
'@typescript-eslint/naming-convention':
[
2,
{
selector: 'memberLike',
format: ['camelCase', 'PascalCase'],
modifiers: ['private'],
leadingUnderscore: 'forbid',
},
]
'@typescript-eslint/no-unused-vars':
[2, { varsIgnorePattern: '^_', argsIgnorePattern: '^_', ignoreRestSiblings: true }]
'@typescript-eslint/member-ordering':
[
2,
{
default:
[
'public-static-field',
'protected-static-field',
'private-static-field',
'public-static-method',
'protected-static-method',
'private-static-method',
'public-instance-field',
'protected-instance-field',
'private-instance-field',
'public-constructor',
'protected-constructor',
'private-constructor',
'public-instance-method',
'protected-instance-method',
'private-instance-method',
],
},
]

bindings/node/.gitattributes (new vendored file, 14 additions)
View File

@ -0,0 +1,14 @@
# Auto detect text files and perform LF normalization
* text=auto
*.ts text eol=lf merge=union
*.tsx text eol=lf merge=union
*.rs text eol=lf merge=union
*.js text eol=lf merge=union
*.json text eol=lf merge=union
*.debug text eol=lf merge=union
# Generated codes
index.js linguist-detectable=false
index.d.ts linguist-detectable=false

View File

@ -1,12 +1,129 @@
native/target
native/index.node
native/artifacts.json
**/*~
**/node_modules
**/.DS_Store
# Created by https://www.toptal.com/developers/gitignore/api/node
# Edit at https://www.toptal.com/developers/gitignore?templates=node
### Node ###
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
build
bin-package
data
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# End of https://www.toptal.com/developers/gitignore/api/node
#Added by cargo
/target
Cargo.lock
*.node
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/sdks
!.yarn/versions

View File

@ -0,0 +1,2 @@
target
.yarn

View File

@ -1,4 +0,0 @@
{
"$schema": "http://json.schemastore.org/prettierrc",
"printWidth": 90
}

View File

@ -0,0 +1,7 @@
exclude = ["node_modules/**/*.toml"]
# https://taplo.tamasfe.dev/configuration/formatter-options.html
[formatting]
align_entries = true
indent_tables = true
reorder_keys = true

bindings/node/.yarn/releases/yarn-3.5.1.cjs (new vendored executable file, 873 additions)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,5 @@
nodeLinker: node-modules
npmAuditRegistry: 'https://registry.npmjs.org'
yarnPath: .yarn/releases/yarn-3.5.1.cjs

View File

@ -1,190 +0,0 @@
## [0.13.2]
- Python only changes.
## [0.13.1]
- [#1072] Fixing Roberta type ids.
## [0.13.0]
- [#1008] `Decoder` is now a composable trait, but without being backward incompatible
- [#1047, #1051, #1052] `Processor` is now a composable trait, but without being backward incompatible
## [0.12.1]
- [#938] **Reverted breaking change**. https://github.com/huggingface/transformers/issues/16520
## [0.12.0] YANKED
Bump minor version because of a breaking change.
Using `0.12` to match other bindings.
- [#938] [REVERTED IN 0.12.1] **Breaking change**. Decoder trait is modified to be composable. This is only breaking if you are using decoders on their own. tokenizers should be error free.
- [#939] Making the regex in `ByteLevel` pre_tokenizer optional (necessary for BigScience)
- [#952] Fixed the vocabulary size of UnigramTrainer output (to respect added tokens)
- [#954] Fixed not being able to save vocabularies with holes in vocab (ConvBert). Yell warnings instead, but stop panicking.
- [#961] Added link for Ruby port of `tokenizers`
# [0.8.0](https://github.com/huggingface/tokenizers/compare/node-v0.7.0...node-v0.8.0) (2021-09-02)
### BREAKING CHANGES
- Many improvements on the Trainer ([#519](https://github.com/huggingface/tokenizers/pull/519)).
The files must now be provided first when calling `tokenizer.train(files, trainer)`.
### Features
- Adding the `TemplateProcessing`
- Add `WordLevel` and `Unigram` models ([#490](https://github.com/huggingface/tokenizers/pull/490))
- Add `nmtNormalizer` and `precompiledNormalizer` normalizers ([#490](https://github.com/huggingface/tokenizers/pull/490))
- Add `templateProcessing` post-processor ([#490](https://github.com/huggingface/tokenizers/pull/490))
- Add `digitsPreTokenizer` pre-tokenizer ([#490](https://github.com/huggingface/tokenizers/pull/490))
- Add support for mapping to sequences ([#506](https://github.com/huggingface/tokenizers/pull/506))
- Add `splitPreTokenizer` pre-tokenizer ([#542](https://github.com/huggingface/tokenizers/pull/542))
- Add `behavior` option to the `punctuationPreTokenizer` ([#657](https://github.com/huggingface/tokenizers/pull/657))
- Add the ability to load tokenizers from the Hugging Face Hub using `fromPretrained` ([#780](https://github.com/huggingface/tokenizers/pull/780))
### Fixes
- Fix a bug where long tokenizer.json files would be incorrectly deserialized ([#459](https://github.com/huggingface/tokenizers/pull/459))
- Fix RobertaProcessing deserialization in PostProcessorWrapper ([#464](https://github.com/huggingface/tokenizers/pull/464))
# [0.7.0](https://github.com/huggingface/tokenizers/compare/node-v0.6.2...node-v0.7.0) (2020-07-01)
### BREAKING CHANGES
- `robertaProcessing` now handles trimming the offsets (activated by default) ([#236](https://github.com/huggingface/tokenizers/pull/236))
- `charToTokenOffsets`, `charToWordOffsets` and `tokenToWordOffsets` helper functions on `Encoding` instances are removed and replaced by new `wordToTokens`, `wordToChars`, `tokenToChars`, `tokenToWord` and `charToWord` methods ([#234](https://github.com/huggingface/tokenizers/pull/234))
- `encode` and `encodeBatch` methods on a tokenizer now handle pre-tokenized inputs and have their signatures changed ([#249](https://github.com/huggingface/tokenizers/pull/249)). In addition:
- `encodeTokenized`, `encodeTokenizedBatch` methods are therefore removed
- `InputSequence`, `EncodeInput` and `EncodeOptions` types are added
- Improve management of the additional vocabulary ([#309](https://github.com/huggingface/tokenizers/pull/309)):
- New parameter `normalized` in `AddedToken` options, controlling whether a token should be extracted from the normalized version of the input text
- The `AddedToken` constructor now takes a `special` boolean as second parameter to indicate if the token is special (in this case it won't be normalized)
### Features
- Serialization of a `Tokenizer` and all its parts (`PreTokenizer`, `Normalizer`, ...). This adds some methods to easily save/load an entire tokenizer: new static methods `fromString` / `fromFile`, and instance methods `save` / `toString` on `BaseTokenizer` ([#272](https://github.com/huggingface/tokenizers/pull/272))
- New `padToMultipleOf` parameter for `PaddingOptions`, to pad to a multiple of a specified value ([#289](https://github.com/huggingface/tokenizers/pull/289))
- Improved errors generated during truncation when the provided max length is too low ([02cc977](https://github.com/huggingface/tokenizers/commit/02cc97756ffb9193b5d6d8dfcdeb7bf08adf2516))
- Improve BPE training speeds, by reading files sequentially, but parallelizing the processing of each file ([#276](https://github.com/huggingface/tokenizers/pull/276))
- Use `onig` for byte-level pre-tokenization to remove all the differences with the original implementation from GPT-2 ([#280](https://github.com/huggingface/tokenizers/pull/280))
### Fixes
- Fix various crash when training a BPE model ([#286](https://github.com/huggingface/tokenizers/pull/286))
- Fix a few bugs related to additional vocabulary/tokens ([#309](https://github.com/huggingface/tokenizers/pull/309))
## [0.6.2](https://github.com/huggingface/tokenizers/compare/node-v0.6.1...node-v0.6.2) (2020-04-13)
### Features
- More symbols exposed: `Token`, `BaseTokenizer`, `PaddingConfiguration`, `TruncationConfiguration` ([38d53a7](https://github.com/huggingface/tokenizers/commit/38d53a7b84b2ee86b262eee2de6121351fe03889))
- Expose `setPostProcessor` in `BaseTokenizer` ([38d53a7](https://github.com/huggingface/tokenizers/commit/38d53a7b84b2ee86b262eee2de6121351fe03889))
### Fixes
- Fix the word indexes when there are special tokens ([#226](https://github.com/huggingface/tokenizers/pull/226))
- Fix encoding overflowing offsets ([695ab83](https://github.com/huggingface/tokenizers/commit/695ab8388f5f1a7d63d8aaab9b3762312e0d5ac3))
- Fix Roberta overflowings ([c4ecc6f](https://github.com/huggingface/tokenizers/commit/c4ecc6f7ce7af40c558401a3ec9500732a17f9da))
## [0.6.1](https://github.com/huggingface/tokenizers/compare/node-v0.6.0...node-v0.6.1) (2020-04-01)
### Fixes
- Fix special tokens with wrong id ([b770f36](https://github.com/huggingface/tokenizers/commit/b770f364280af33efeffea8f0003102cda8cf1b7))
- Fix `AddedToken`'s `leftStrip` and `rightStrip` params (thanks @thirdwing) ([85488dd](https://github.com/huggingface/tokenizers/commit/85488dd6330ec7fa64aeb78c1a86b221f77c5ebb))
# [0.6.0](https://github.com/huggingface/tokenizers/compare/node-v0.5.0...node-v0.6.0) (2020-03-30)
### BREAKING CHANGES
- The `getOriginalString` method on `Encoding`s has been removed: this brings a reduction of 70% of the memory footprint. You can use the provided new `slice` function as a replacement to get a subpart of a string according to specified indexes while respecting unicode characters. ([#197](https://github.com/huggingface/tokenizers/pull/197))
- The offsets provided on `Encoding` are now relative to the original string, and not the normalized one anymore ([#197](https://github.com/huggingface/tokenizers/pull/197))
- The added tokens given to `addTokens`, `addSpecialTokens` or `train` methods of a tokenizer can now be instances of `AddedToken` to provide more control over these tokens. The support of the `[string, boolean]` format in `addTokens` method is removed. ([#202](https://github.com/huggingface/tokenizers/pull/202))
- The `addSpecialTokens` option for `BertWordpieceTokenizer` has been removed, and must now be passed to `encode` and `encodeBatch` functions ([7dd2400](https://github.com/huggingface/tokenizers/commit/7dd24002148a452f4d9fc55966e181c2dc699203)) ([#193](https://github.com/huggingface/tokenizers/pull/193))
### Features
- `encode` and `encodeBatch` methods on `BaseTokenizer` now take a new optional argument, specifying whether to add the special tokens (activated by default) ([#193](https://github.com/huggingface/tokenizers/pull/193))
- Methods `decode` and `decodeBatch` exposed in `BaseTokenizer` instances ([#184](https://github.com/huggingface/tokenizers/pull/184))
- The `fromFiles` methods for `BPE` and `WordPiece` models are now `async` ([#184](https://github.com/huggingface/tokenizers/pull/184))
- Big improvements in speed for BPE (both training and tokenization) ([#165](https://github.com/huggingface/tokenizers/pull/165))
- `ByteLevel` is also a `PostProcessor` now and handles trimming the offsets if activated. This avoids the unintuitive inclusion of the whitespaces in the produced offsets, even if these whitespaces are part of the actual token. It has been added to `ByteLevelBPETokenizer` but it is off by default. ([#188](https://github.com/huggingface/tokenizers/pull/188))
- New `postProcess`, `encodeTokenized`, `encodeTokenizedBatch` and `normalize` methods on `BaseTokenizer` ([#200](https://github.com/huggingface/tokenizers/pull/200)) ([2aeae55](https://github.com/huggingface/tokenizers/commit/2aeae555e22ac58b11b4956aa3f601bb168e8c3f))
- New `mergeEncodings` static method on `Encoding` class ([#200](https://github.com/huggingface/tokenizers/pull/200)) ([0408567](https://github.com/huggingface/tokenizers/commit/0408567f23d938952f45192a3eff54d48f828882))
- New `wordIndexes` getter and new `charToToken`, `charToTokenOffsets`, `charToWordOffsets` and `tokenToWordOffsets` helper functions on `Encoding` instances ([#200](https://github.com/huggingface/tokenizers/pull/200)) ([ce3cf78](https://github.com/huggingface/tokenizers/commit/ce3cf78ea5423d483895f51f77ff0c7df07f9b0a))
### Fixes
- Fix `longest_first` truncation strategy ([#174](https://github.com/huggingface/tokenizers/issues/174))
- Fix options names in `BPE.fromFiles` ([306f427](https://github.com/huggingface/tokenizers/commit/35540d2e0715e88299f8f04f842e23b5a306f427))
- Actually expose `save` method in `Model` ([ddcf8e8](https://github.com/huggingface/tokenizers/commit/3d143a911bde8d15e1431156fe3cf7676ddcf8e8))
- The errors in async functions are now typed ([7aa6c13](https://github.com/huggingface/tokenizers/commit/4510ea5ce37d84754bb782a99353ac5627aa6c13))
- Trim the decoded string in `bpeDecoder` used by `BPETokenizer` ([#205](https://github.com/huggingface/tokenizers/issues/205)) ([3f4a6b7](https://github.com/huggingface/tokenizers/commit/3f4a6b746b921f339de3279d073b29e019ee2e5a))
# [0.5.0](https://github.com/huggingface/tokenizers/compare/node-v0.4.1...node-v0.5.0) (2020-02-27)
### BREAKING CHANGES
- The `Encoding` object now exposes getters instead of `get...` methods (except for `getOriginalString`) ([9179968](https://github.com/huggingface/tokenizers/commit/917996841df2b3385e0212c9d7e9910d4e0d3fbf))
- `BertWordPieceTokenizer` now cleans up some tokenization artifacts by default while decoding ([#145](https://github.com/huggingface/tokenizers/issues/145)) ([#147](https://github.com/huggingface/tokenizers/pull/147))
### Features
- `Encoding` exposes a new `length` property ([9179968](https://github.com/huggingface/tokenizers/commit/917996841df2b3385e0212c9d7e9910d4e0d3fbf))
- Add a new `stripNormalizer` ([#140](https://github.com/huggingface/tokenizers/pull/140)) ([815d743](https://github.com/huggingface/tokenizers/commit/815d743461f9067ab38237862b7be8114d422300))
- `ByteLevelBPETokenizer` and `BPETokenizer` accept more options ([946ac1a](https://github.com/huggingface/tokenizers/commit/946ac1a9517c3090064e9a972ad71a5cf25b7e7f))
- Add `save` method to `Model` class ([aebc97e](https://github.com/huggingface/tokenizers/commit/aebc97eaf34260c9ed7689dd5e087bf8c8af59fc))
- Improved padding performances ([b30be3b](https://github.com/huggingface/tokenizers/commit/b30be3b2bda977b65f9bdb384258829b2bd91e3d)) ([0dc857e](https://github.com/huggingface/tokenizers/commit/0dc857ea8c557532a52628a6bc80141e65e6d974))
### Fixes
- Methods accepting optional arguments now handle explicit `undefined` correctly ([0fe22a7](https://github.com/huggingface/tokenizers/commit/0fe22a7c1c23f8d992f502a3a582e5212b8281ac))
- Special tokens are now declared only if present in the vocabulary ([b70283c](https://github.com/huggingface/tokenizers/commit/b70283c3050056958e8ba020b0386451cc6df80c))
- Add missing mask/padding special tokens in wordpiece tokenizer ([b70283c](https://github.com/huggingface/tokenizers/commit/b70283c3050056958e8ba020b0386451cc6df80c))
- Fix a bug in `ByteLevelBPETokenizer` that caused offsets to be wrong if a char got split up in multiple bytes ([#156](https://github.com/huggingface/tokenizers/pull/156))
## [0.4.1](https://github.com/huggingface/tokenizers/compare/node-v0.4.0...node-v0.4.1) (2020-02-11)
### Fixes
- Fix punctuation in BertWordPieceTokenizer (Thanks to @Mansterteddy with [#134](https://github.com/huggingface/tokenizers/pull/134))
# [0.4.0](https://github.com/huggingface/tokenizers/compare/node-v0.3.1...node-v0.4.0) (2020-02-05)
### BREAKING CHANGES
- `getOverflowing()` method on `Encoding` now returns all the overflowing `Encoding`s at once ([#77](https://github.com/huggingface/tokenizers/pull/77)) ([0094393](https://github.com/huggingface/tokenizers/commit/0094393610623bafc269790cd1be81fd1474583a))
### Features
- Add `setTruncation`, `disableTruncation`, `setPadding` and `disablePadding` methods in `Tokenizer` and `BaseTokenizer` ([#109](https://github.com/huggingface/tokenizers/pull/109)) ([78e2690](https://github.com/huggingface/tokenizers/commit/78e26905a735e14e67590cb09ddb42ed141c455b))
- Expose tokenizer / truncation / padding configuration in `BaseTokenizer` ([#126](https://github.com/huggingface/tokenizers/pull/126)) ([cb8585b](https://github.com/huggingface/tokenizers/commit/cb8585bc4eb8037c52049da677e4791857231f03))
- Expose `addTokens`, `addSpecialTokens`, `idToToken` and `tokenToId` in `BaseTokenizer` ([7051480](https://github.com/huggingface/tokenizers/commit/7051480c333f88bef80aa6846b66032a2d47383c))
- Add `getOriginalString()` method on `Encoding` ([a14c633](https://github.com/huggingface/tokenizers/commit/a14c63343b217a2c501359bec52baf717e3a05ef))
- Add `charDelimiterSplitPreTokenizer`: a new `PreTokenizer` that allows splitting sequences on the given delimiter (works like `.split(delimiter)`) ([#114](https://github.com/huggingface/tokenizers/pull/114)) ([6165910](https://github.com/huggingface/tokenizers/commit/6165910ca66b6bfd9fd996aa38c4c0b2b6505953))
- Add `robertaProcessing` as a new `PostProcessor` ([#111](https://github.com/huggingface/tokenizers/pull/111)) ([6524f09](https://github.com/huggingface/tokenizers/commit/6524f09e991c3a52c839d8eb01bfa41e81fde1d1))
### Fixes
- Correctly truncate with `OnlyFirst` and `OnlySecond` strategies ([#108](https://github.com/huggingface/tokenizers/issues/108)) ([6d532fe](https://github.com/huggingface/tokenizers/commit/6d532fedb1d3626328828304a5c39807733d2fa1))
- Fix default special tokens in `BertWordPieceTokenizer` ([10e2d28](https://github.com/huggingface/tokenizers/commit/10e2d286caf517f0977c04cf8e1924aed90403c9))
- Fix return type of `getSpecialTokensMask` on `Encoding` ([9770be5](https://github.com/huggingface/tokenizers/commit/9770be566175dc9c44dd7dcaa00a57d0e4ca632b))
- Actually add special tokens in tokenizers implementations ([acef252](https://github.com/huggingface/tokenizers/commit/acef252dacc43adc414175cfc325668ad1488753))
[#1072]: https://github.com/huggingface/tokenizers/pull/1072
[#956]: https://github.com/huggingface/tokenizers/pull/956
[#1008]: https://github.com/huggingface/tokenizers/pull/1008
[#1009]: https://github.com/huggingface/tokenizers/pull/1009
[#1047]: https://github.com/huggingface/tokenizers/pull/1047
[#1055]: https://github.com/huggingface/tokenizers/pull/1055
[#1051]: https://github.com/huggingface/tokenizers/pull/1051
[#1052]: https://github.com/huggingface/tokenizers/pull/1052
[#938]: https://github.com/huggingface/tokenizers/pull/938
[#939]: https://github.com/huggingface/tokenizers/pull/939
[#952]: https://github.com/huggingface/tokenizers/pull/952
[#954]: https://github.com/huggingface/tokenizers/pull/954
[#962]: https://github.com/huggingface/tokenizers/pull/962
[#961]: https://github.com/huggingface/tokenizers/pull/961
[#960]: https://github.com/huggingface/tokenizers/pull/960

bindings/node/Cargo.toml (new file, 22 additions)
View File

@ -0,0 +1,22 @@
[package]
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
edition = "2021"
name = "node"
version = "0.14.0-dev.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
crate-type = ["cdylib"]
[dependencies]
napi = "2"
napi-derive = "2"
serde = { version = "1.0.163", features = ["derive"] }
tokenizers = { path = "../../tokenizers/" }
[build-dependencies]
napi-build = "2"
[profile.release]
lto = true
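
For context on how these few dependencies become a usable JS package: `napi`/`napi-derive` (together with `napi-build` in the new `build.rs` further down) compile the crate into a native `.node` addon and emit the generated `index.js`/`index.d.ts` that the new `.gitattributes` marks as generated. The sketch below only illustrates the rough shape of that surface, with names inferred from the README and tests elsewhere in this diff; the real signatures live in the generated `index.d.ts`:

```ts
// Illustrative only: approximate shape of the napi-generated declarations,
// inferred from usage in this diff, not copied from the actual index.d.ts.
export declare class Tokenizer {
  static fromFile(path: string): Tokenizer;
  encode(input: string): Promise<Encoding>;
  decode(ids: number[], skipSpecialTokens: boolean): Promise<string>;
}
export declare class Encoding {
  getIds(): number[];
  getTokens(): string[];
}
```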

bindings/node/LICENSE (new file, 21 additions)
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 N-API for Rust
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -38,29 +38,22 @@ npm install tokenizers@latest
## Basic example
```ts
import { BertWordPieceTokenizer } from "tokenizers";
import { Tokenizer } from "tokenizers";
const wordPieceTokenizer = await BertWordPieceTokenizer.fromOptions({ vocabFile: "./vocab.txt" });
const wpEncoded = await wordPieceTokenizer.encode("Who is John?", "John is a teacher");
const tokenizer = await Tokenizer.fromFile("tokenizer.json");
const wpEncoded = await tokenizer.encode("Who is John?");
console.log(wpEncoded.length);
console.log(wpEncoded.tokens);
console.log(wpEncoded.ids);
console.log(wpEncoded.attentionMask);
console.log(wpEncoded.offsets);
console.log(wpEncoded.overflowing);
console.log(wpEncoded.specialTokensMask);
console.log(wpEncoded.typeIds);
console.log(wpEncoded.wordIndexes);
console.log(wpEncoded.getLength());
console.log(wpEncoded.getTokens());
console.log(wpEncoded.getIds());
console.log(wpEncoded.getAttentionMask());
console.log(wpEncoded.getOffsets());
console.log(wpEncoded.getOverflowing());
console.log(wpEncoded.getSpecialTokensMask());
console.log(wpEncoded.getTypeIds());
console.log(wpEncoded.getWordIds());
```
## Provided Tokenizers
- `BPETokenizer`: The original BPE
- `ByteLevelBPETokenizer`: The byte level version of the BPE
- `SentencePieceBPETokenizer`: A BPE implementation compatible with the one used by SentencePiece
- `BertWordPieceTokenizer`: The famous Bert tokenizer, using WordPiece
## License
[Apache License 2.0](../../LICENSE)

View File

@ -1,141 +0,0 @@
#!/usr/bin/env node
/**
* Inspired by https://github.com/IronCoreLabs/recrypt-node-binding
* ==================================
*
* This script is responsible for compiling and building the NPM release bundle for this repo. The following steps are taken:
*
* + Clean up any existing Rust builds by running `cargo clean`.
* + Run `cargo update` to make sure all dependencies are available.
* + Compile rust code into index.node file.
* + Run unit tests to ensure the library is in good shape for publishing.
* + Move all expected content into a `dist` directory.
* + Generate a binary distribution in `bin-package`.
* + Do a dry run of npm publishing via irish-pub or perform an actual publish step if `--publish` option is provided.
*/
const fs = require("fs");
const path = require("path");
const shell = require("shelljs");
const distPath = "./dist";
// Fail this script if any of these commands fail
shell.set("-e");
// Ensure that our directory is set to the root of the repo
const rootDirectory = path.dirname(process.argv[1]);
shell.cd(rootDirectory);
run()
// Prevent "unhandledRejection" events, allowing to actually exit with error
.catch(() => process.exit(1));
/***************************************/
async function run() {
const arg = process.argv.slice(2)[0];
switch (arg) {
case "--all":
buildRust();
buildTs();
break;
case "--rust":
buildRust();
break;
case "--typescript":
buildTs();
break;
case "--package-rust":
buildRust();
await packageRust();
break;
case "--npm-publish":
buildTs();
npmPublish();
break;
default:
shell.echo("No arg provided, doing nothing...");
break;
}
}
function buildRust() {
shell.echo("BUILDING RUST...");
// Cleanup the previous build, if it exists
shell.rm("-rf", "./bin-package");
shell.rm("-rf", "./build");
// Cleanup any previous Rust builds, update deps, and compile
shell.exec("npm ci --ignore-scripts");
shell.exec("npm run clean-rs");
shell.pushd("./native");
shell.exec("cargo update");
shell.popd();
shell.exec("npm run compile");
shell.echo("BUILDING RUST COMPLETE...");
}
async function packageRust() {
shell.echo("PACKAGING RUST...");
shell.mkdir("./bin-package");
shell.cp("./native/index.node", "./bin-package");
shell.exec("npm run package");
const version = JSON.parse(await fs.promises.readFile("./package.json")).version;
const tarPath = `build/stage/${version}`;
const tgz = (await fs.promises.readdir(tarPath)).find(f => f.endsWith(".tar.gz"));
shell.cp(`${tarPath}/${tgz}`, "./bin-package/");
shell.echo("PACKAGING RUST COMPLETE...");
}
function buildTs() {
shell.echo("BUILDING TS...");
// Cleanup the previous build, if it exists
shell.rm("-rf", distPath);
shell.exec("npm ci --ignore-scripts");
shell.mkdir(distPath);
shell.exec("npx tsc -p tsconfig.prod.json");
shell.echo("BUILDING TS COMPLETE...");
}
async function npmPublish() {
shell.echo("PUBLISHING ON NPM...");
shell.cp("-ur", ["lib/bindings/**/*.{js,d.ts}"], `${distPath}/bindings/`);
shell.mv([`${distPath}/bindings/native.prod.js`], [`${distPath}/bindings/native.js`]);
// shell.rm("-r", [`${distPath}/**/*.test.ts`]); // No more remaining *.test.ts files for now at this step
shell.cp("-r", ["package.json", "README.md", "../../LICENSE"], distPath);
// Add a NPM install script to the package.json that we push to NPM so that when consumers pull it down it
// runs the expected node-pre-gyp step.
const npmPackageJson = require(`${distPath}/package.json`);
npmPackageJson.scripts.install = "node-pre-gyp install";
npmPackageJson.main = "./index.js";
npmPackageJson.types = "./index.d.ts";
await fs.promises.writeFile(
`${distPath}/package.json`,
JSON.stringify(npmPackageJson, null, 2)
);
shell.exec(`npm publish ${distPath} --access public`);
shell.echo("PUBLISHING ON NPM COMPLETE...");
}

bindings/node/build.rs (new file, 5 additions)
View File

@ -0,0 +1,5 @@
extern crate napi_build;
fn main() {
napi_build::setup();
}

View File

@ -4,9 +4,9 @@ var globRequire = require;
describe("pipelineExample", () => {
// This is a hack to let us require using path similar to what the user has to use
function require(mod: string) {
if (mod.startsWith("tokenizers/")) {
let path = mod.slice("tokenizers/".length);
return globRequire("../../lib/" + path);
if (mod.startsWith("tokenizers")) {
// let path = mod.slice("tokenizers".length);
return globRequire("../../");
} else {
return globRequire(mod);
}
@ -17,12 +17,12 @@ describe("pipelineExample", () => {
it("shows pipeline parts", async () => {
// START reload_tokenizer
let { Tokenizer } = require("tokenizers/bindings/tokenizer");
let { Tokenizer } = require("tokenizers");
let tokenizer = Tokenizer.fromFile("data/tokenizer-wiki.json");
// END reload_tokenizer
// START setup_normalizer
let { sequenceNormalizer, nfdNormalizer, stripAccentsNormalizer } = require("tokenizers/bindings/normalizers");
let { sequenceNormalizer, nfdNormalizer, stripAccentsNormalizer } = require("tokenizers");
let normalizer = sequenceNormalizer([nfdNormalizer(), stripAccentsNormalizer()]);
// END setup_normalizer
@ -35,7 +35,7 @@ describe("pipelineExample", () => {
tokenizer.setNormalizer(normalizer)
// END replace_normalizer
// START setup_pre_tokenizer
let { whitespacePreTokenizer } = require("tokenizers/bindings/pre-tokenizers");
let { whitespacePreTokenizer } = require("tokenizers");
var preTokenizer = whitespacePreTokenizer();
var preTokenized = preTokenizer.preTokenizeString("Hello! How are you? I'm fine, thank you.");
@ -57,7 +57,7 @@ describe("pipelineExample", () => {
[".", [39, 40]]
]);
// START combine_pre_tokenizer
let { sequencePreTokenizer, digitsPreTokenizer } = require("tokenizers/bindings/pre-tokenizers");
let { sequencePreTokenizer, digitsPreTokenizer } = require("tokenizers");
var preTokenizer = sequencePreTokenizer([whitespacePreTokenizer(), digitsPreTokenizer(true)]);
var preTokenized = preTokenizer.preTokenizeString("Call 911!");
@ -66,7 +66,7 @@ describe("pipelineExample", () => {
tokenizer.setPreTokenizer(preTokenizer)
// END replace_pre_tokenizer
// START setup_processor
let { templateProcessing } = require("tokenizers/bindings/post-processors");
let { templateProcessing } = require("tokenizers");
tokenizer.setPostProcessor(templateProcessing(
"[CLS] $A [SEP]",
@ -75,15 +75,11 @@ describe("pipelineExample", () => {
));
// END setup_processor
// START test_decoding
let { promisify } = require('util');
let encode = promisify(tokenizer.encode.bind(tokenizer));
let decode = promisify(tokenizer.decode.bind(tokenizer));
let output = await encode("Hello, y'all! How are you 😁 ?");
let output = await tokenizer.encode("Hello, y'all! How are you 😁 ?");
console.log(output.getIds());
// [1, 27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35, 2]
let decoded = await decode([1, 27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35, 2], true);
let decoded = await tokenizer.decode([1, 27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35, 2], true);
// "Hello , y ' all ! How are you ?"
// END test_decoding
expect(decoded).toEqual("Hello , y ' all ! How are you ?");
@ -91,26 +87,26 @@ describe("pipelineExample", () => {
it.skip("trains the tokenizer", async () => {
// START bert_setup_tokenizer
let { Tokenizer } = require("tokenizers/bindings/tokenizer");
let { WordPiece } = require("tokenizers/bindings/models");
let { Tokenizer } = require("tokenizers");
let { WordPiece } = require("tokenizers");
let bertTokenizer = new Tokenizer(WordPiece.init({}, { unkToken: "[UNK]" }));
// END bert_setup_tokenizer
// START bert_setup_normalizer
let { sequenceNormalizer, lowercaseNormalizer, nfdNormalizer, stripAccentsNormalizer }
= require("tokenizers/bindings/normalizers");
= require("tokenizers");
bertTokenizer.setNormalizer(sequenceNormalizer([
nfdNormalizer(), lowercaseNormalizer(), stripAccentsNormalizer()
]))
// END bert_setup_normalizer
// START bert_setup_pre_tokenizer
let { whitespacePreTokenizer } = require("tokenizers/bindings/pre-tokenizers");
let { whitespacePreTokenizer } = require("tokenizers");
bertTokenizer.setPreTokenizer(whitespacePreTokenizer());
// END bert_setup_pre_tokenizer
// START bert_setup_processor
let { templateProcessing } = require("tokenizers/bindings/post-processors");
let { templateProcessing } = require("tokenizers");
bertTokenizer.setPostProcessor(templateProcessing(
"[CLS] $A [SEP]",
@ -119,7 +115,7 @@ describe("pipelineExample", () => {
));
// END bert_setup_processor
// START bert_train_tokenizer
let { wordPieceTrainer } = require("tokenizers/bindings/trainers");
let { wordPieceTrainer } = require("tokenizers");
let trainer = wordPieceTrainer({
vocabSize: 30522,
@ -133,26 +129,23 @@ describe("pipelineExample", () => {
});
it("shows a full bert example", async () => {
let { Tokenizer } = require("tokenizers/bindings/tokenizer");
let { Tokenizer } = require("tokenizers");
let bertTokenizer = await Tokenizer.fromFile("data/bert-wiki.json")
// START bert_test_decoding
let { promisify } = require("util");
let encode = promisify(bertTokenizer.encode.bind(bertTokenizer));
let decode = promisify(bertTokenizer.decode.bind(bertTokenizer));
let output = await encode("Welcome to the 🤗 Tokenizers library.");
let output = await bertTokenizer.encode("Welcome to the 🤗 Tokenizers library.");
console.log(output.getTokens());
// ["[CLS]", "welcome", "to", "the", "[UNK]", "tok", "##eni", "##zer", "##s", "library", ".", "[SEP]"]
var decoded = await decode(output.getIds(), true);
var decoded = await bertTokenizer.decode(output.getIds(), true);
// "welcome to the tok ##eni ##zer ##s library ."
// END bert_test_decoding
expect(decoded).toEqual("welcome to the tok ##eni ##zer ##s library .");
// START bert_proper_decoding
let { wordPieceDecoder } = require("tokenizers/bindings/decoders");
let { wordPieceDecoder } = require("tokenizers");
bertTokenizer.setDecoder(wordPieceDecoder());
var decoded = await decode(output.getIds(), true);
var decoded = await bertTokenizer.decode(output.getIds(), true);
// "welcome to the tokenizers library."
// END bert_proper_decoding
expect(decoded).toEqual("welcome to the tokenizers library.");
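The tests above no longer wrap `encode`/`decode` in `promisify`: the rewritten bindings return Promises directly. A minimal sketch of the resulting call pattern, assuming the same `data/tokenizer-wiki.json` file the tests load:

let { Tokenizer } = require('tokenizers')

async function roundTrip(): Promise<void> {
  // fromFile is synchronous; encode/decode now return Promises on their own.
  let tokenizer = Tokenizer.fromFile('data/tokenizer-wiki.json')
  let output = await tokenizer.encode("Hello, y'all! How are you 😁 ?")
  console.log(output.getIds())
  // `true` skips special tokens while decoding.
  let decoded = await tokenizer.decode(output.getIds(), true)
  console.log(decoded)
}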

View File

@ -1,182 +1,163 @@
/* eslint-disable */
var globRequire = require;
var globRequire = require
describe("quicktourExample", () => {
console.log = (..._args: any[]) => {}
describe('quicktourExample', () => {
function require(mod: string) {
if (mod.startsWith("tokenizers/")) {
let path = mod.slice("tokenizers/".length);
return globRequire("../../lib/" + path);
if (mod.startsWith('tokenizers')) {
return globRequire('../../')
} else {
return globRequire(mod);
return globRequire(mod)
}
}
it.skip("trains the tokenizer", async () => {
it.skip('trains the tokenizer', async () => {
// START init_tokenizer
let { Tokenizer } = require("tokenizers/bindings/tokenizer");
let { BPE } = require("tokenizers/bindings/models");
let { Tokenizer } = require('tokenizers')
let { BPE } = require('tokenizers')
let tokenizer = new Tokenizer(BPE.init({}, [], { unkToken: "[UNK]" }));
let tokenizer = new Tokenizer(BPE.init({}, [], { unkToken: '[UNK]' }))
// END init_tokenizer
// START init_trainer
let { bpeTrainer } = require("tokenizers/bindings/trainers");
let { bpeTrainer } = require('tokenizers')
let trainer = bpeTrainer({
specialTokens: ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
});
specialTokens: ['[UNK]', '[CLS]', '[SEP]', '[PAD]', '[MASK]'],
})
// END init_trainer
// START init_pretok
let { whitespacePreTokenizer } = require("tokenizers/bindings/pre-tokenizers");
let { whitespacePreTokenizer } = require('tokenizers')
tokenizer.setPreTokenizer(whitespacePreTokenizer());
tokenizer.setPreTokenizer(whitespacePreTokenizer())
// END init_pretok
// START train
let files = ["test", "train", "valid"].map(split => `data/wikitext-103-raw/wiki.${split}.raw`);
tokenizer.train(files, trainer);
let files = ['test', 'train', 'valid'].map((split) => `data/wikitext-103-raw/wiki.${split}.raw`)
tokenizer.train(files, trainer)
// END train
// START save
tokenizer.save("data/tokenizer-wiki.json");
tokenizer.save('data/tokenizer-wiki.json')
// END save
});
})
it("shows a quicktour example", async () => {
let { Tokenizer } = require("tokenizers/bindings/tokenizer");
let console = {
log: (..._args: any[]) => {}
};
it('shows a quicktour example', async () => {
let { Tokenizer } = require('tokenizers')
// START reload_tokenizer
let tokenizer = Tokenizer.fromFile("data/tokenizer-wiki.json");
let tokenizer = Tokenizer.fromFile('data/tokenizer-wiki.json')
// END reload_tokenizer
// START encode
let { promisify } = require('util');
let encode = promisify(tokenizer.encode.bind(tokenizer));
var output = await encode("Hello, y'all! How are you 😁 ?");
var output = await tokenizer.encode("Hello, y'all! How are you 😁 ?")
// END encode
// START print_tokens
console.log(output.getTokens());
console.log(output.getTokens())
// ["Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?"]
// END print_tokens
expect(output.getTokens()).toEqual([
"Hello",
",",
"y",
"'",
"all",
"!",
"How",
"are",
"you",
"[UNK]",
"?",
]);
expect(output.getTokens()).toEqual(['Hello', ',', 'y', "'", 'all', '!', 'How', 'are', 'you', '[UNK]', '?'])
// START print_ids
console.log(output.getIds());
console.log(output.getIds())
// [27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35]
// END print_ids
expect(output.getIds()).toEqual([27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35]);
expect(output.getIds()).toEqual([27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35])
// START print_offsets
let offsets = output.getOffsets();
console.log(offsets[9]);
let offsets = output.getOffsets()
console.log(offsets[9])
// (26, 27)
// END print_offsets
expect(offsets[9]).toEqual([26, 27]);
expect(offsets[9]).toEqual([26, 27])
// START use_offsets
let { slice } = require("tokenizers/bindings/utils");
let { slice } = require('tokenizers')
let sentence = "Hello, y'all! How are you 😁 ?"
let [start, end] = offsets[9];
console.log(slice(sentence, start, end));
let [start, end] = offsets[9]
console.log(slice(sentence, start, end))
// "😁"
// END use_offsets
expect(slice(sentence, start, end)).toEqual("😁");
expect(slice(sentence, start, end)).toEqual('😁')
// START check_sep
console.log(tokenizer.tokenToId("[SEP]"));
console.log(tokenizer.tokenToId('[SEP]'))
// 2
// END check_sep
expect(tokenizer.tokenToId("[SEP]")).toEqual(2);
expect(tokenizer.tokenToId('[SEP]')).toEqual(2)
// START init_template_processing
let { templateProcessing } = require("tokenizers/bindings/post-processors");
let { templateProcessing } = require('tokenizers')
tokenizer.setPostProcessor(templateProcessing(
"[CLS] $A [SEP]",
"[CLS] $A [SEP] $B:1 [SEP]:1",
[
["[CLS]", tokenizer.tokenToId("[CLS]")],
["[SEP]", tokenizer.tokenToId("[SEP]")],
],
));
tokenizer.setPostProcessor(
templateProcessing('[CLS] $A [SEP]', '[CLS] $A [SEP] $B:1 [SEP]:1', [
['[CLS]', tokenizer.tokenToId('[CLS]')],
['[SEP]', tokenizer.tokenToId('[SEP]')],
]),
)
// END init_template_processing
// START print_special_tokens
var output = await encode("Hello, y'all! How are you 😁 ?");
console.log(output.getTokens());
var output = await tokenizer.encode("Hello, y'all! How are you 😁 ?")
console.log(output.getTokens())
// ["[CLS]", "Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?", "[SEP]"]
// END print_special_tokens
expect(output.getTokens()).toEqual([
"[CLS]",
"Hello",
",",
"y",
'[CLS]',
'Hello',
',',
'y',
"'",
"all",
"!",
"How",
"are",
"you",
"[UNK]",
"?",
"[SEP]",
]);
'all',
'!',
'How',
'are',
'you',
'[UNK]',
'?',
'[SEP]',
])
// START print_special_tokens_pair
var output = await encode("Hello, y'all!", "How are you 😁 ?");
console.log(output.getTokens());
var output = await tokenizer.encode("Hello, y'all!", 'How are you 😁 ?')
console.log(output.getTokens())
// ["[CLS]", "Hello", ",", "y", "'", "all", "!", "[SEP]", "How", "are", "you", "[UNK]", "?", "[SEP]"]
// END print_special_tokens_pair
expect(output.getTokens()).toEqual([
"[CLS]",
"Hello",
",",
"y",
'[CLS]',
'Hello',
',',
'y',
"'",
"all",
"!",
"[SEP]",
"How",
"are",
"you",
"[UNK]",
"?",
"[SEP]",
]);
'all',
'!',
'[SEP]',
'How',
'are',
'you',
'[UNK]',
'?',
'[SEP]',
])
// START print_type_ids
console.log(output.getTypeIds());
console.log(output.getTypeIds())
// [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
// END print_type_ids
expect(output.getTypeIds()).toEqual([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]);
expect(output.getTypeIds()).toEqual([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
// START encode_batch
let encodeBatch = promisify(tokenizer.encodeBatch.bind(tokenizer));
var output = await encodeBatch(["Hello, y'all!", "How are you 😁 ?"]);
var output = await tokenizer.encodeBatch(["Hello, y'all!", 'How are you 😁 ?'])
// END encode_batch
// START encode_batch_pair
var output = await encodeBatch(
[["Hello, y'all!", "How are you 😁 ?"], ["Hello to you too!", "I'm fine, thank you!"]]
);
// var output = await tokenizer.encodeBatch(
// [["Hello, y'all!", "How are you 😁 ?"], ["Hello to you too!", "I'm fine, thank you!"]]
// );
// END encode_batch_pair
// START enable_padding
tokenizer.setPadding({ padId: 3, padToken: "[PAD]" });
tokenizer.setPadding({ padId: 3, padToken: '[PAD]' })
// END enable_padding
// START print_batch_tokens
var output = await encodeBatch(["Hello, y'all!", "How are you 😁 ?"]);
console.log(output[1].getTokens());
var output = await tokenizer.encodeBatch(["Hello, y'all!", 'How are you 😁 ?'])
console.log(output[1].getTokens())
// ["[CLS]", "How", "are", "you", "[UNK]", "?", "[SEP]", "[PAD]"]
// END print_batch_tokens
expect(output[1].getTokens()).toEqual(["[CLS]", "How", "are", "you", "[UNK]", "?", "[SEP]", "[PAD]"]);
expect(output[1].getTokens()).toEqual(['[CLS]', 'How', 'are', 'you', '[UNK]', '?', '[SEP]', '[PAD]'])
// START print_attention_mask
console.log(output[1].getAttentionMask());
console.log(output[1].getAttentionMask())
// [1, 1, 1, 1, 1, 1, 1, 0]
// END print_attention_mask
expect(output[1].getAttentionMask()).toEqual([1, 1, 1, 1, 1, 1, 1, 0]);
});
});
expect(output[1].getAttentionMask()).toEqual([1, 1, 1, 1, 1, 1, 1, 0])
})
})

bindings/node/index.d.ts vendored Normal file
View File

@ -0,0 +1,254 @@
/* tslint:disable */
/* eslint-disable */
/* auto-generated by NAPI-RS */
export function bpeDecoder(suffix?: string | undefined | null): Decoder
export function byteFallbackDecoder(): Decoder
export function ctcDecoder(
padToken?: string = '<pad>',
wordDelimiterToken?: string | undefined | null,
cleanup?: boolean | undefined | null,
): Decoder
export function fuseDecoder(): Decoder
export function metaspaceDecoder(replacement?: string = '▁', addPrefixSpace?: bool = true): Decoder
export function replaceDecoder(pattern: string, content: string): Decoder
export function sequenceDecoder(decoders: Array<Decoder>): Decoder
export function stripDecoder(content: string, left: number, right: number): Decoder
export function wordPieceDecoder(prefix?: string = '##', cleanup?: bool = true): Decoder
export const enum TruncationDirection {
Left = 'Left',
Right = 'Right',
}
export const enum TruncationStrategy {
LongestFirst = 'LongestFirst',
OnlyFirst = 'OnlyFirst',
OnlySecond = 'OnlySecond',
}
export interface BpeOptions {
cacheCapacity?: number
dropout?: number
unkToken?: string
continuingSubwordPrefix?: string
endOfWordSuffix?: string
fuseUnk?: boolean
byteFallback?: boolean
}
export interface WordPieceOptions {
unkToken?: string
continuingSubwordPrefix?: string
maxInputCharsPerWord?: number
}
export interface WordLevelOptions {
unkToken?: string
}
export interface UnigramOptions {
unkId?: number
byteFallback?: boolean
}
export function prependNormalizer(prepend: string): Normalizer
export function stripAccentsNormalizer(): Normalizer
export interface BertNormalizerOptions {
cleanText?: boolean
handleChineseChars?: boolean
stripAccents?: boolean
lowercase?: boolean
}
/**
* bert_normalizer(options?: {
* cleanText?: bool = true,
* handleChineseChars?: bool = true,
* stripAccents?: bool = true,
* lowercase?: bool = true
* })
*/
export function bertNormalizer(options?: BertNormalizerOptions | undefined | null): Normalizer
export function nfdNormalizer(): Normalizer
export function nfkdNormalizer(): Normalizer
export function nfcNormalizer(): Normalizer
export function nfkcNormalizer(): Normalizer
export function stripNormalizer(left?: boolean | undefined | null, right?: boolean | undefined | null): Normalizer
export function sequenceNormalizer(normalizers: Array<Normalizer>): Normalizer
export function lowercase(): Normalizer
export function replace(pattern: string, content: string): Normalizer
export function nmt(): Normalizer
export function precompiled(bytes: Array<number>): Normalizer
export const enum JsSplitDelimiterBehavior {
Removed = 'Removed',
Isolated = 'Isolated',
MergedWithPrevious = 'MergedWithPrevious',
MergedWithNext = 'MergedWithNext',
Contiguous = 'Contiguous',
}
/** byte_level(addPrefixSpace: bool = true, useRegex: bool = true) */
export function byteLevelPreTokenizer(
addPrefixSpace?: boolean | undefined | null,
useRegex?: boolean | undefined | null,
): PreTokenizer
export function byteLevelAlphabet(): Array<string>
export function whitespacePreTokenizer(): PreTokenizer
export function whitespaceSplitPreTokenizer(): PreTokenizer
export function bertPreTokenizer(): PreTokenizer
export function metaspacePreTokenizer(replacement?: string = '▁', addPrefixSpace?: bool = true): PreTokenizer
export function splitPreTokenizer(pattern: string, behavior: string, invert?: boolean | undefined | null): PreTokenizer
export function punctuationPreTokenizer(behavior?: string | undefined | null): PreTokenizer
export function sequencePreTokenizer(preTokenizers: Array<PreTokenizer>): PreTokenizer
export function charDelimiterSplit(delimiter: string): PreTokenizer
export function digitsPreTokenizer(individualDigits?: boolean | undefined | null): PreTokenizer
export function bertProcessing(sep: [string, number], cls: [string, number]): Processor
export function robertaProcessing(
sep: [string, number],
cls: [string, number],
trimOffsets?: boolean | undefined | null,
addPrefixSpace?: boolean | undefined | null,
): Processor
export function byteLevelProcessing(trimOffsets?: boolean | undefined | null): Processor
export function templateProcessing(
single: string,
pair?: string | undefined | null,
specialTokens?: Array<[string, number]> | undefined | null,
): Processor
export function sequenceProcessing(processors: Array<Processor>): Processor
export const enum PaddingDirection {
Left = 0,
Right = 1,
}
export interface PaddingOptions {
maxLength?: number
direction?: string | PaddingDirection
padToMultipleOf?: number
padId?: number
padTypeId?: number
padToken?: string
}
export interface EncodeOptions {
isPretokenized?: boolean
addSpecialTokens?: boolean
}
export interface TruncationOptions {
maxLength?: number
strategy?: TruncationStrategy
direction?: string | TruncationDirection
stride?: number
}
export interface AddedTokenOptions {
singleWord?: boolean
leftStrip?: boolean
rightStrip?: boolean
normalized?: boolean
}
export interface JsFromPretrainedParameters {
revision?: string
authToken?: string
}
export function slice(s: string, beginIndex?: number | undefined | null, endIndex?: number | undefined | null): string
export function mergeEncodings(encodings: Array<Encoding>, growingOffsets?: boolean | undefined | null): Encoding
/** Decoder */
export class Decoder {
decode(tokens: Array<string>): string
}
export type JsEncoding = Encoding
export class Encoding {
constructor()
getLength(): number
getNSequences(): number
getIds(): Array<number>
getTypeIds(): Array<number>
getAttentionMask(): Array<number>
getSpecialTokensMask(): Array<number>
getTokens(): Array<string>
getOffsets(): Array<Array<number>>
getWordIds(): Array<number | undefined | null>
charToToken(pos: number, seqId?: number | undefined | null): number | null
charToWord(pos: number, seqId?: number | undefined | null): number | null
pad(length: number, options?: PaddingOptions | undefined | null): void
truncate(
length: number,
stride?: number | undefined | null,
direction?: string | TruncationDirection | undefined | null,
): void
wordToTokens(word: number, seqId?: number | undefined | null): [number, number] | null | undefined
wordToChars(word: number, seqId?: number | undefined | null): [number, number] | null | undefined
tokenToChars(token: number): [number, [number, number]] | null | undefined
tokenToWord(token: number): number | null
getOverflowing(): Array<Encoding>
getSequenceIds(): Array<number | undefined | null>
tokenToSequence(token: number): number | null
}
export class Model { }
export type Bpe = BPE
export class BPE {
static empty(): Model
static init(vocab: Vocab, merges: Merges, options?: BpeOptions | undefined | null): Model
static fromFile(vocab: string, merges: string, options?: BpeOptions | undefined | null): Promise<Model>
}
export class WordPiece {
static init(vocab: Vocab, options?: WordPieceOptions | undefined | null): Model
static empty(): WordPiece
static fromFile(vocab: string, options?: WordPieceOptions | undefined | null): Promise<Model>
}
export class WordLevel {
static init(vocab: Vocab, options?: WordLevelOptions | undefined | null): Model
static empty(): WordLevel
static fromFile(vocab: string, options?: WordLevelOptions | undefined | null): Promise<Model>
}
export class Unigram {
static init(vocab: Array<[string, number]>, options?: UnigramOptions | undefined | null): Model
static empty(): Model
}
/** Normalizer */
export class Normalizer {
normalizeString(sequence: string): string
}
/** PreTokenizers */
export class PreTokenizer {
preTokenizeString(sequence: string): [string, [number, number]][]
}
export class Processor { }
export class AddedToken {
constructor(token: string, isSpecial: boolean, options?: AddedTokenOptions | undefined | null)
getContent(): string
}
export class Tokenizer {
constructor(model: Model)
setPreTokenizer(preTokenizer: PreTokenizer): void
setDecoder(decoder: Decoder): void
setModel(model: Model): void
setPostProcessor(postProcessor: Processor): void
setNormalizer(normalizer: Normalizer): void
save(path: string, pretty?: boolean | undefined | null): void
addAddedTokens(tokens: Array<AddedToken>): number
addTokens(tokens: Array<string>): number
encode(
sentence: InputSequence,
pair?: InputSequence | null,
encodeOptions?: EncodeOptions | undefined | null,
): Promise<JsEncoding>
encodeBatch(sentences: EncodeInput[], encodeOptions?: EncodeOptions | undefined | null): Promise<JsEncoding[]>
decode(ids: Array<number>, skipSpecialTokens: boolean): Promise<string>
decodeBatch(ids: Array<Array<number>>, skipSpecialTokens: boolean): Promise<string[]>
static fromString(s: string): Tokenizer
static fromFile(file: string): Tokenizer
// static fromPretrained(file: string, parameters?: JsFromPretrainedParameters | undefined | null): Tokenizer
addSpecialTokens(tokens: Array<string>): void
setTruncation(maxLength: number, options?: TruncationOptions | undefined | null): void
disableTruncation(): void
setPadding(options?: PaddingOptions | undefined | null): void
disablePadding(): void
getDecoder(): Decoder | null
getNormalizer(): Normalizer | null
getPreTokenizer(): PreTokenizer | null
getPostProcessor(): Processor | null
getVocab(withAddedTokens?: boolean | undefined | null): Record<string, number>
getVocabSize(withAddedTokens?: boolean | undefined | null): number
idToToken(id: number): string | null
tokenToId(token: string): number | null
train(files: Array<string>): void
runningTasks(): number
postProcess(
encoding: Encoding,
pair?: Encoding | undefined | null,
addSpecialTokens?: boolean | undefined | null,
): Encoding
}
export class Trainer { }
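Everything above is exported from the package root rather than from per-module `bindings/*` paths. A small sketch of how the pieces compose under these typings; the vocabulary and token ids below are made up for illustration:

let { Tokenizer, WordPiece, whitespacePreTokenizer, wordPieceDecoder, templateProcessing } = require('tokenizers')

// Build a WordPiece model from an in-memory vocab (ids are illustrative only).
let model = WordPiece.init({ '[UNK]': 0, '[CLS]': 1, '[SEP]': 2, hello: 3 }, { unkToken: '[UNK]' })
let tokenizer = new Tokenizer(model)
tokenizer.setPreTokenizer(whitespacePreTokenizer())
tokenizer.setDecoder(wordPieceDecoder())
tokenizer.setPostProcessor(templateProcessing('[CLS] $A [SEP]', null, [['[CLS]', 1], ['[SEP]', 2]]))

// encode() returns a Promise<Encoding>, as declared above.
tokenizer.encode('hello').then((output) => console.log(output.getTokens()))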

bindings/node/index.js Normal file
View File

@ -0,0 +1,353 @@
/* tslint:disable */
/* eslint-disable */
/* prettier-ignore */
/* auto-generated by NAPI-RS */
const { existsSync, readFileSync } = require('fs')
const { join } = require('path')
const { platform, arch } = process
let nativeBinding = null
let localFileExisted = false
let loadError = null
function isMusl() {
// For Node 10
if (!process.report || typeof process.report.getReport !== 'function') {
try {
const lddPath = require('child_process').execSync('which ldd').toString().trim()
return readFileSync(lddPath, 'utf8').includes('musl')
} catch (e) {
return true
}
} else {
const { glibcVersionRuntime } = process.report.getReport().header
return !glibcVersionRuntime
}
}
switch (platform) {
case 'android':
switch (arch) {
case 'arm64':
localFileExisted = existsSync(join(__dirname, 'tokenizers.android-arm64.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.android-arm64.node')
} else {
nativeBinding = require('tokenizers-android-arm64')
}
} catch (e) {
loadError = e
}
break
case 'arm':
localFileExisted = existsSync(join(__dirname, 'tokenizers.android-arm-eabi.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.android-arm-eabi.node')
} else {
nativeBinding = require('tokenizers-android-arm-eabi')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on Android ${arch}`)
}
break
case 'win32':
switch (arch) {
case 'x64':
localFileExisted = existsSync(join(__dirname, 'tokenizers.win32-x64-msvc.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.win32-x64-msvc.node')
} else {
nativeBinding = require('tokenizers-win32-x64-msvc')
}
} catch (e) {
loadError = e
}
break
case 'ia32':
localFileExisted = existsSync(join(__dirname, 'tokenizers.win32-ia32-msvc.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.win32-ia32-msvc.node')
} else {
nativeBinding = require('tokenizers-win32-ia32-msvc')
}
} catch (e) {
loadError = e
}
break
case 'arm64':
localFileExisted = existsSync(join(__dirname, 'tokenizers.win32-arm64-msvc.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.win32-arm64-msvc.node')
} else {
nativeBinding = require('tokenizers-win32-arm64-msvc')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on Windows: ${arch}`)
}
break
case 'darwin':
localFileExisted = existsSync(join(__dirname, 'tokenizers.darwin-universal.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.darwin-universal.node')
} else {
nativeBinding = require('tokenizers-darwin-universal')
}
break
} catch {}
switch (arch) {
case 'x64':
localFileExisted = existsSync(join(__dirname, 'tokenizers.darwin-x64.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.darwin-x64.node')
} else {
nativeBinding = require('tokenizers-darwin-x64')
}
} catch (e) {
loadError = e
}
break
case 'arm64':
localFileExisted = existsSync(join(__dirname, 'tokenizers.darwin-arm64.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.darwin-arm64.node')
} else {
nativeBinding = require('tokenizers-darwin-arm64')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on macOS: ${arch}`)
}
break
case 'freebsd':
if (arch !== 'x64') {
throw new Error(`Unsupported architecture on FreeBSD: ${arch}`)
}
localFileExisted = existsSync(join(__dirname, 'tokenizers.freebsd-x64.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.freebsd-x64.node')
} else {
nativeBinding = require('tokenizers-freebsd-x64')
}
} catch (e) {
loadError = e
}
break
case 'linux':
switch (arch) {
case 'x64':
if (isMusl()) {
localFileExisted = existsSync(join(__dirname, 'tokenizers.linux-x64-musl.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.linux-x64-musl.node')
} else {
nativeBinding = require('tokenizers-linux-x64-musl')
}
} catch (e) {
loadError = e
}
} else {
localFileExisted = existsSync(join(__dirname, 'tokenizers.linux-x64-gnu.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.linux-x64-gnu.node')
} else {
nativeBinding = require('tokenizers-linux-x64-gnu')
}
} catch (e) {
loadError = e
}
}
break
case 'arm64':
if (isMusl()) {
localFileExisted = existsSync(join(__dirname, 'tokenizers.linux-arm64-musl.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.linux-arm64-musl.node')
} else {
nativeBinding = require('tokenizers-linux-arm64-musl')
}
} catch (e) {
loadError = e
}
} else {
localFileExisted = existsSync(join(__dirname, 'tokenizers.linux-arm64-gnu.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.linux-arm64-gnu.node')
} else {
nativeBinding = require('tokenizers-linux-arm64-gnu')
}
} catch (e) {
loadError = e
}
}
break
case 'arm':
localFileExisted = existsSync(join(__dirname, 'tokenizers.linux-arm-gnueabihf.node'))
try {
if (localFileExisted) {
nativeBinding = require('./tokenizers.linux-arm-gnueabihf.node')
} else {
nativeBinding = require('tokenizers-linux-arm-gnueabihf')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on Linux: ${arch}`)
}
break
default:
throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`)
}
if (!nativeBinding) {
if (loadError) {
throw loadError
}
throw new Error(`Failed to load native binding`)
}
const {
Decoder,
bpeDecoder,
byteFallbackDecoder,
ctcDecoder,
fuseDecoder,
metaspaceDecoder,
replaceDecoder,
sequenceDecoder,
stripDecoder,
wordPieceDecoder,
Encoding,
TruncationDirection,
TruncationStrategy,
Model,
BPE,
WordPiece,
WordLevel,
Unigram,
Normalizer,
prependNormalizer,
stripAccentsNormalizer,
bertNormalizer,
nfdNormalizer,
nfkdNormalizer,
nfcNormalizer,
nfkcNormalizer,
stripNormalizer,
sequenceNormalizer,
lowercase,
replace,
nmt,
precompiled,
JsSplitDelimiterBehavior,
PreTokenizer,
byteLevelPreTokenizer,
byteLevelAlphabet,
whitespacePreTokenizer,
whitespaceSplitPreTokenizer,
bertPreTokenizer,
metaspacePreTokenizer,
splitPreTokenizer,
punctuationPreTokenizer,
sequencePreTokenizer,
charDelimiterSplit,
digitsPreTokenizer,
Processor,
bertProcessing,
robertaProcessing,
byteLevelProcessing,
templateProcessing,
sequenceProcessing,
PaddingDirection,
AddedToken,
Tokenizer,
Trainer,
slice,
mergeEncodings,
} = nativeBinding
module.exports.Decoder = Decoder
module.exports.bpeDecoder = bpeDecoder
module.exports.byteFallbackDecoder = byteFallbackDecoder
module.exports.ctcDecoder = ctcDecoder
module.exports.fuseDecoder = fuseDecoder
module.exports.metaspaceDecoder = metaspaceDecoder
module.exports.replaceDecoder = replaceDecoder
module.exports.sequenceDecoder = sequenceDecoder
module.exports.stripDecoder = stripDecoder
module.exports.wordPieceDecoder = wordPieceDecoder
module.exports.Encoding = Encoding
module.exports.TruncationDirection = TruncationDirection
module.exports.TruncationStrategy = TruncationStrategy
module.exports.Model = Model
module.exports.BPE = BPE
module.exports.WordPiece = WordPiece
module.exports.WordLevel = WordLevel
module.exports.Unigram = Unigram
module.exports.Normalizer = Normalizer
module.exports.prependNormalizer = prependNormalizer
module.exports.stripAccentsNormalizer = stripAccentsNormalizer
module.exports.bertNormalizer = bertNormalizer
module.exports.nfdNormalizer = nfdNormalizer
module.exports.nfkdNormalizer = nfkdNormalizer
module.exports.nfcNormalizer = nfcNormalizer
module.exports.nfkcNormalizer = nfkcNormalizer
module.exports.stripNormalizer = stripNormalizer
module.exports.sequenceNormalizer = sequenceNormalizer
module.exports.lowercase = lowercase
module.exports.replace = replace
module.exports.nmt = nmt
module.exports.precompiled = precompiled
module.exports.JsSplitDelimiterBehavior = JsSplitDelimiterBehavior
module.exports.PreTokenizer = PreTokenizer
module.exports.byteLevelPreTokenizer = byteLevelPreTokenizer
module.exports.byteLevelAlphabet = byteLevelAlphabet
module.exports.whitespacePreTokenizer = whitespacePreTokenizer
module.exports.whitespaceSplitPreTokenizer = whitespaceSplitPreTokenizer
module.exports.bertPreTokenizer = bertPreTokenizer
module.exports.metaspacePreTokenizer = metaspacePreTokenizer
module.exports.splitPreTokenizer = splitPreTokenizer
module.exports.punctuationPreTokenizer = punctuationPreTokenizer
module.exports.sequencePreTokenizer = sequencePreTokenizer
module.exports.charDelimiterSplit = charDelimiterSplit
module.exports.digitsPreTokenizer = digitsPreTokenizer
module.exports.Processor = Processor
module.exports.bertProcessing = bertProcessing
module.exports.robertaProcessing = robertaProcessing
module.exports.byteLevelProcessing = byteLevelProcessing
module.exports.templateProcessing = templateProcessing
module.exports.sequenceProcessing = sequenceProcessing
module.exports.PaddingDirection = PaddingDirection
module.exports.AddedToken = AddedToken
module.exports.Tokenizer = Tokenizer
module.exports.Trainer = Trainer
module.exports.slice = slice
module.exports.mergeEncodings = mergeEncodings
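The generated loader above picks a prebuilt `.node` binary per platform, architecture and libc flavour, preferring a file shipped next to `index.js` and falling back to the matching `tokenizers-*` npm package. A trimmed sketch of that resolution pattern for a single target, using the same names as the generated code:

const { existsSync } = require('fs')
const { join } = require('path')

// Prefer the locally shipped binary, otherwise load the per-target package.
function loadLinuxX64Gnu() {
  const local = join(__dirname, 'tokenizers.linux-x64-gnu.node')
  return existsSync(local) ? require(local) : require('tokenizers-linux-x64-gnu')
}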

View File

@ -92,7 +92,7 @@ module.exports = {
// notifyMode: "failure-change",
// A preset that is used as a base for Jest's configuration
preset: "ts-jest",
preset: 'ts-jest',
// Run tests from one or more projects
// projects: null,
@ -133,7 +133,7 @@ module.exports = {
// snapshotSerializers: [],
// The test environment that will be used for testing
testEnvironment: "node",
testEnvironment: 'node',
// Options that will be passed to the testEnvironment
// testEnvironmentOptions: {},
@ -148,10 +148,7 @@ module.exports = {
// ],
// An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
testPathIgnorePatterns: [
"/node_modules/",
"/dist/"
],
testPathIgnorePatterns: ['/node_modules/', '/dist/'],
// The regexp pattern or array of patterns that Jest uses to detect test files
// testRegex: [],
@ -183,13 +180,8 @@ module.exports = {
// verbose: null,
// An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
watchPathIgnorePatterns: [
"<rootDir>/node_modules/",
"<rootDir>/native/",
"<rootDir>/dist/",
"<rootDir>/build/"
],
watchPathIgnorePatterns: ['<rootDir>/node_modules/', '<rootDir>/native/', '<rootDir>/dist/', '<rootDir>/build/'],
// Whether to use watchman for file crawling
// watchman: true,
};
}

View File

@ -1,85 +0,0 @@
/**
* This class is not supposed to be instantiated directly. Instead, any implementation of
* a Decoder will return an instance of this class when instantiated.
*/
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface Decoder {
decode(tokens: string[]): string;
}
/**
* Instantiate a new ByteLevel Decoder
*/
export function byteLevelDecoder(): Decoder;
/**
* Instantiate a new Replace Decoder
* @param [pattern] The pattern to replace
* @param [content] The replacement.
*/
export function replaceDecoder(pattern: string, content: string): Decoder;
/**
* Instantiate a new WordPiece Decoder
* @param [prefix='##'] The prefix to use for subwords that are not a beginning-of-word
* @param [cleanup=true] Whether to cleanup some tokenization artifacts.
* Mainly spaces before punctuation, and some abbreviated english forms.
*/
export function wordPieceDecoder(prefix?: string, cleanup?: boolean): Decoder;
/**
* Instantiate a new ByteFallback Decoder
* ByteFallback is a simple trick which converts tokens looking like `<0x61>`
* to pure bytes, and attempts to make them into a string. If the tokens
 * cannot be decoded you will get � instead for each inconvertible byte token
*/
export function byteFallbackDecoder(): Decoder;
/**
* Instantiate a new Fuse Decoder which fuses all tokens into one string
*/
export function fuseDecoder(): Decoder;
/**
* Instantiate a new Strip Decoder
* @param [content] The character to strip
* @param [left] The number of chars to remove from the left of each token
* @param [right] The number of chars to remove from the right of each token
*/
export function stripDecoder(content: string, left: number, right: number): Decoder;
/**
* Instantiate a new Metaspace
*
* @param [replacement='▁'] The replacement character.
* Must be exactly one character. By default we use the `▁` (U+2581) meta symbol (same as in SentencePiece).
* @param [addPrefixSpace=true] Whether to add a space to the first word if there isn't already one.
* This lets us treat `hello` exactly like `say hello`.
*/
export function metaspaceDecoder(replacement?: string, addPrefixSpace?: boolean): Decoder;
/**
* Instantiate a new BPE Decoder
* @param [suffix='</w>'] The suffix that was used to characterize an end-of-word.
* This suffix will be replaced by whitespaces during the decoding
*/
export function bpeDecoder(suffix?: string): Decoder;
/**
* Instantiate a new CTC Decoder
* @param [pad_token='pad'] The pad token used by CTC to delimit a new token.
* @param [word_delimiter_token='|'] The word delimiter token. It will be replaced by a space
* @param [cleanup=true] Whether to cleanup some tokenization artifacts.
* Mainly spaces before punctuation, and some abbreviated english forms.
*/
export function ctcDecoder(
pad_token?: string,
word_delimiter_token?: string,
cleanup?: boolean
): Decoder;
/**
* Instantiate a new Sequence Decoder
* @param [decoders] The decoders to chain
*/
export function sequenceDecoder(decoders: Decoder[]): Decoder;

View File

@ -1,14 +0,0 @@
const native = require("./native");
module.exports = {
byteLevelDecoder: native.decoders_ByteLevel,
replaceDecoder: native.decoders_Replace,
wordPieceDecoder: native.decoders_WordPiece,
byteFallbackDecoder: native.decoders_ByteFallback,
fuseDecoder: native.decoders_Fuse,
stripDecoder: native.decoders_Strip,
metaspaceDecoder: native.decoders_Metaspace,
bpeDecoder: native.decoders_BPEDecoder,
ctcDecoder: native.decoders_CTC,
sequenceDecoder: native.decoders_Sequence,
};

View File

@ -8,118 +8,102 @@ import {
sequenceDecoder,
stripDecoder,
wordPieceDecoder,
} from "./decoders";
} from '../../'
describe("wordPieceDecoder", () => {
it("accepts `undefined` as first parameter", () => {
expect(wordPieceDecoder(undefined)).toBeDefined();
});
describe('wordPieceDecoder', () => {
it('accepts `undefined` as first parameter', () => {
expect(wordPieceDecoder(undefined)).toBeDefined()
})
it("accepts `undefined` as second parameter", () => {
expect(wordPieceDecoder("test", undefined)).toBeDefined();
});
it('accepts `undefined` as second parameter', () => {
expect(wordPieceDecoder('test', undefined)).toBeDefined()
})
it("can decode arrays of strings", () => {
it('can decode arrays of strings', () => {
expect(wordPieceDecoder().decode(['Hel', '##lo', 'there', 'my', 'fr', '##iend'])).toEqual('Hello there my friend')
})
})
describe('byteFallbackDecoder', () => {
it('accepts `undefined` as first parameter', () => {
expect(byteFallbackDecoder()).toBeDefined()
})
it('can decode arrays of strings', () => {
expect(byteFallbackDecoder().decode(['Hel', 'lo'])).toEqual('Hello')
expect(byteFallbackDecoder().decode(['<0x61>'])).toEqual('a')
expect(byteFallbackDecoder().decode(['<0x61>'])).toEqual('a')
expect(byteFallbackDecoder().decode(['My', ' na', 'me'])).toEqual('My name')
expect(byteFallbackDecoder().decode(['<0x61>'])).toEqual('a')
expect(byteFallbackDecoder().decode(['<0xE5>'])).toEqual('�')
expect(byteFallbackDecoder().decode(['<0xE5>', '<0x8f>'])).toEqual('��')
expect(byteFallbackDecoder().decode(['<0xE5>', '<0x8f>', '<0xab>'])).toEqual('叫')
expect(byteFallbackDecoder().decode(['<0xE5>', '<0x8f>', 'a'])).toEqual('��a')
expect(byteFallbackDecoder().decode(['<0xE5>', '<0x8f>', '<0xab>', 'a'])).toEqual('叫a')
})
})
describe('replaceDecoder', () => {
it('can decode arrays of strings', () => {
expect(replaceDecoder('_', ' ').decode(['Hello', '_Hello'])).toEqual('Hello Hello')
})
})
describe('fuseDecoder', () => {
it('accepts `undefined` as first parameter', () => {
expect(fuseDecoder()).toBeDefined()
})
it('can decode arrays of strings', () => {
expect(fuseDecoder().decode(['Hel', 'lo'])).toEqual('Hello')
})
})
describe('stripDecoder', () => {
it('accepts `undefined` as first parameter', () => {
expect(stripDecoder('_', 0, 0)).toBeDefined()
})
it('can decode arrays of strings', () => {
expect(stripDecoder('_', 1, 0).decode(['_Hel', 'lo', '__there'])).toEqual('Hello_there')
})
})
describe('metaspaceDecoder', () => {
it('accepts `undefined` as first parameter', () => {
expect(metaspaceDecoder(undefined)).toBeDefined()
})
it('accepts `undefined` as second parameter', () => {
expect(metaspaceDecoder('t', undefined)).toBeDefined()
})
it('works', () => {
expect(metaspaceDecoder().decode(['▁Hello'])).toEqual('Hello')
})
})
describe('bpeDecoder', () => {
it('accepts `undefined` as parameter', () => {
expect(bpeDecoder(undefined)).toBeDefined()
})
})
describe('ctcDecoder', () => {
it('accepts `undefined` as parameter', () => {
expect(ctcDecoder(undefined)).toBeDefined()
})
it('encodes correctly', () => {
expect(ctcDecoder().decode(['<pad>', 'h', 'h', 'e', 'e', 'l', 'l', '<pad>', 'l', 'l', 'o'])).toEqual('hello')
})
})
describe('sequenceDecoder', () => {
it('accepts `empty list` as parameter', () => {
expect(sequenceDecoder([])).toBeDefined()
})
it('encodes correctly', () => {
expect(
wordPieceDecoder().decode(["Hel", "##lo", "there", "my", "fr", "##iend"])
).toEqual("Hello there my friend");
});
});
describe("byteFallbackDecoder", () => {
it("accepts `undefined` as first parameter", () => {
expect(byteFallbackDecoder()).toBeDefined();
});
it("can decode arrays of strings", () => {
expect(byteFallbackDecoder().decode(["Hel", "lo"])).toEqual("Hello");
expect(byteFallbackDecoder().decode(["<0x61>"])).toEqual("a");
expect(byteFallbackDecoder().decode(["<0x61>"])).toEqual("a");
expect(byteFallbackDecoder().decode(["My", " na", "me"])).toEqual("My name");
expect(byteFallbackDecoder().decode(["<0x61>"])).toEqual("a");
expect(byteFallbackDecoder().decode(["<0xE5>"])).toEqual("�");
expect(byteFallbackDecoder().decode(["<0xE5>", "<0x8f>"])).toEqual("��");
expect(byteFallbackDecoder().decode(["<0xE5>", "<0x8f>", "<0xab>"])).toEqual("叫");
expect(byteFallbackDecoder().decode(["<0xE5>", "<0x8f>", "a"])).toEqual("��a");
expect(byteFallbackDecoder().decode(["<0xE5>", "<0x8f>", "<0xab>", "a"])).toEqual(
"叫a"
);
});
});
describe("replaceDecoder", () => {
it("can decode arrays of strings", () => {
expect(replaceDecoder("_", " ").decode(["Hello", "_Hello"])).toEqual("Hello Hello");
});
});
describe("fuseDecoder", () => {
it("accepts `undefined` as first parameter", () => {
expect(fuseDecoder()).toBeDefined();
});
it("can decode arrays of strings", () => {
expect(fuseDecoder().decode(["Hel", "lo"])).toEqual("Hello");
});
});
describe("stripDecoder", () => {
it("accepts `undefined` as first parameter", () => {
expect(stripDecoder("_", 0, 0)).toBeDefined();
});
it("can decode arrays of strings", () => {
expect(stripDecoder("_", 1, 0).decode(["_Hel", "lo", "__there"])).toEqual(
"Hello_there"
);
});
});
describe("metaspaceDecoder", () => {
it("accepts `undefined` as first parameter", () => {
expect(metaspaceDecoder(undefined)).toBeDefined();
});
it("accepts `undefined` as second parameter", () => {
expect(metaspaceDecoder("t", undefined)).toBeDefined();
});
});
describe("bpeDecoder", () => {
it("accepts `undefined` as parameter", () => {
expect(bpeDecoder(undefined)).toBeDefined();
});
});
describe("ctcDecoder", () => {
it("accepts `undefined` as parameter", () => {
expect(ctcDecoder(undefined)).toBeDefined();
});
it("encodes correctly", () => {
expect(
ctcDecoder().decode(["<pad>", "h", "h", "e", "e", "l", "l", "<pad>", "l", "l", "o"])
).toEqual("hello");
});
});
describe("sequenceDecoder", () => {
it("accepts `empty list` as parameter", () => {
expect(sequenceDecoder([])).toBeDefined();
});
it("encodes correctly", () => {
expect(
sequenceDecoder([ctcDecoder(), metaspaceDecoder()]).decode([
"▁",
"▁",
"H",
"H",
"i",
"i",
"▁",
"y",
"o",
"u",
])
).toEqual("Hi you");
});
});
sequenceDecoder([ctcDecoder(), metaspaceDecoder()]).decode(['▁', '▁', 'H', 'H', 'i', 'i', '▁', 'y', 'o', 'u']),
).toEqual('Hi you')
})
})
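As the last test shows, decoders compose through `sequenceDecoder`. A short, illustrative combination of the byte-fallback and fuse decoders exercised above; the token strings here are made up for the example:

let { sequenceDecoder, byteFallbackDecoder, fuseDecoder } = require('tokenizers')

// Turn `<0xNN>` byte tokens back into text, then fuse everything into one string.
let decoder = sequenceDecoder([byteFallbackDecoder(), fuseDecoder()])
console.log(decoder.decode(['Hel', 'lo', ' ', '<0xE5>', '<0x8f>', '<0xab>']))
// Should print "Hello 叫", matching the byte-fallback expectations tested above.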

View File

@ -0,0 +1,254 @@
import {
PaddingDirection,
WordPiece,
punctuationPreTokenizer,
sequencePreTokenizer,
whitespacePreTokenizer,
Encoding,
EncodeOptions,
Tokenizer,
} from '../../'
import { InputSequence } from '../../types'
const MOCKS_DIR = __dirname + '/__mocks__'
describe('Can modify pretokenizers on the fly', () => {
let encoding: Encoding
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null,
) => Promise<Encoding>
let tokenizer: Tokenizer
beforeAll(async () => {
const model = await WordPiece.fromFile(`${MOCKS_DIR}/vocab.txt`, {
continuingSubwordPrefix: '##',
})
tokenizer = new Tokenizer(model)
encode = tokenizer.encode.bind(tokenizer)
})
it('Can change pre tokenizer', async () => {
const input = 'my name is john.!?'
tokenizer.setPreTokenizer(sequencePreTokenizer([whitespacePreTokenizer()]))
encoding = await encode(input, null)
expect(encoding.getIds()).toEqual([0, 1, 2, 3, 4, 8])
// Change pre tokenizer
tokenizer.setPreTokenizer(sequencePreTokenizer([whitespacePreTokenizer(), punctuationPreTokenizer()]))
encoding = await encode(input, null)
expect(encoding.getIds()).toEqual([0, 1, 2, 3, 4, 8, 8, 8])
})
})
describe('Encoding', () => {
const originalString = 'my name is john'
const originalPairString = 'what is yours?'
let encoding: Encoding
let encodingDual: Encoding
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null,
) => Promise<Encoding>
beforeAll(async () => {
const model = await WordPiece.fromFile(`${MOCKS_DIR}/vocab.txt`, {
continuingSubwordPrefix: '##',
})
const tokenizer = new Tokenizer(model)
tokenizer.setPreTokenizer(whitespacePreTokenizer())
encode = tokenizer.encode.bind(tokenizer)
})
beforeEach(async () => {
encoding = await encode(originalString, null)
encodingDual = await encode(originalString, originalPairString)
})
it('has a list of defined methods', () => {
expect(typeof encoding.wordToTokens).toBe('function')
expect(typeof encoding.wordToChars).toBe('function')
expect(typeof encoding.tokenToChars).toBe('function')
expect(typeof encoding.tokenToWord).toBe('function')
expect(typeof encoding.charToToken).toBe('function')
expect(typeof encoding.charToWord).toBe('function')
expect(typeof encoding.getAttentionMask).toBe('function')
expect(typeof encoding.getIds).toBe('function')
expect(typeof encoding.getLength).toBe('function')
expect(typeof encoding.getOffsets).toBe('function')
expect(typeof encoding.getOverflowing).toBe('function')
expect(typeof encoding.getSpecialTokensMask).toBe('function')
expect(typeof encoding.getTokens).toBe('function')
expect(typeof encoding.getTypeIds).toBe('function')
expect(typeof encoding.getWordIds).toBe('function')
expect(typeof encoding.getSequenceIds).toBe('function')
expect(typeof encoding.pad).toBe('function')
expect(typeof encoding.truncate).toBe('function')
})
describe('truncate', () => {
it('accepts `undefined` as second parameter', () => {
expect(encoding.truncate(10, undefined)).toBeUndefined()
})
it('should throw an Error on invalid direction', () => {
const t = () => encoding.truncate(10, 3, 'not_valid')
expect(t).toThrow(`not_valid is not a valid truncation direction`)
})
})
describe('getWordIds', () => {
it('returns the correct list of indexes', () => {
const indexes = encoding.getWordIds()
expect(indexes).toEqual([0, 1, 2, 3, 3])
})
})
describe('getSequenceIds', () => {
it('returns the correct list of indexes', () => {
expect(encoding.getSequenceIds()).toEqual([0, 0, 0, 0, 0])
expect(encodingDual.getSequenceIds()).toEqual([0, 0, 0, 0, 0, 1, 1, 1, 1])
})
})
describe('wordToTokens', () => {
it('returns the correct indexes', () => {
const indexes = encoding.wordToTokens(3)
expect(indexes).toEqual([3, 5])
})
it('returns the correct indexes with pair sequences', () => {
expect(encodingDual.wordToTokens(3, 0)).toEqual([3, 5])
expect(encodingDual.wordToTokens(3, 1)).toEqual([8, 9])
})
it('returns undefined when out of range word', () => {
const index = encoding.wordToTokens(100)
expect(index).toBeNull()
})
})
describe('wordToChars', () => {
it('returns the correct offsets', () => {
const offsets = encoding.wordToChars(3)
expect(offsets).toEqual([11, 15])
})
it('returns the correct offsets with pair sequences', () => {
expect(encodingDual.wordToChars(3, 0)).toEqual([11, 15])
expect(encodingDual.wordToChars(3, 1)).toEqual([13, 14])
})
it('returns undefined when out of range word', () => {
const offsets = encoding.wordToChars(100)
expect(offsets).toBeNull()
})
})
describe('tokenToSequence', () => {
it('returns the correct value', () => {
expect(encodingDual.tokenToSequence(4)).toEqual(0)
expect(encodingDual.tokenToSequence(6)).toEqual(1)
})
})
describe('tokenToChars', () => {
it('returns the correct offsets', () => {
const offsets = encoding.tokenToChars(3)
expect(offsets).toEqual([11, 13])
})
it('returns the correct offsets with pair sequences', () => {
expect(encodingDual.tokenToChars(3)).toEqual([11, 13])
expect(encodingDual.tokenToChars(7)).toEqual([8, 13])
})
it('returns undefined when out of range token', () => {
const offsets = encoding.tokenToChars(100)
expect(offsets).toBeNull()
})
})
describe('tokenToWord', () => {
it('returns the correct index', () => {
const index = encoding.tokenToWord(3)
expect(index).toEqual(3)
})
it('returns the correct index with pair sequences', () => {
expect(encodingDual.tokenToWord(3)).toEqual(3)
expect(encodingDual.tokenToWord(7)).toEqual(2)
})
it('returns undefined when out of range token', () => {
const index = encoding.tokenToWord(100)
expect(index).toBeNull()
})
})
describe('charToToken', () => {
it('returns the correct index', () => {
const index = encoding.charToToken(3)
expect(index).toEqual(1)
})
it('returns the correct index with pair sequences', () => {
expect(encodingDual.charToToken(3, 0)).toEqual(1)
expect(encodingDual.charToToken(3, 1)).toEqual(5)
})
it('returns undefined when out of range char', () => {
const index = encoding.charToToken(100)
expect(index).toBeNull()
})
})
describe('charToWord', () => {
it('returns the correct index', () => {
const index = encoding.charToWord(3)
expect(index).toEqual(1)
})
it('returns the correct index with pair sequences', () => {
expect(encodingDual.charToWord(3, 0)).toEqual(1)
expect(encodingDual.charToWord(3, 1)).toEqual(0)
})
it('returns undefined when out of range char', () => {
const index = encoding.charToWord(100)
expect(index).toBeNull()
})
})
describe('pad', () => {
it('works correctly with only one parameter', () => {
encoding.pad(10)
expect(encoding.getTokens()).toHaveLength(10)
})
it('accepts `undefined` as second parameter', () => {
encoding.pad(10, undefined)
expect(encoding.getTokens()).toHaveLength(10)
})
it('accepts options as second parameter', () => {
encoding.pad(10, {
direction: PaddingDirection.Left,
padToken: '[PA]',
padTypeId: 10,
padId: 400,
})
const tokens = encoding.getTokens()
expect(tokens).toHaveLength(10)
expect(tokens[0]).toBe('[PA]')
expect(encoding.getTypeIds()[0]).toBe(10)
expect(encoding.getIds()[0]).toBe(400)
})
})
})

View File

@ -1,15 +0,0 @@
export enum TruncationStrategy {
LongestFirst = "longest_first",
OnlyFirst = "only_first",
OnlySecond = "only_second",
}
export enum TruncationDirection {
Left = "left",
Right = "right",
}
export enum PaddingDirection {
Left = "left",
Right = "right",
}
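These string enums are superseded by the const enums declared in the new `index.d.ts`, where `TruncationDirection` keeps string values while `PaddingDirection` becomes numeric. A brief sketch of passing them through the new options interfaces, reusing the tokenizer file from the tests:

let { Tokenizer, TruncationDirection, PaddingDirection } = require('tokenizers')

let tokenizer = Tokenizer.fromFile('data/tokenizer-wiki.json')
// `direction` accepts either the enum member or its string form.
tokenizer.setTruncation(128, { direction: TruncationDirection.Left, stride: 16 })
tokenizer.setPadding({ maxLength: 128, direction: PaddingDirection.Right, padToken: '[PAD]', padId: 3 })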

View File

@ -1,193 +0,0 @@
/**
* This class is not supposed to be instantiated directly. Instead, any implementation of
 * a Model will return an instance of this class when instantiated.
*/
interface Model {
/**
* Save the current model in the given folder, using the given name
* for the various files that will get created.
 * Any file with the same name that already exists in this folder will be overwritten.
*
* @param folder Name of the destination folder
* @param name Prefix to use in the name of created files
*/
save(folder: string, name?: string): string[];
}
type ModelCallback = (err: Error, model: Model) => void;
export interface BPEOptions {
/**
 * The number of words that the BPE cache can contain. The cache speeds up
 * the process by keeping the result of the merge operations
* for a number of words.
* @default 10_000
*/
cacheCapacity?: number;
/**
 * The BPE dropout to use. Must be a float between 0 and 1
*/
dropout?: number;
/**
* The unknown token to be used by the model
*/
unkToken?: string;
/**
* The prefix to attach to subword units that don't represent a beginning of word
*/
continuingSubwordPrefix?: string;
/**
* The suffix to attach to subword units that represent an end of word
*/
endOfWordSuffix?: string;
}
export namespace BPE {
/**
* Instantiate a BPE model from the given vocab and merges
*
* @param vocab A dict mapping strings to number, representing the vocab
* @param merges An array of tuples of strings, representing two tokens to be merged
* @param options BPE model options
*/
export function init(
vocab: { [token: string]: number },
merges: [string, string][],
options?: BPEOptions
): Model;
/**
* Instantiate a BPE model from the given vocab and merges files
*
* @param vocab Path to a vocabulary JSON file
* @param merges Path to a merge file
* @param options BPE model options
* @param __callback Callback called when model is loaded
*/
export function fromFile(
vocab: string,
merges: string,
optionsOrCallback?: BPEOptions | ModelCallback,
__callback?: ModelCallback
): void;
/**
* Instantiate an empty BPE Model
*/
export function empty(): Model;
}
export interface WordPieceOptions {
/**
* The prefix to attach to subword units that don't represent a beginning of word
* @default "##"
*/
continuingSubwordPrefix?: string;
/**
* The maximum number of characters to authorize in a single word.
* @default 100
*/
maxInputCharsPerWord?: number;
/**
* The unknown token to be used by the model.
* @default "[UNK]"
*/
unkToken?: string;
}
export namespace WordPiece {
/**
* Instantiate a WordPiece model from the given vocab
*
* @param vocab A dict mapping strings to numbers, representing the vocab
* @param options WordPiece model options
*/
export function init(
vocab: { [token: string]: number },
options?: WordPieceOptions
): Model;
/**
* Instantiate a WordPiece model from the given vocab file
*
* @param vocab Path to a vocabulary file
* @param options WordPiece model options
* @param __callback Callback called when model is loaded
*/
export function fromFile(
vocab: string,
optionsOrCallback?: WordPieceOptions | ModelCallback,
__callback?: ModelCallback
): void;
/**
* Instantiate an empty WordPiece model
*/
export function empty(): Model;
}
export interface WordLevelOptions {
/**
* The unknown token to be used by the model.
* @default "[UNK]"
*/
unkToken?: string;
}
export namespace WordLevel {
/**
* Instantiate a WordLevel model from the given vocab
*
* @param vocab A dict mapping strings to numbers, representing the vocab
* @param options WordLevel model options
*/
export function init(
vocab: { [token: string]: number },
options?: WordLevelOptions
): Model;
/**
* Instantiate a WordLevel model from the given vocab file
*
* @param vocab Path to a vocabulary file
* @param options WordLevel model options
* @param __callback Callback called when model is loaded
*/
export function fromFile(
vocab: string,
optionsOrCallback?: WordLevelOptions | ModelCallback,
__callback?: ModelCallback
): void;
/**
* Instantiate an empty WordLevel model
*/
export function empty(): Model;
}
export interface UnigramOptions {
/**
* The unknown token id to be used by the model.
* @default undefined
*/
unkId?: number;
/**
* Whether or not bytefallback support should be enabled.
* @default false
*/
byte_fallback?: boolean;
}
export namespace Unigram {
/**
* Instantiate a Unigram model from the given vocab
*
* @param vocab An array of token and id tuples
 * @param options Unigram model options
*/
export function init(vocab: [string, number][], options?: UnigramOptions): Model;
/**
* Instantiate an empty Unigram model
*/
export function empty(): Model;
}

View File

@ -1,23 +0,0 @@
const native = require("./native");
module.exports = {
BPE: {
init: native.models_BPE_init,
fromFile: native.models_BPE_from_file,
empty: native.models_BPE_empty,
},
WordPiece: {
init: native.models_WordPiece_init,
fromFile: native.models_WordPiece_from_file,
empty: native.models_WordPiece_empty,
},
WordLevel: {
init: native.models_WordLevel_init,
fromFile: native.models_WordLevel_from_file,
empty: native.models_WordLevel_empty,
},
Unigram: {
init: native.models_Unigram_init,
empty: native.models_Unigram_empty,
},
};

View File

@ -1,132 +1,64 @@
/* eslint-disable @typescript-eslint/no-empty-function */
/* eslint-disable @typescript-eslint/no-explicit-any */
import { BPE, Unigram, WordPiece } from "./models";
import { BPE, Unigram, WordPiece } from '../../'
const MOCKS_DIR = __dirname + "/__mocks__";
const MOCKS_DIR = __dirname + '/__mocks__'
describe("WordPiece", () => {
describe("fromFile", () => {
it("throws if called with only one argument", () => {
expect(() => (WordPiece as any).fromFile("test")).toThrow("not enough arguments");
});
describe('WordPiece', () => {
describe('fromFile', () => {
it('throws if called with only one argument', () => {
expect(() => (WordPiece as any).fromFile()).toThrow(
'Failed to convert JavaScript value `Undefined` into rust type `String`',
)
})
it("throws if called with 2 arguments without a callback as third argument", () => {
expect(() => (WordPiece as any).fromFile("test", {})).toThrow(
"not enough arguments"
);
});
it('throws if called with 2 arguments without a callback as third argument', () => {
expect(() => (WordPiece as any).fromFile({})).toThrow(
'Failed to convert JavaScript value `Object {}` into rust type `String`',
)
})
describe("when called with 2 correct arguments", () => {
it("returns `undefined` ", () => {
expect(WordPiece.fromFile(`${MOCKS_DIR}/vocab.txt`, () => {})).toBeUndefined();
});
it('has its callback called with the loaded model', async () => {
const model = await WordPiece.fromFile(`${MOCKS_DIR}/vocab.txt`)
expect(model).toBeDefined()
})
})
})
it("has its callback called with the loaded model", () => {
return new Promise((done) => {
WordPiece.fromFile(`${MOCKS_DIR}/vocab.txt`, (err, model) => {
expect(model).toBeDefined();
done();
});
});
});
});
describe('BPE', () => {
describe('fromFile', () => {
it('has its callback called with the loaded model', async () => {
const model = await BPE.fromFile(`${MOCKS_DIR}/vocab.json`, `${MOCKS_DIR}/merges.txt`)
expect(model).toBeDefined()
})
describe("when called with 3 correct arguments", () => {
it("returns `undefined`", () => {
expect(
WordPiece.fromFile(`${MOCKS_DIR}/vocab.txt`, {}, () => {})
).toBeUndefined();
});
it('has its callback called with the loaded model', async () => {
const model = await BPE.fromFile(`${MOCKS_DIR}/vocab.json`, `${MOCKS_DIR}/merges.txt`, {})
expect(model).toBeDefined()
})
})
describe('When initialized from memory', () => {
it('returns the loaded Model', () => {
const bpe = BPE.init({ a: 0, b: 1, ab: 2 }, [['a', 'b']])
// expect(bpe.constructor.name).toEqual("Model");
expect(bpe.constructor.name).toEqual('BPE')
})
})
})
it("has its callback called with the loaded model", () => {
return new Promise((done) => {
WordPiece.fromFile(`${MOCKS_DIR}/vocab.txt`, {}, (err, model) => {
expect(model).toBeDefined();
done();
});
});
});
});
});
});
describe("BPE", () => {
describe("fromFile", () => {
it("throws if called with only two arguments", () => {
expect(() => (BPE as any).fromFile("test", "bis")).toThrow("not enough arguments");
});
it("throws if called with 3 arguments without a callback as last argument", () => {
expect(() => (BPE as any).fromFile("test", "bis", {})).toThrow(
"not enough arguments"
);
});
});
describe("when called with 3 correct arguments", () => {
it("returns `undefined`", () => {
expect(
BPE.fromFile(`${MOCKS_DIR}/vocab.json`, `${MOCKS_DIR}/merges.txt`, () => {})
).toBeUndefined();
});
it("has its callback called with the loaded model", () => {
return new Promise((done) => {
BPE.fromFile(
`${MOCKS_DIR}/vocab.json`,
`${MOCKS_DIR}/merges.txt`,
(err, model) => {
expect(model).toBeDefined();
done();
}
);
});
});
});
describe("when called with 4 correct arguments", () => {
it("returns `undefined`", () => {
expect(
BPE.fromFile(`${MOCKS_DIR}/vocab.json`, `${MOCKS_DIR}/merges.txt`, {}, () => {})
).toBeUndefined();
});
it("has its callback called with the loaded model", () => {
return new Promise((done) => {
BPE.fromFile(
`${MOCKS_DIR}/vocab.json`,
`${MOCKS_DIR}/merges.txt`,
{},
(err, model) => {
expect(model).toBeDefined();
done();
}
);
});
});
});
describe("When initialized from memory", () => {
it("returns the loaded Model", () => {
const bpe = BPE.init({ a: 0, b: 1, ab: 2 }, [["a", "b"]]);
expect(bpe.constructor.name).toEqual("Model");
});
});
});
describe("Unigram", () => {
it("can be initialized from memory", () => {
describe('Unigram', () => {
it('can be initialized from memory', () => {
const unigram = Unigram.init(
[
["<unk>", 0],
["Hello", -1],
["there", -2],
['<unk>', 0],
['Hello', -1],
['there', -2],
],
{
unkId: 0,
byte_fallback: false,
}
);
expect(unigram.constructor.name).toEqual("Model");
});
});
},
)
expect(unigram.constructor.name).toEqual('Unigram')
})
})
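For reference, a minimal sketch of the new model API exercised by the updated tests above: `fromFile` resolves to a model when awaited, and models can also be built from in-memory data. The `tokenizers` import path is an assumption (the tests import from the package root).
import { BPE, Unigram } from 'tokenizers' // assumed package entry point
async function buildModels() {
  // Build a BPE model from an in-memory vocab and merge list
  const bpe = BPE.init({ a: 0, b: 1, ab: 2 }, [['a', 'b']])
  // Or load one asynchronously from files on disk
  const fromFiles = await BPE.fromFile('vocab.json', 'merges.txt')
  // Unigram takes [token, score] pairs plus an options object
  const unigram = Unigram.init(
    [
      ['<unk>', 0],
      ['Hello', -1],
      ['there', -2],
    ],
    { unkId: 0, byte_fallback: false },
  )
  return { bpe, fromFiles, unigram }
}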

View File

@ -1,2 +0,0 @@
const addon = require("../../native");
module.exports = addon;

View File

@ -1,2 +0,0 @@
const native = require("../bin-package");
module.exports = native;

View File

@ -1,105 +0,0 @@
/**
* This class is not supposed to be instantiated directly. Instead, any implementation of a
* Normalizer will return an instance of this class when instantiated.
*/
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface Normalizer {
normalizeString(s: string): string;
}
export interface BertNormalizerOptions {
/**
* Whether to clean the text, by removing any control characters
* and replacing all whitespaces by the classic one.
* @default true
*/
cleanText?: boolean;
/**
* Whether to handle Chinese chars by putting spaces around them.
* @default true
*/
handleChineseChars?: boolean;
/**
* Whether to lowercase.
* @default true
*/
lowercase?: boolean;
/**
* Whether to strip all accents.
* @default undefined
*/
stripAccents?: boolean;
}
/**
* Instantiate a Bert Normalizer with the given options
*
* @param [options] Normalizer options
* @returns Bert Normalizer. Takes care of normalizing raw text before giving it to a Bert model.
* This includes cleaning the text, handling accents, Chinese chars and lowercasing
*/
export function bertNormalizer(options?: BertNormalizerOptions): Normalizer;
/**
* Returns a new NFC Unicode Normalizer
*/
export function nfcNormalizer(): Normalizer;
/**
* Returns a new NFD Unicode Normalizer
*/
export function nfdNormalizer(): Normalizer;
/**
* Returns a new NFKC Unicode Normalizer
*/
export function nfkcNormalizer(): Normalizer;
/**
* Returns a new NFKD Unicode Normalizer
*/
export function nfkdNormalizer(): Normalizer;
/**
* Instantiate a new Normalization Sequence using the given normalizers
* @param normalizers A list of Normalizer to be run as a sequence
*/
export function sequenceNormalizer(normalizers: Normalizer[]): Normalizer;
/**
* Returns a new Lowercase Normalizer
*/
export function lowercaseNormalizer(): Normalizer;
/**
* Returns a new Strip Normalizer
* @param [left=true] Whether or not to strip on the left (defaults to `true`)
* @param [right=true] Whether or not to strip on the right (defaults to `true`)
*/
export function stripNormalizer(left?: boolean, right?: boolean): Normalizer;
/**
* Returns a new Prepend Normalizer
* @param [prepend] The string to prepend
*/
export function prependNormalizer(prepend: string): Normalizer;
/**
* Returns a new StripAccents Normalizer
*/
export function stripAccentsNormalizer(): Normalizer;
/**
* Returns a new Nmt Normalizer
*/
export function nmtNormalizer(): Normalizer;
/**
* Returns a new Precompiled Normalizer
*/
export function precompiledNormalizer(): Normalizer;
/**
* Returns a new Replace Normalizer
*/
export function replaceNormalizer(): Normalizer;
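A short usage sketch of the factories declared above; the behaviours mirror the strip/prepend tests in this PR, and the `tokenizers` import path is an assumption.
import { prependNormalizer, stripNormalizer, sequenceNormalizer } from 'tokenizers'
// Normalizers expose normalizeString() for direct use
stripNormalizer().normalizeString(' Hello there ') // 'Hello there'
prependNormalizer('_').normalizeString('Hello') // '_Hello'
// They can also be chained into a single normalization step
const chained = sequenceNormalizer([stripNormalizer(), prependNormalizer('_')])
chained.normalizeString(' Hello ') // '_Hello'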

View File

@ -1,17 +0,0 @@
const native = require("./native");
module.exports = {
bertNormalizer: native.normalizers_BertNormalizer,
nfcNormalizer: native.normalizers_NFC,
nfdNormalizer: native.normalizers_NFD,
nfkcNormalizer: native.normalizers_NFKC,
nfkdNormalizer: native.normalizers_NFKD,
sequenceNormalizer: native.normalizers_Sequence,
lowercaseNormalizer: native.normalizers_Lowercase,
stripNormalizer: native.normalizers_Strip,
prependNormalizer: native.normalizers_Prepend,
stripAccentsNormalizer: native.normalizers_StripAccents,
nmtNormalizer: native.normalizers_Nmt,
precompiledNormalizer: native.normalizers_Precompiled,
replaceNormalizer: native.normalizers_Replace,
};

View File

@ -1,48 +1,44 @@
import {
prependNormalizer,
stripAccentsNormalizer,
stripNormalizer,
} from "./normalizers";
import { prependNormalizer, stripAccentsNormalizer, stripNormalizer } from '../../'
describe("stripNormalizer", () => {
it("instantiates with no parameters", () => {
const normalizer = stripNormalizer();
expect(normalizer.constructor.name).toEqual("Normalizer");
});
describe('stripNormalizer', () => {
it('instantiates with no parameters', () => {
const normalizer = stripNormalizer()
expect(normalizer.constructor.name).toEqual('Normalizer')
})
it("accepts `undefined` as first parameter", () => {
expect(stripNormalizer(undefined)).toBeDefined();
});
it('accepts `undefined` as first parameter', () => {
expect(stripNormalizer(undefined)).toBeDefined()
})
it("accepts `undefined` as second parameter", () => {
expect(stripNormalizer(false, undefined)).toBeDefined();
});
it('accepts `undefined` as second parameter', () => {
expect(stripNormalizer(false, undefined)).toBeDefined()
})
it("instantiates with one parameter", () => {
const normalizer = stripNormalizer(false);
expect(normalizer.constructor.name).toEqual("Normalizer");
});
it('instantiates with one parameter', () => {
const normalizer = stripNormalizer(false)
expect(normalizer.constructor.name).toEqual('Normalizer')
})
it("instantiates with two parameters", () => {
const normalizer = stripNormalizer(false, true);
expect(normalizer.constructor.name).toEqual("Normalizer");
});
it('instantiates with two parameters', () => {
const normalizer = stripNormalizer(false, true)
expect(normalizer.constructor.name).toEqual('Normalizer')
})
it("prepend instantiates with one parameter", () => {
const normalizer = prependNormalizer("_");
expect(normalizer.constructor.name).toEqual("Normalizer");
expect(normalizer.normalizeString("Hello")).toEqual("_Hello");
});
it('prepend instantiates with one parameter', () => {
const normalizer = prependNormalizer('_')
expect(normalizer.constructor.name).toEqual('Normalizer')
expect(normalizer.normalizeString('Hello')).toEqual('_Hello')
})
it("can normalize strings", () => {
const normalizer = stripNormalizer();
expect(normalizer.normalizeString(" Hello there ")).toEqual("Hello there");
});
});
it('can normalize strings', () => {
const normalizer = stripNormalizer()
expect(normalizer.normalizeString(' Hello there ')).toEqual('Hello there')
})
})
describe("stripAccentsNormalizer", () => {
it("initialize", () => {
const normalizer = stripAccentsNormalizer();
expect(normalizer.constructor.name).toEqual("Normalizer");
});
});
describe('stripAccentsNormalizer', () => {
it('initialize', () => {
const normalizer = stripAccentsNormalizer()
expect(normalizer.constructor.name).toEqual('Normalizer')
})
})

View File

@ -1,64 +0,0 @@
/**
* This class is not supposed to be instantiated directly. Instead, any implementation of
* a PostProcessor will return an instance of this class when instantiated.
*/
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface PostProcessor {}
/**
* Instantiate a new BertProcessing with the given tokens
*
* @param sep A tuple with the string representation of the SEP token, and its id
* @param cls A tuple with the string representation of the CLS token, and its id
*/
export function bertProcessing(
sep: [string, number],
cls: [string, number]
): PostProcessor;
/**
* Instantiate a new ByteLevelProcessing.
*
* @param [trimOffsets=true] Whether to trim the whitespaces from the produced offsets.
* Takes care of trimming the produced offsets to avoid whitespaces.
* By default, the ByteLevel BPE might include whitespaces in the produced tokens. If you
* don't want the offsets to include these whitespaces, then this processing step must be used.
* @since 0.6.0
*/
export function byteLevelProcessing(trimOffsets?: boolean): PostProcessor;
/**
* Instantiate a new RobertaProcessing with the given tokens
*
* @param sep A tuple with the string representation of the SEP token, and its id
* @param cls A tuple with the string representation of the CLS token, and its id
* @param [trimOffsets=true] Whether to trim the whitespaces in the produced offsets
* @param [addPrefixSpace=true] Whether addPrefixSpace was ON during the pre-tokenization
*/
export function robertaProcessing(
sep: [string, number],
cls: [string, number],
trimOffsets?: boolean,
addPrefixSpace?: boolean
): PostProcessor;
/**
* Instantiate a new TemplateProcessing.
*
* @param single A string describing the template for a single sequence
* @param pair A string describing the template for a pair of sequences
* @param specialTokens An array with all the special tokens
*/
export function templateProcessing(
single: string,
pair?: string,
specialTokens?: [string, number][]
): PostProcessor;
/**
* Instantiate a new SequenceProcessing.
*
* @param PostProcessor[] The list of Processors to use
* @since 0.13.0
*/
export function sequenceProcessing(processors: PostProcessor[]): PostProcessor;
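A brief sketch of the template processor declared above, using the BERT-style templates from the tests in this PR (the `tokenizers` import path is an assumption).
import { templateProcessing } from 'tokenizers'
// '$A' / '$B' stand for the first and second sequence; ':1' assigns type id 1
const processor = templateProcessing(
  '[CLS] $A [SEP]',
  '[CLS] $A [SEP] $B:1 [SEP]:1',
  [
    ['[CLS]', 1],
    ['[SEP]', 2],
  ],
)
// Attach it to a tokenizer with tokenizer.setPostProcessor(processor)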

View File

@ -1,9 +0,0 @@
const native = require("./native");
module.exports = {
bertProcessing: native.processors_BertProcessing,
byteLevelProcessing: native.processors_ByteLevel,
robertaProcessing: native.processors_RobertaProcessing,
templateProcessing: native.processors_TemplateProcessing,
sequenceProcessing: native.processors_Sequence,
};

View File

@ -1,95 +1,81 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import {
bertProcessing,
byteLevelProcessing,
robertaProcessing,
sequenceProcessing,
templateProcessing,
} from "./post-processors";
import { bertProcessing, byteLevelProcessing, robertaProcessing, sequenceProcessing, templateProcessing } from '../../'
describe("bertProcessing", () => {
it("instantiates correctly with only two parameters", () => {
const processor = bertProcessing(["sep", 1], ["cls", 2]);
expect(processor.constructor.name).toEqual("Processor");
});
describe('bertProcessing', () => {
it('instantiates correctly with only two parameters', () => {
const processor = bertProcessing(['sep', 1], ['cls', 2])
expect(processor.constructor.name).toEqual('Processor')
})
it("throws if only one argument is provided", () => {
expect(() => (bertProcessing as any)(["sep", 1])).toThrow("Argument 1 is missing");
});
it('throws if only one argument is provided', () => {
expect(() => (bertProcessing as any)(['sep', 1])).toThrow('Given napi value is not an array')
})
it("throws if arguments are malformed", () => {
expect(() => (bertProcessing as any)(["sep", "1"], ["cls", "2"])).toThrow(
'invalid type: string "1", expected u32'
);
expect(() => (bertProcessing as any)(["sep"], ["cls"])).toThrow(
"invalid length 1, expected a tuple of size 2"
);
});
});
it('throws if arguments are malformed', () => {
expect(() => (bertProcessing as any)(['sep', '1'], ['cls', '2'])).toThrow(
'Failed to convert napi value String into rust type `u32`',
)
expect(() => (bertProcessing as any)(['sep'], ['cls'])).toThrow('Array length < 2')
})
})
describe("byteLevelProcessing", () => {
it("instantiates correctly without any parameter", () => {
const processor = byteLevelProcessing();
expect(processor.constructor.name).toEqual("Processor");
});
describe('byteLevelProcessing', () => {
it('instantiates correctly without any parameter', () => {
const processor = byteLevelProcessing()
expect(processor.constructor.name).toEqual('Processor')
})
it("accepts `undefined` as first parameter", () => {
expect(byteLevelProcessing(undefined)).toBeDefined();
});
it('accepts `undefined` as first parameter', () => {
expect(byteLevelProcessing(undefined)).toBeDefined()
})
it("accepts `boolean` as first parameter", () => {
expect(byteLevelProcessing(true)).toBeDefined();
});
});
it('accepts `boolean` as first parameter', () => {
expect(byteLevelProcessing(true)).toBeDefined()
})
})
describe("robertaProcessing", () => {
it("instantiates correctly with only two parameters", () => {
const processor = robertaProcessing(["sep", 1], ["cls", 2]);
expect(processor.constructor.name).toEqual("Processor");
});
describe('robertaProcessing', () => {
it('instantiates correctly with only two parameters', () => {
const processor = robertaProcessing(['sep', 1], ['cls', 2])
expect(processor.constructor.name).toEqual('Processor')
})
it("accepts `undefined` as third and fourth parameters", () => {
expect(robertaProcessing(["sep", 1], ["cls", 2], undefined, undefined)).toBeDefined();
});
it('accepts `undefined` as third and fourth parameters', () => {
expect(robertaProcessing(['sep', 1], ['cls', 2], undefined, undefined)).toBeDefined()
})
it("accepts `boolean` as third and fourth parameter", () => {
expect(robertaProcessing(["sep", 1], ["cls", 2], true, true)).toBeDefined();
});
});
it('accepts `boolean` as third and fourth parameter', () => {
expect(robertaProcessing(['sep', 1], ['cls', 2], true, true)).toBeDefined()
})
})
describe("templateProcessing", () => {
it("instantiates correctly with only a single template", () => {
const processor = templateProcessing("$A $A");
expect(processor.constructor.name).toEqual("Processor");
});
describe('templateProcessing', () => {
it('instantiates correctly with only a single template', () => {
const processor = templateProcessing('$A $A')
expect(processor.constructor.name).toEqual('Processor')
})
it("throws if special tokens are missing", () => {
expect(() => templateProcessing("[CLS] $A [SEP]")).toThrow(
"Missing SpecialToken(s) with id(s)"
);
});
it('throws if special tokens are missing', () => {
expect(() => templateProcessing('[CLS] $A [SEP]')).toThrow('Missing SpecialToken(s) with id(s)')
})
it("instantiates correctly with both templates", () => {
const processor = templateProcessing(
"[CLS] $A [SEP]",
"[CLS] $A [SEP] $B:1 [SEP]:1",
[
["[CLS]", 1],
["[SEP]", 2],
]
);
expect(processor.constructor.name).toEqual("Processor");
});
});
it('instantiates correctly with both templates', () => {
const processor = templateProcessing('[CLS] $A [SEP]', '[CLS] $A [SEP] $B:1 [SEP]:1', [
['[CLS]', 1],
['[SEP]', 2],
])
expect(processor.constructor.name).toEqual('Processor')
})
})
describe("sequenceProcessing", () => {
it("accepts `PostProcessor[]` as first parameter", () => {
const template = templateProcessing("[CLS] $A [SEP]", "[CLS] $A [SEP] $B:1 [SEP]:1", [
["[CLS]", 1],
["[SEP]", 2],
]);
const bytelevel = byteLevelProcessing(true);
expect(sequenceProcessing([bytelevel, template])).toBeDefined();
});
});
describe('sequenceProcessing', () => {
it('accepts `PostProcessor[]` as first parameter', () => {
const template = templateProcessing('[CLS] $A [SEP]', '[CLS] $A [SEP] $B:1 [SEP]:1', [
['[CLS]', 1],
['[SEP]', 2],
])
const bytelevel = byteLevelProcessing(true)
expect(sequenceProcessing([bytelevel, template])).toBeDefined()
})
})

View File

@ -1,115 +0,0 @@
/**
* This class is not supposed to be instantiated directly. Instead, any implementation of a
* PreTokenizer will return an instance of this class when instantiated.
*/
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface PreTokenizer {
preTokenizeString(s: string): [string, [number, number]][];
}
/**
* Instantiate a new ByteLevel PreTokenizer
*
* @param [addPrefixSpace=true] Whether to add a space to the first word if there isn't already one.
* This lets us treat `hello` exactly like `say hello`.
* @returns ByteLevel PreTokenizer.
* This pre-tokenizer takes care of replacing all bytes of the given string
* with a corresponding representation, as well as splitting into words.
*/
export function byteLevelPreTokenizer(addPrefixSpace?: boolean): PreTokenizer;
/**
* Returns the alphabet used by the ByteLevel PreTokenizer.
* Since the ByteLevel works as its name suggests, at the byte level, it
* encodes any byte to one visible character. This means that there is a
* total of 256 different characters composing this alphabet.
*/
export function byteLevelAlphabet(): string[];
/**
* Returns a Whitespace PreTokenizer
* This pre-tokenizer simply splits using the following regex: `\w+|[^\w\s]+`
*/
export function whitespacePreTokenizer(): PreTokenizer;
/**
* Returns a WhitespaceSplit PreTokenizer
* This pre-tokenizer simply splits on whitespaces only. Works almost like the `.split(' ')`
* function, except that it accounts for multiple consecutive spaces
*/
export function whitespaceSplitPreTokenizer(): PreTokenizer;
/**
* Returns a Split PreTokenizer
* This versatile pre-tokenizer splits using the provided pattern and
* according to the provided behavior. The pattern can be inverted by
* making use of the invert flag.
*
* @param [pattern] A pattern used to split the string. Usually a string or a Regex.
* @param [behavior] The behavior to use when splitting.
* Choices: "removed", "isolated", "mergedWithPrevious", "mergedWithNext",
* "contiguous".
* @param [invert=false] Whether to invert the pattern.
*/
export function splitPreTokenizer(
pattern?: string,
behavior?: string,
invert?: boolean
): PreTokenizer;
/**
* Returns a new Bert PreTokenizer.
* This pre-tokenizer splits tokens on spaces, and also on punctuation.
* Each occurrence of a punctuation character will be treated separately.
*/
export function bertPreTokenizer(): PreTokenizer;
/**
* Returns a new Metaspace PreTokenizer.
* This pre-tokenizer replaces any whitespace by the provided replacement character.
* It then tries to split on these spaces.
*
* @param [replacement="▁"] The replacement character. Must be exactly one character.
* By default we use the `▁` (U+2581) meta symbol (Same as in SentencePiece).
* @param [addPrefixSpace] Whether to add a space to the first word if there isn't already one.
* This lets us treat `hello` exactly like `say hello`.
*/
export function metaspacePreTokenizer(
replacement?: string,
addPrefixSpace?: boolean
): PreTokenizer;
/**
* Returns a CharDelimiterSplit PreTokenizer
* This pre-tokenizer simply splits on the provided delimiter. Works almost like the `.split(delimiter)`
* function, except that it accounts for multiple consecutive spaces
*
* @param delimiter The delimiter character on which the sequence will be split.
*/
export function charDelimiterSplitPreTokenizer(delimiter: string): PreTokenizer;
/**
* Returns a new Punctuation PreTokenizer.
* This pre-tokenizer splits tokens on punctuation according to the provided behavior.
* Each occurrence of a punctuation character is treated separately.
*
* @param [behavior="isolated"] The behavior to use when splitting.
* Choices: "removed", "isolated", "mergedWithPrevious", "mergedWithNext",
* "contiguous"
*/
export function punctuationPreTokenizer(behavior?: string): PreTokenizer;
/**
* Returns a new Sequence PreTokenizer.
 * This pre-tokenizer combines other pre-tokenizers and applies them sequentially.
*/
export function sequencePreTokenizer(pretokenizers: PreTokenizer[]): PreTokenizer;
/**
* Returns a new Digits PreTokenizer.
 * This pre-tokenizer splits on numbers. Optionally it can split on individual digits.
*
* @param [individualDigits=false] Whether to split on individual digits.
*/
export function digitsPreTokenizer(individualDigits?: boolean): PreTokenizer;
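A quick sketch of the pre-tokenizer factories declared above; the metaspace output mirrors the test in this PR, and the `tokenizers` import path is an assumption.
import { metaspacePreTokenizer, punctuationPreTokenizer, sequencePreTokenizer } from 'tokenizers'
// Metaspace replaces whitespace with the ▁ meta symbol and splits on it
metaspacePreTokenizer().preTokenizeString('Hello there friend')
// => [['▁Hello', [0, 5]], ['▁there', [5, 11]], ['▁friend', [11, 18]]]
// Pre-tokenizers compose and are applied in order
const combined = sequencePreTokenizer([punctuationPreTokenizer(), metaspacePreTokenizer()])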

View File

@ -1,15 +0,0 @@
const native = require("./native");
module.exports = {
byteLevelPreTokenizer: native.pre_tokenizers_ByteLevel,
byteLevelAlphabet: native.pre_tokenizers_ByteLevel_Alphabet,
whitespacePreTokenizer: native.pre_tokenizers_Whitespace,
whitespaceSplitPreTokenizer: native.pre_tokenizers_WhitespaceSplit,
bertPreTokenizer: native.pre_tokenizers_BertPreTokenizer,
metaspacePreTokenizer: native.pre_tokenizers_Metaspace,
charDelimiterSplitPreTokenizer: native.pre_tokenizers_CharDelimiterSplit,
punctuationPreTokenizer: native.pre_tokenizers_Punctuation,
sequencePreTokenizer: native.pre_tokenizers_Sequence,
digitsPreTokenizer: native.pre_tokenizers_Digits,
splitPreTokenizer: native.pre_tokenizers_Split,
};

View File

@ -5,65 +5,65 @@ import {
sequencePreTokenizer,
splitPreTokenizer,
whitespaceSplitPreTokenizer,
} from "./pre-tokenizers";
} from '../../'
describe("byteLevelPreTokenizer", () => {
it("instantiates correctly", () => {
const processor = byteLevelPreTokenizer();
expect(processor.constructor.name).toEqual("PreTokenizer");
});
});
describe('byteLevelPreTokenizer', () => {
it('instantiates correctly', () => {
const processor = byteLevelPreTokenizer()
expect(processor.constructor.name).toEqual('PreTokenizer')
})
})
describe("metaspacePreTokenizer", () => {
it("instantiates correctly without any parameter", () => {
const processor = metaspacePreTokenizer();
expect(processor.constructor.name).toEqual("PreTokenizer");
});
describe('metaspacePreTokenizer', () => {
it('instantiates correctly without any parameter', () => {
const processor = metaspacePreTokenizer()
expect(processor.constructor.name).toEqual('PreTokenizer')
})
it("accepts `undefined` as first parameter", () => {
expect(metaspacePreTokenizer(undefined)).toBeDefined();
});
it('accepts `undefined` as first parameter', () => {
expect(metaspacePreTokenizer(undefined)).toBeDefined()
})
it("accepts `undefined` as second parameter", () => {
expect(metaspacePreTokenizer("t", undefined)).toBeDefined();
});
it('accepts `undefined` as second parameter', () => {
expect(metaspacePreTokenizer('t', undefined)).toBeDefined()
})
it("can pre-tokenize strings", () => {
const pretok = metaspacePreTokenizer();
expect(pretok.preTokenizeString("Hello there friend")).toEqual([
["▁Hello", [0, 5]],
["▁there", [5, 11]],
["▁friend", [11, 18]],
]);
});
});
it('can pre-tokenize strings', () => {
const pretok = metaspacePreTokenizer()
expect(pretok.preTokenizeString('Hello there friend')).toEqual([
['▁Hello', [0, 5]],
['▁there', [5, 11]],
['▁friend', [11, 18]],
])
})
})
describe("punctuationPreTokenizer", () => {
it("instantiates correctly without any parameter", () => {
const processor = punctuationPreTokenizer();
expect(processor.constructor.name).toEqual("PreTokenizer");
});
describe('punctuationPreTokenizer', () => {
it('instantiates correctly without any parameter', () => {
const processor = punctuationPreTokenizer()
expect(processor.constructor.name).toEqual('PreTokenizer')
})
it("instantiates correctly with non-default split delimeter", () => {
const processor = punctuationPreTokenizer("removed");
expect(processor.constructor.name).toEqual("PreTokenizer");
});
});
it('instantiates correctly with non-default split delimiter', () => {
const processor = punctuationPreTokenizer('removed')
expect(processor.constructor.name).toEqual('PreTokenizer')
})
})
describe("splitPreTokenizer", () => {
it("instantiates correctly with invert parameter", () => {
const processor = splitPreTokenizer(" ", "mergedWithPrevious", false);
expect(processor.constructor.name).toEqual("PreTokenizer");
});
});
describe('splitPreTokenizer', () => {
it('instantiates correctly with invert parameter', () => {
const processor = splitPreTokenizer(' ', 'mergedWithPrevious', false)
expect(processor.constructor.name).toEqual('PreTokenizer')
})
})
describe("sequencePreTokenizer", () => {
it("instantiates correctly", () => {
const punctuation = punctuationPreTokenizer();
const whitespace = whitespaceSplitPreTokenizer();
const sequence2 = sequencePreTokenizer([]);
expect(sequence2.constructor.name).toEqual("PreTokenizer");
const sequence3 = sequencePreTokenizer([punctuation, whitespace]);
expect(sequence3.constructor.name).toEqual("PreTokenizer");
});
});
describe('sequencePreTokenizer', () => {
it('instantiates correctly', () => {
const punctuation = punctuationPreTokenizer()
const whitespace = whitespaceSplitPreTokenizer()
const sequence2 = sequencePreTokenizer([])
expect(sequence2.constructor.name).toEqual('PreTokenizer')
const sequence3 = sequencePreTokenizer([punctuation, whitespace])
expect(sequence3.constructor.name).toEqual('PreTokenizer')
})
})

View File

@ -1,170 +0,0 @@
import { PaddingDirection } from "./enums";
/**
* An Encoding as returned by the Tokenizer
*/
export interface RawEncoding {
/**
* Get the encoded tokens corresponding to the word at the given index in one of the input
* sequences, with the form [startToken, endToken+1]
* @param word The position of a word in one of the input sequences
* @param seqId The index of the input sequence that contains said word
* @since 0.7.0
*/
wordToTokens(word: number, seqId?: number): [number, number] | undefined;
/**
* Get the offsets of the word at the given index in the input sequence
* @param word The index of the word in the input sequence
* @param seqId The index of the input sequence that contains said word
* @since 0.7.0
*/
wordToChars(word: number, seqId?: number): [number, number] | undefined;
/**
* Get the index of the sequence that contains the given token
* @param token The index of the token in the encoded sequence
*/
tokenToSequence(token: number): number | undefined;
/**
* Get the offsets of the token at the given index
*
* The returned offsets are related to the input sequence that contains the
* token. In order to determine in which input sequence it belongs, you
* must call `tokenToSequence`.
*
* @param token The index of the token in the encoded sequence
* @since 0.7.0
*/
tokenToChars(token: number): [number, number] | undefined;
/**
* Get the word that contains the token at the given index
*
* The returned index is related to the input sequence that contains the
* token. In order to determine in which input sequence it belongs, you
* must call `tokenToSequence`.
*
* @param token The index of the token in the encoded sequence
* @since 0.7.0
*/
tokenToWord(token: number): number | undefined;
/**
* Find the index of the token at the position of the given char
* @param pos The position of a char in one of the input strings
* @param seqId The index of the input sequence that contains said char
* @since 0.6.0
*/
charToToken(pos: number, seqId?: number): number | undefined;
/**
* Get the word that contains the given char
* @param pos The position of a char in the input string
* @param seqId The index of the input sequence that contains said char
* @since 0.7.0
*/
charToWord(pos: number, seqId?: number): number | undefined;
/**
* Returns the attention mask
*/
getAttentionMask(): number[];
/**
* Returns the number of sequences
*/
getNSequences(): number;
/**
* Set the sequence id for this encoding
*/
setSequenceId(seqId: number): undefined;
/**
* Returns the tokenized ids
*/
getIds(): number[];
/**
* Returns the number of tokens
*/
getLength(): number;
/**
* Returns the offsets
*/
getOffsets(): [number, number][];
/**
* Returns the overflowing encodings, after truncation
*/
getOverflowing(): RawEncoding[];
/**
* Returns the special tokens mask
*/
getSpecialTokensMask(): number[];
/**
* Returns the tokenized string
*/
getTokens(): string[];
/**
* Returns the type ids
*/
getTypeIds(): number[];
/**
* The tokenized words indexes
* @since 0.6.0
*/
getWordIds(): (number | undefined)[];
/**
* The sequences indices
*/
getSequenceIds(): (number | undefined)[];
/**
* Pad the current Encoding at the given length
*
* @param length The length at which to pad
* @param [options] Padding options
*/
pad(length: number, options?: PaddingOptions): void;
/**
* Truncate the current Encoding at the given max_length
*
* @param length The maximum length to be kept
* @param [stride=0] The length of the previous first sequence
* to be included in the overflowing sequence
* @param [direction='right'] Truncate direction
*/
truncate(length: number, stride?: number, direction?: string): void;
}
interface PaddingOptions {
/**
* @default "right"
*/
direction?: PaddingDirection;
/**
* The index to be used when padding
* @default 0
*/
padId?: number;
/**
* The type index to be used when padding
* @default 0
*/
padTypeId?: number;
/**
* The pad token to be used when padding
* @default "[PAD]"
*/
padToken?: string;
}
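A sketch of typical `RawEncoding` usage against the interface above; the relative module paths follow the old bindings layout and are assumptions.
import { PaddingDirection } from './enums'
import { RawEncoding } from './raw-encoding'
function padAndInspect(encoding: RawEncoding): string[] {
  // Left-pad to 10 tokens with an explicit pad token / id / type id
  encoding.pad(10, {
    direction: PaddingDirection.Left,
    padToken: '[PAD]',
    padId: 0,
    padTypeId: 0,
  })
  // Each token maps back to a [start, end] character span of its input sequence
  const offsets = encoding.getOffsets()
  console.log(offsets.length, encoding.getLength())
  return encoding.getTokens()
}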

View File

@ -1,262 +0,0 @@
import { promisify } from "util";
import { PaddingDirection } from "./enums";
import { Model, WordPiece, WordPieceOptions } from "./models";
import {
punctuationPreTokenizer,
sequencePreTokenizer,
whitespacePreTokenizer,
} from "./pre-tokenizers";
import { RawEncoding } from "./raw-encoding";
import { EncodeOptions, InputSequence, Tokenizer } from "./tokenizer";
const MOCKS_DIR = __dirname + "/__mocks__";
describe("Can modify pretokenizers on the fly", () => {
let encoding: RawEncoding;
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null
) => Promise<RawEncoding>;
let tokenizer: Tokenizer;
beforeAll(async () => {
const model = await promisify<string, WordPieceOptions, Model>(WordPiece.fromFile)(
`${MOCKS_DIR}/vocab.txt`,
{
continuingSubwordPrefix: "##",
}
);
tokenizer = new Tokenizer(model);
encode = promisify(tokenizer.encode.bind(tokenizer));
});
it("Can change pre tokenizer", async () => {
const input = "my name is john.!?";
tokenizer.setPreTokenizer(sequencePreTokenizer([whitespacePreTokenizer()]));
encoding = await encode(input, null);
expect(encoding.getIds()).toEqual([0, 1, 2, 3, 4, 8]);
// Change pre tokenizer
tokenizer.setPreTokenizer(
sequencePreTokenizer([whitespacePreTokenizer(), punctuationPreTokenizer()])
);
encoding = await encode(input, null);
expect(encoding.getIds()).toEqual([0, 1, 2, 3, 4, 8, 8, 8]);
});
});
describe("RawEncoding", () => {
const originalString = "my name is john";
const originalPairString = "what is yours?";
let encoding: RawEncoding;
let encodingDual: RawEncoding;
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null
) => Promise<RawEncoding>;
beforeAll(async () => {
const model = await promisify<string, WordPieceOptions, Model>(WordPiece.fromFile)(
`${MOCKS_DIR}/vocab.txt`,
{
continuingSubwordPrefix: "##",
}
);
const tokenizer = new Tokenizer(model);
tokenizer.setPreTokenizer(whitespacePreTokenizer());
encode = promisify(tokenizer.encode.bind(tokenizer));
});
beforeEach(async () => {
encoding = await encode(originalString, null);
encodingDual = await encode(originalString, originalPairString);
});
it("has a list of defined methods", async () => {
expect(typeof encoding.wordToTokens).toBe("function");
expect(typeof encoding.wordToChars).toBe("function");
expect(typeof encoding.tokenToChars).toBe("function");
expect(typeof encoding.tokenToWord).toBe("function");
expect(typeof encoding.charToToken).toBe("function");
expect(typeof encoding.charToWord).toBe("function");
expect(typeof encoding.getAttentionMask).toBe("function");
expect(typeof encoding.getIds).toBe("function");
expect(typeof encoding.getLength).toBe("function");
expect(typeof encoding.getOffsets).toBe("function");
expect(typeof encoding.getOverflowing).toBe("function");
expect(typeof encoding.getSpecialTokensMask).toBe("function");
expect(typeof encoding.getTokens).toBe("function");
expect(typeof encoding.getTypeIds).toBe("function");
expect(typeof encoding.getWordIds).toBe("function");
expect(typeof encoding.getSequenceIds).toBe("function");
expect(typeof encoding.pad).toBe("function");
expect(typeof encoding.truncate).toBe("function");
});
describe("truncate", () => {
it("accepts `undefined` as second parameter", () => {
expect(encoding.truncate(10, undefined)).toBeUndefined();
});
it("should throw an Error on invalid direction", () => {
const t = () => encoding.truncate(10, 3, "not_valid");
expect(t).toThrow(`Invalid truncation direction value : not_valid`);
});
});
describe("getWordIds", () => {
it("returns the correct list of indexes", () => {
const indexes = encoding.getWordIds();
expect(indexes).toEqual([0, 1, 2, 3, 3]);
});
});
describe("getSequenceIds", () => {
it("returns the correct list of indexes", () => {
expect(encoding.getSequenceIds()).toEqual([0, 0, 0, 0, 0]);
expect(encodingDual.getSequenceIds()).toEqual([0, 0, 0, 0, 0, 1, 1, 1, 1]);
});
});
describe("wordToTokens", () => {
it("returns the correct indexes", () => {
const indexes = encoding.wordToTokens(3);
expect(indexes).toEqual([3, 5]);
});
it("returns the corrent indexes with pair sequences", () => {
expect(encodingDual.wordToTokens(3, 0)).toEqual([3, 5]);
expect(encodingDual.wordToTokens(3, 1)).toEqual([8, 9]);
});
it("returns undefined when out of range word", () => {
const index = encoding.wordToTokens(100);
expect(index).toBeUndefined();
});
});
describe("wordToChars", () => {
it("returns the correct offsets", () => {
const offsets = encoding.wordToChars(3);
expect(offsets).toEqual([11, 15]);
});
it("returns the correct offsets with pair sequences", () => {
expect(encodingDual.wordToChars(3, 0)).toEqual([11, 15]);
expect(encodingDual.wordToChars(3, 1)).toEqual([13, 14]);
});
it("returns undefined when out of range word", () => {
const offsets = encoding.wordToChars(100);
expect(offsets).toBeUndefined();
});
});
describe("tokenToSequence", () => {
it("returns the correct value", () => {
expect(encodingDual.tokenToSequence(4)).toEqual(0);
expect(encodingDual.tokenToSequence(6)).toEqual(1);
});
});
describe("tokenToChars", () => {
it("returns the correct offsets", () => {
const offsets = encoding.tokenToChars(3);
expect(offsets).toEqual([11, 13]);
});
it("returns the correct offsets with pair sequences", () => {
expect(encodingDual.tokenToChars(3)).toEqual([11, 13]);
expect(encodingDual.tokenToChars(7)).toEqual([8, 13]);
});
it("returns undefined when out of range token", () => {
const offsets = encoding.tokenToChars(100);
expect(offsets).toBeUndefined();
});
});
describe("tokenToWord", () => {
it("returns the correct index", () => {
const index = encoding.tokenToWord(3);
expect(index).toEqual(3);
});
it("returns the correct index with pair sequences", () => {
expect(encodingDual.tokenToWord(3)).toEqual(3);
expect(encodingDual.tokenToWord(7)).toEqual(2);
});
it("returns undefined when out of range token", () => {
const index = encoding.tokenToWord(100);
expect(index).toBeUndefined();
});
});
describe("charToToken", () => {
it("returns the correct index", () => {
const index = encoding.charToToken(3);
expect(index).toEqual(1);
});
it("returns the correct index with pair sequences", () => {
expect(encodingDual.charToToken(3, 0)).toEqual(1);
expect(encodingDual.charToToken(3, 1)).toEqual(5);
});
it("returns undefined when out of range char", () => {
const index = encoding.charToToken(100);
expect(index).toBeUndefined();
});
});
describe("charToWord", () => {
it("returns the correct index", () => {
const index = encoding.charToWord(3);
expect(index).toEqual(1);
});
it("returns the correct index with pair sequences", () => {
expect(encodingDual.charToWord(3, 0)).toEqual(1);
expect(encodingDual.charToWord(3, 1)).toEqual(0);
});
it("returns undefined when out of range char", () => {
const index = encoding.charToWord(100);
expect(index).toBeUndefined();
});
});
describe("pad", () => {
it("works correctly with only one parameter", () => {
encoding.pad(10);
expect(encoding.getTokens()).toHaveLength(10);
});
it("accepts `undefined` as second parameter", () => {
encoding.pad(10, undefined);
expect(encoding.getTokens()).toHaveLength(10);
});
it("accepts options as second parameter", () => {
encoding.pad(10, {
direction: PaddingDirection.Left,
padToken: "[PA]",
padTypeId: 10,
padId: 400,
});
const tokens = encoding.getTokens();
expect(tokens).toHaveLength(10);
expect(tokens[0]).toBe("[PA]");
expect(encoding.getTypeIds()[0]).toBe(10);
expect(encoding.getIds()[0]).toBe(400);
});
});
});

View File

@ -1,444 +0,0 @@
import { Decoder } from "./decoders";
import { PaddingDirection, TruncationDirection, TruncationStrategy } from "./enums";
import { Model } from "./models";
import { Normalizer } from "./normalizers";
import { PostProcessor } from "./post-processors";
import { PreTokenizer } from "./pre-tokenizers";
import { RawEncoding } from "./raw-encoding";
import { Trainer } from "./trainers";
export interface FromPretrainedOptions {
/**
* The revision to download
* @default "main"
*/
revision?: string;
/**
* The auth token to use to access private repositories on the Hugging Face Hub
* @default undefined
*/
authToken?: string;
}
export interface TruncationOptions {
/**
* The length of the previous sequence to be included in the overflowing sequence
* @default 0
*/
stride?: number;
/**
* Strategy to use:
 * - `TruncationStrategy.LongestFirst` Iteratively reduce the input sequences until the total length is under max_length,
 * removing a token from the longest sequence at each step (when there is a pair of input sequences).
* - `TruncationStrategy.OnlyFirst` Only truncate the first sequence.
* - `TruncationStrategy.OnlySecond` Only truncate the second sequence.
* @default TruncationStrategy.LongestFirst
*/
strategy?: TruncationStrategy;
/**
* Which side to truncate
* @default TruncationDirection.Left
*/
direction?: TruncationDirection;
}
export interface TruncationConfiguration extends Required<TruncationOptions> {
/**
* The maximum length at which to truncate
*/
maxLength: number;
}
export type PaddingConfiguration = Required<
Omit<PaddingOptions, "maxLength" | "padToMultipleOf">
> &
Pick<PaddingOptions, "maxLength" | "padToMultipleOf">;
export interface PaddingOptions {
/**
* @default PaddingDirection.Right
*/
direction?: PaddingDirection;
/**
* Padding length. If not provided:
* - Will default to the longest sequence when encoding in batch.
 * - No padding will be applied when encoding a single sequence
*/
maxLength?: number;
/**
* If specified, the padding will snap to a multiple of the given value.
* @default undefined
*/
padToMultipleOf?: number;
/**
* The index to be used when padding
* @default 0
*/
padId?: number;
/**
* The type index to be used when padding
* @default 0
*/
padTypeId?: number;
/**
* The pad token to be used when padding
* @default "[PAD]"
*/
padToken?: string;
}
export type TextInputSequence = string;
export type PreTokenizedInputSequence = string[];
export type InputSequence = TextInputSequence | PreTokenizedInputSequence;
export type TextEncodeInput = TextInputSequence | [TextInputSequence, TextInputSequence];
export type PreTokenizedEncodeInput =
| PreTokenizedInputSequence
| [PreTokenizedInputSequence, PreTokenizedInputSequence];
export type EncodeInput = TextEncodeInput | PreTokenizedEncodeInput;
export interface EncodeOptions {
/**
* Whether the given sequence is pre-tokenized
* @default false
*/
isPretokenized?: boolean;
/**
* Whether we should add special tokens
* @default true
*/
addSpecialTokens?: boolean;
}
/**
* A Tokenizer works as a pipeline, it processes some raw text as input and outputs
* an `Encoding`.
* The various steps of the pipeline are:
* 1. The `Normalizer`: in charge of normalizing the text. Common examples of
* normalization are the unicode normalization standards, such as NFD or NFKC.
* 2. The `PreTokenizer`: in charge of creating initial words splits in the text.
* The most common way of splitting text is simply on whitespace.
* 3. The `Model`: in charge of doing the actual tokenization. An example of a
* `Model` would be `BPE` or `WordPiece`.
* 4. The `PostProcessor`: in charge of post-processing the `Encoding` to add anything
* relevant that, for example, a language model would need, such as special tokens.
*/
export class Tokenizer {
/**
* Instantiate a new Tokenizer using the given Model
*/
constructor(model: Model);
/**
* Instantiate a new Tokenizer from the given file
* @param path Path to a file containing a Tokenizer
*/
static fromFile(path: string): Tokenizer;
/**
* Instantiate a new Tokenizer from the given JSON string
* @param s A JSON string representation of the Tokenizer
*/
static fromString(s: string): Tokenizer;
/**
* Instantiate a new Tokenizer from an existing file on the
* Hugging Face Hub. Any model repo containing a `tokenizer.json`
* can be used here.
* @param identifier A model identifier on the Hub
* @param options Additional options
*/
static fromPretrained(s: string, options?: FromPretrainedOptions): Tokenizer;
/**
* Add the given tokens to the vocabulary
*
* @param tokens A list of tokens to add to the vocabulary.
* Each token can either be a string, or an instance of {@link AddedToken}.
* @returns The number of tokens that were added to the vocabulary
*/
addTokens(tokens: (string | AddedToken)[]): number;
/**
* Add the given special tokens to the vocabulary, and treat them as special tokens.
* The special tokens will never be processed by the model, and will be removed while decoding.
*
* @param tokens The list of special tokens to add.
* Each token can either be a string or an instance of {@link AddedToken}.
* @returns The number of tokens that were added to the vocabulary
*/
addSpecialTokens(tokens: (string | AddedToken)[]): number;
/**
* Encode the given sequence
*
* @param sequence The sequence to encode
* @param pair The optional pair sequence
* @param addSpecialTokens Whether to add the special tokens while encoding
* @param __callback Callback called when encoding is complete
*/
encode(
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null, // |(err: Error, encoding: RawEncoding) => void,
__callback?: (err: Error, encoding: RawEncoding) => void
): void;
/**
* Encode the given sequences or pair of sequences
*
* @param sequences A list of sequences or pair of sequences. The list can contain both at the same time.
* @param addSpecialTokens Whether to add the special tokens while encoding
* @param __callback Callback called when encoding is complete
*/
encodeBatch(
inputs: EncodeInput[],
options?: EncodeOptions | null, // (err: Error, encodings: RawEncoding[]) => void,
__callback?: (err: Error, encodings: RawEncoding[]) => void
): void;
/**
* Decode the given list of ids to a string sequence
*
* @param ids A list of ids to be decoded
* @param skipSpecialTokens Whether to remove all the special tokens from the output string
* @param __callback Callback called with decoded string
*/
decode(
ids: number[],
skipSpecialTokens: boolean,
__callback: (err: Error, encodings: string) => void
): void;
/**
* Decode the list of sequences to a list of string sequences
*
* @param sequences A list of sequence of ids to be decoded
* @param skipSpecialTokens Whether to remove all the special tokens from the output strings
* @param __callback Callback called with decoded strings
*/
decodeBatch(
sequences: number[][],
skipSpecialTokens: boolean,
__callback: (err: Error, encodings: string[]) => void
): void[];
/**
* Convert the given token id to its corresponding string
*
* @param id The token id to convert
* @returns The corresponding string if it exists
*/
idToToken(id: number): string | undefined;
/**
* Convert the given token to its corresponding id
*
* @param token The token to convert
* @returns The corresponding id if it exists
*/
tokenToId(token: string): number | undefined;
/**
* Enable/change padding with specified options
* @param [options] Padding options
*/
setPadding(options?: PaddingOptions): PaddingConfiguration;
/**
* Disable padding
*/
disablePadding(): void;
/**
* Enable/change truncation with specified options
*
* @param maxLength The maximum length at which to truncate
* @param [options] Additional truncation options
*/
setTruncation(maxLength: number, options?: TruncationOptions): TruncationConfiguration;
/**
* Disable truncation
*/
disableTruncation(): void;
/**
* Train the model using the given files
*
* @param trainer Trainer to use
* @param files List of files to use
*/
train(trainer: Trainer, files: string[]): void;
/**
* Returns the vocabulary
*
* @param [withAddedTokens=true] Whether to include the added tokens in the vocabulary
*/
getVocab(withAddedTokens?: boolean): { [token: string]: number };
/**
* Returns the size of the vocabulary
*
* @param [withAddedTokens=true] Whether to include the added tokens in the vocabulary's size
*/
getVocabSize(withAddedTokens?: boolean): number;
/**
* Returns the number of encoding tasks running currently
*/
runningTasks(): number;
/**
* Returns the model in use
*/
getModel(): Model;
/**
* Change the model to use with this Tokenizer
* @param model New model to use
* @throws Will throw an error if any task is running
* @throws Will throw an error if the model is already used in another Tokenizer
*/
setModel(model: Model): void;
/**
* Returns the normalizer in use
*/
getNormalizer(): Normalizer | undefined;
/**
* Change the normalizer to use with this Tokenizer
* @param normalizer New normalizer to use
* @throws Will throw an error if any task is running
* @throws Will throw an error if the normalizer is already used in another Tokenizer
*/
setNormalizer(normalizer: Normalizer): void;
/**
* Returns the pre-tokenizer in use
*/
getPreTokenizer(): PreTokenizer | undefined;
/**
* Change the pre-tokenizer to use with this Tokenizer
* @param preTokenizer New pre-tokenizer to use
* @throws Will throw an error if any task is running
* @throws Will throw an error if the pre-tokenizer is already used in another Tokenizer
*/
setPreTokenizer(preTokenizer: PreTokenizer): void;
/**
* Returns the post-processor in use
*/
getPostProcessor(): PostProcessor | undefined;
/**
* Change the post-processor to use with this Tokenizer
* @param postProcessor New post-processor to use
* @throws Will throw an error if any task is running
* @throws Will throw an error if the post-processor is already used in another Tokenizer
*/
setPostProcessor(processor: PostProcessor): void;
/**
* Returns the decoder in use
*/
getDecoder(): Decoder | undefined;
/**
* Change the decoder to use with this Tokenizer
* @param decoder New decoder to use
* @throws Will throw an error if any task is running
* @throws Will throw an error if the decoder is already used in another Tokenizer
*/
setDecoder(decoder: Decoder): void;
/**
* Apply all the post-processing steps to the given encodings.
* The various steps are:
* 1. Truncate according to global params (@see setTruncation)
* 2. Apply the PostProcessor
* 3. Pad according to global params (@see setPadding)
* @param encoding The main Encoding to post process
* @param [pair] An optional pair Encoding
* @param [addSpecialTokens=true] Whether to add special tokens. Default to `true`.
* @since 0.6.0
*/
postProcess(
encoding: RawEncoding,
pair?: RawEncoding,
addSpecialTokens?: boolean
): RawEncoding;
/**
* Save the Tokenizer as JSON to the given path
* @param path Path to the JSON file to write
* @param [pretty=false] Whether the JSON string should be prettified
*/
save(path: string, pretty?: boolean): void;
/**
* Get a serialized JSON version of the Tokenizer as a string
* @param [pretty=false] Whether the JSON string should be prettified
*/
toString(pretty?: boolean): string;
}
/**
* Options used to construct an AddedToken
* @since 0.6.0
*/
export interface AddedTokenOptions {
/**
* Whether this token should strip all potential whitespaces on the left side.
* If True, this token will greedily match any whitespace on the left and then strip
* them out.
* @default False
*/
leftStrip?: boolean;
/**
* Whether this token should strip all potential whitespaces on the right side.
* If True, this token will greedily match any whitespace on the right and then strip
* them out.
* @default False
*/
rightStrip?: boolean;
/**
* Whether this token should only match against single word.
* If True, this token will never match inside of a word.
* @default False
*/
singleWord?: boolean;
/**
* Whether this token should match on the normalized version of the text. For example
* with the added token `yesterday` and a normalizer in charge of lowercasing the text,
* the input `I saw a lion Yesterday` would match the token.
* This is False for special tokens by default, true otherwise
* @default True
*/
normalized?: boolean;
}
/**
* AddedToken represents a token to be added to a Tokenizer.
* An AddedToken can have special options defining the way it should behave.
*
* @since 0.6.0
*/
export class AddedToken {
/**
* Instantiate a new AddedToken
* @param content The content of the token
* @param special Whether this is a special token
* @param [options] Options for the token
*/
constructor(content: string, special: boolean, options?: AddedTokenOptions);
/**
* Get the content of the AddedToken
*/
getContent(): string;
}
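To tie together the pipeline description above, a condensed sketch of assembling and using a Tokenizer through the old callback-based API, following the promisify pattern used in the existing tests; the vocab path is a placeholder and the relative module paths follow the old bindings layout.
import { promisify } from 'util'
import { Model, WordPiece, WordPieceOptions } from './models'
import { bertNormalizer } from './normalizers'
import { whitespacePreTokenizer } from './pre-tokenizers'
import { RawEncoding } from './raw-encoding'
import { EncodeOptions, InputSequence, Tokenizer } from './tokenizer'
async function tokenize(text: string): Promise<string[]> {
  // 1. Model: load a WordPiece vocabulary (placeholder path)
  const model = await promisify<string, WordPieceOptions, Model>(WordPiece.fromFile)(
    'vocab.txt',
    { continuingSubwordPrefix: '##' },
  )
  const tokenizer = new Tokenizer(model)
  // 2. Normalizer and 3. PreTokenizer
  tokenizer.setNormalizer(bertNormalizer({ lowercase: true }))
  tokenizer.setPreTokenizer(whitespacePreTokenizer())
  // Encode through the promisified callback API
  const encode: (
    sequence: InputSequence,
    pair?: InputSequence | null,
    options?: EncodeOptions | null,
  ) => Promise<RawEncoding> = promisify(tokenizer.encode.bind(tokenizer))
  const encoding = await encode(text, null)
  return encoding.getTokens()
}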

View File

@ -1,12 +0,0 @@
const native = require("./native");
class Tokenizer extends native.tokenizer_Tokenizer {
static fromString = native.tokenizer_Tokenizer_from_string;
static fromFile = native.tokenizer_Tokenizer_from_file;
static fromPretrained = native.tokenizer_Tokenizer_from_pretrained;
}
module.exports = {
AddedToken: native.tokenizer_AddedToken,
Tokenizer,
};

View File

@ -1,222 +1,189 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable @typescript-eslint/no-empty-function */
import { promisify } from "util";
import { TruncationStrategy, BPE, Encoding, AddedToken, Tokenizer } from '../../'
import { PaddingDirection, TruncationDirection, TruncationStrategy } from "./enums";
import { BPE } from "./models";
import { RawEncoding } from "./raw-encoding";
import {
AddedToken,
EncodeInput,
EncodeOptions,
InputSequence,
PaddingConfiguration,
Tokenizer,
TruncationConfiguration,
} from "./tokenizer";
// jest.mock('../bindings/tokenizer');
// jest.mock('../bindings/models', () => ({
// jest.mock('../../bindings/tokenizer');
// jest.mock('../../bindings/models', () => ({
// __esModule: true,
// Model: jest.fn()
// }));
// Or:
// jest.mock('../bindings/models', () => {
// return require('../bindings/__mocks__/models');
// jest.mock('../../bindings/models', () => {
// return require('../../bindings/__mocks__/models');
// });
// const TokenizerMock = mocked(Tokenizer);
describe("AddedToken", () => {
it("instantiates with only content", () => {
const addToken = new AddedToken("test", false);
expect(addToken.constructor.name).toEqual("AddedToken");
});
describe('AddedToken', () => {
it('instantiates with only content', () => {
const addToken = new AddedToken('test', false)
expect(addToken.constructor.name).toEqual('AddedToken')
})
it("instantiates with empty options", () => {
const addToken = new AddedToken("test", false, {});
expect(addToken.constructor.name).toEqual("AddedToken");
});
it('instantiates with empty options', () => {
const addToken = new AddedToken('test', false, {})
expect(addToken.constructor.name).toEqual('AddedToken')
})
it("instantiates with options", () => {
const addToken = new AddedToken("test", false, {
it('instantiates with options', () => {
const addToken = new AddedToken('test', false, {
leftStrip: true,
rightStrip: true,
singleWord: true,
});
expect(addToken.constructor.name).toEqual("AddedToken");
});
})
expect(addToken.constructor.name).toEqual('AddedToken')
})
describe("getContent", () => {
it("returns the string content of AddedToken", () => {
const addedToken = new AddedToken("test", false);
expect(addedToken.getContent()).toEqual("test");
});
});
});
describe('getContent', () => {
it('returns the string content of AddedToken', () => {
const addedToken = new AddedToken('test', false)
expect(addedToken.getContent()).toEqual('test')
})
})
})
describe("Tokenizer", () => {
it("has expected methods", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
describe('Tokenizer', () => {
it('has expected methods', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
expect(typeof Tokenizer.fromFile).toBe("function");
expect(typeof Tokenizer.fromString).toBe("function");
expect(typeof Tokenizer.fromPretrained).toBe("function");
expect(typeof Tokenizer.fromFile).toBe('function')
expect(typeof Tokenizer.fromString).toBe('function')
// expect(typeof Tokenizer.fromPretrained).toBe('function')
expect(typeof tokenizer.addSpecialTokens).toBe("function");
expect(typeof tokenizer.addTokens).toBe("function");
expect(typeof tokenizer.decode).toBe("function");
expect(typeof tokenizer.decodeBatch).toBe("function");
expect(typeof tokenizer.disablePadding).toBe("function");
expect(typeof tokenizer.disableTruncation).toBe("function");
expect(typeof tokenizer.encode).toBe("function");
expect(typeof tokenizer.encodeBatch).toBe("function");
expect(typeof tokenizer.getDecoder).toBe("function");
expect(typeof tokenizer.getNormalizer).toBe("function");
expect(typeof tokenizer.getPostProcessor).toBe("function");
expect(typeof tokenizer.getPreTokenizer).toBe("function");
expect(typeof tokenizer.getVocab).toBe("function");
expect(typeof tokenizer.getVocabSize).toBe("function");
expect(typeof tokenizer.idToToken).toBe("function");
expect(typeof tokenizer.runningTasks).toBe("function");
expect(typeof tokenizer.save).toBe("function");
expect(typeof tokenizer.setDecoder).toBe("function");
expect(typeof tokenizer.setModel).toBe("function");
expect(typeof tokenizer.setNormalizer).toBe("function");
expect(typeof tokenizer.setPadding).toBe("function");
expect(typeof tokenizer.setPostProcessor).toBe("function");
expect(typeof tokenizer.setPreTokenizer).toBe("function");
expect(typeof tokenizer.setTruncation).toBe("function");
expect(typeof tokenizer.tokenToId).toBe("function");
expect(typeof tokenizer.toString).toBe("function");
expect(typeof tokenizer.train).toBe("function");
});
expect(typeof tokenizer.addSpecialTokens).toBe('function')
expect(typeof tokenizer.addTokens).toBe('function')
expect(typeof tokenizer.decode).toBe('function')
expect(typeof tokenizer.decodeBatch).toBe('function')
expect(typeof tokenizer.disablePadding).toBe('function')
expect(typeof tokenizer.disableTruncation).toBe('function')
expect(typeof tokenizer.encode).toBe('function')
expect(typeof tokenizer.encodeBatch).toBe('function')
expect(typeof tokenizer.getDecoder).toBe('function')
expect(typeof tokenizer.getNormalizer).toBe('function')
expect(typeof tokenizer.getPostProcessor).toBe('function')
expect(typeof tokenizer.getPreTokenizer).toBe('function')
expect(typeof tokenizer.getVocab).toBe('function')
expect(typeof tokenizer.getVocabSize).toBe('function')
expect(typeof tokenizer.idToToken).toBe('function')
expect(typeof tokenizer.runningTasks).toBe('function')
expect(typeof tokenizer.save).toBe('function')
expect(typeof tokenizer.setDecoder).toBe('function')
expect(typeof tokenizer.setModel).toBe('function')
expect(typeof tokenizer.setNormalizer).toBe('function')
expect(typeof tokenizer.setPadding).toBe('function')
expect(typeof tokenizer.setPostProcessor).toBe('function')
expect(typeof tokenizer.setPreTokenizer).toBe('function')
expect(typeof tokenizer.setTruncation).toBe('function')
expect(typeof tokenizer.tokenToId).toBe('function')
expect(typeof tokenizer.toString).toBe('function')
expect(typeof tokenizer.train).toBe('function')
})
it("can be instantiated from the hub", async () => {
let tokenizer: Tokenizer;
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null
) => Promise<RawEncoding>;
let output: RawEncoding;
// it('can be instantiated from the hub', async () => {
// let tokenizer: Tokenizer
// let output: Encoding
tokenizer = Tokenizer.fromPretrained("bert-base-cased");
encode = promisify(tokenizer.encode.bind(tokenizer));
output = await encode("Hey there dear friend!", null, { addSpecialTokens: false });
expect(output.getTokens()).toEqual(["Hey", "there", "dear", "friend", "!"]);
// tokenizer = Tokenizer.fromPretrained('bert-base-cased')
// output = await tokenizer.encode('Hey there dear friend!', null, { addSpecialTokens: false })
// expect(output.getTokens()).toEqual(['Hey', 'there', 'dear', 'friend', '!'])
tokenizer = Tokenizer.fromPretrained("anthony/tokenizers-test");
encode = promisify(tokenizer.encode.bind(tokenizer));
output = await encode("Hey there dear friend!", null, { addSpecialTokens: false });
expect(output.getTokens()).toEqual(["hey", "there", "dear", "friend", "!"]);
// tokenizer = Tokenizer.fromPretrained('anthony/tokenizers-test')
// output = await tokenizer.encode('Hey there dear friend!', null, { addSpecialTokens: false })
// expect(output.getTokens()).toEqual(['hey', 'there', 'dear', 'friend', '!'])
tokenizer = Tokenizer.fromPretrained("anthony/tokenizers-test", {
revision: "gpt-2",
});
encode = promisify(tokenizer.encode.bind(tokenizer));
output = await encode("Hey there dear friend!", null, { addSpecialTokens: false });
expect(output.getTokens()).toEqual(["Hey", "Ġthere", "Ġdear", "Ġfriend", "!"]);
});
// tokenizer = Tokenizer.fromPretrained('anthony/tokenizers-test', {
// revision: 'gpt-2',
// })
// output = await tokenizer.encode('Hey there dear friend!', null, { addSpecialTokens: false })
// expect(output.getTokens()).toEqual(['Hey', 'Ġthere', 'Ġdear', 'Ġfriend', '!'])
// }, 10000)
describe("addTokens", () => {
it("accepts a list of string as new tokens when initial model is empty", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
describe('addTokens', () => {
it('accepts a list of string as new tokens when initial model is empty', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
const nbAdd = tokenizer.addTokens(["my", "name", "is", "john", "pair"]);
expect(nbAdd).toBe(5);
});
const nbAdd = tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
expect(nbAdd).toBe(5)
})
it("accepts a list of AddedToken as new tokens when initial model is empty", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
const addedToken = new AddedToken("test", false);
it('accepts a list of AddedToken as new tokens when initial model is empty', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
const addedToken = new AddedToken('test', false)
const nbAdd = tokenizer.addTokens([addedToken]);
expect(nbAdd).toBe(1);
});
});
describe("encode", () => {
let tokenizer: Tokenizer;
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null
) => Promise<RawEncoding>;
let encodeBatch: (
inputs: EncodeInput[],
options?: EncodeOptions | null
) => Promise<RawEncoding[]>;
const nbAdd = tokenizer.addAddedTokens([addedToken])
expect(nbAdd).toBe(1)
})
})
describe('encode', () => {
let tokenizer: Tokenizer
beforeEach(() => {
// Clear all instances and calls to constructor and all methods:
// TokenizerMock.mockClear();
const model = BPE.empty();
tokenizer = new Tokenizer(model);
tokenizer.addTokens(["my", "name", "is", "john", new AddedToken("pair", false)]);
const model = BPE.empty()
tokenizer = new Tokenizer(model)
tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
})
encode = promisify(tokenizer.encode.bind(tokenizer));
encodeBatch = promisify(tokenizer.encodeBatch.bind(tokenizer));
});
it('accepts a pair of strings as parameters', async () => {
const encoding = await tokenizer.encode('my name is john', 'pair')
expect(encoding).toBeDefined()
})
it("accepts a pair of strings as parameters", async () => {
const encoding = await encode("my name is john", "pair");
expect(encoding).toBeDefined();
});
it('accepts a string with a null pair', async () => {
const encoding = await tokenizer.encode('my name is john', null)
expect(encoding).toBeDefined()
})
it("accepts a string with a null pair", async () => {
const encoding = await encode("my name is john", null);
expect(encoding).toBeDefined();
});
// TODO
// it("throws if we try to encode a pre-tokenized string without isPretokenized=true", async () => {
// await expect((encode as any)(["my", "name", "is", "john"], null)).rejects.toThrow(
// "encode with isPreTokenized=false expect string"
// );
// });
it("throws if we try to encode a pre-tokenized string without isPretokenized=true", async () => {
await expect((encode as any)(["my", "name", "is", "john"], null)).rejects.toThrow(
"encode with isPreTokenized=false expect string"
);
});
// it("accepts a pre-tokenized string as parameter", async () => {
// const encoding = await tokenizer.encode(["my", "name", "is", "john"], undefined, {
// isPretokenized: true,
// });
// expect(encoding).toBeDefined();
// });
it("accepts a pre-tokenized string as parameter", async () => {
const encoding = await encode(["my", "name", "is", "john"], undefined, {
isPretokenized: true,
});
expect(encoding).toBeDefined();
});
// it("throws if we try to encodeBatch pre-tokenized strings without isPretokenized=true", async () => {
// await expect((encodeBatch as any)([["my", "name", "is", "john"]])).rejects.toThrow(
// "encodeBatch with isPretokenized=false expects input to be `EncodeInput[]` " +
// "with `EncodeInput = string | [string, string]`"
// );
// });
it("throws if we try to encodeBatch pre-tokenized strings without isPretokenized=true", async () => {
await expect((encodeBatch as any)([["my", "name", "is", "john"]])).rejects.toThrow(
"encodeBatch with isPretokenized=false expects input to be `EncodeInput[]` " +
"with `EncodeInput = string | [string, string]`"
);
});
// it("accepts a pre-tokenized input in encodeBatch", async () => {
// const encoding = await tokenizer.encodeBatch([["my", "name", "is", "john"]], {
// isPretokenized: true,
// });
// expect(encoding).toBeDefined();
// });
it("accepts a pre-tokenized input in encodeBatch", async () => {
const encoding = await encodeBatch([["my", "name", "is", "john"]], {
isPretokenized: true,
});
expect(encoding).toBeDefined();
});
it('Encodes correctly if called with only one argument', async () => {
const encoded = await tokenizer.encode('my name is john')
expect(encoded.getIds()).toEqual([0, 1, 2, 3])
})
it("Encodes correctly if called with only one argument", async () => {
const encoded = await encode("my name is john");
expect(encoded.getIds()).toEqual([0, 1, 2, 3]);
});
it('returns an Encoding', async () => {
const encoding = await tokenizer.encode('my name is john', 'pair')
it("returns an Encoding", async () => {
const encoding = await encode("my name is john", "pair");
expect(encoding.getAttentionMask()).toEqual([1, 1, 1, 1, 1])
expect(encoding.getAttentionMask()).toEqual([1, 1, 1, 1, 1]);
const ids = encoding.getIds();
expect(Array.isArray(ids)).toBe(true);
expect(ids).toHaveLength(5);
const ids = encoding.getIds()
expect(Array.isArray(ids)).toBe(true)
expect(ids).toHaveLength(5)
for (const id of ids) {
expect(typeof id).toBe("number");
expect(typeof id).toBe('number')
}
expect(encoding.getOffsets()).toEqual([
@ -225,218 +192,192 @@ describe("Tokenizer", () => {
[8, 10],
[11, 15],
[0, 4],
]);
expect(encoding.getOverflowing()).toEqual([]);
expect(encoding.getSpecialTokensMask()).toEqual([0, 0, 0, 0, 0]);
expect(encoding.getTokens()).toEqual(["my", "name", "is", "john", "pair"]);
expect(encoding.getTypeIds()).toEqual([0, 0, 0, 0, 1]);
});
])
expect(encoding.getOverflowing()).toEqual([])
expect(encoding.getSpecialTokensMask()).toEqual([0, 0, 0, 0, 0])
expect(encoding.getTokens()).toEqual(['my', 'name', 'is', 'john', 'pair'])
expect(encoding.getTypeIds()).toEqual([0, 0, 0, 0, 1])
})
describe("when truncation is enabled", () => {
it("truncates with default if no truncation options provided", async () => {
tokenizer.setTruncation(2);
describe('when truncation is enabled', () => {
it('truncates with default if no truncation options provided', async () => {
tokenizer.setTruncation(2)
const singleEncoding = await encode("my name is john", null);
expect(singleEncoding.getTokens()).toEqual(["my", "name"]);
const singleEncoding = await tokenizer.encode('my name is john', null)
expect(singleEncoding.getTokens()).toEqual(['my', 'name'])
const pairEncoding = await encode("my name is john", "pair");
expect(pairEncoding.getTokens()).toEqual(["my", "pair"]);
});
const pairEncoding = await tokenizer.encode('my name is john', 'pair')
expect(pairEncoding.getTokens()).toEqual(['my', 'pair'])
})
it("throws an error with strategy `only_second` and no pair is encoded", async () => {
tokenizer.setTruncation(2, { strategy: TruncationStrategy.OnlySecond });
await expect(encode("my name is john", null)).rejects.toThrow();
});
});
it('throws an error with strategy `only_second` and no pair is encoded', async () => {
tokenizer.setTruncation(2, { strategy: TruncationStrategy.OnlySecond })
await expect(tokenizer.encode('my name is john', null)).rejects.toThrow(
'Truncation error: Second sequence not provided',
)
})
})
describe("when padding is enabled", () => {
it("does not pad anything with default options", async () => {
tokenizer.setPadding();
describe('when padding is enabled', () => {
it('does not pad anything with default options', async () => {
tokenizer.setPadding()
const singleEncoding = await encode("my name", null);
expect(singleEncoding.getTokens()).toEqual(["my", "name"]);
const singleEncoding = await tokenizer.encode('my name', null)
expect(singleEncoding.getTokens()).toEqual(['my', 'name'])
const pairEncoding = await encode("my name", "pair");
expect(pairEncoding.getTokens()).toEqual(["my", "name", "pair"]);
});
const pairEncoding = await tokenizer.encode('my name', 'pair')
expect(pairEncoding.getTokens()).toEqual(['my', 'name', 'pair'])
})
it("pads to the right by default", async () => {
tokenizer.setPadding({ maxLength: 5 });
it('pads to the right by default', async () => {
tokenizer.setPadding({ maxLength: 5 })
const singleEncoding = await encode("my name", null);
expect(singleEncoding.getTokens()).toEqual([
"my",
"name",
"[PAD]",
"[PAD]",
"[PAD]",
]);
const singleEncoding = await tokenizer.encode('my name', null)
expect(singleEncoding.getTokens()).toEqual(['my', 'name', '[PAD]', '[PAD]', '[PAD]'])
const pairEncoding = await encode("my name", "pair");
expect(pairEncoding.getTokens()).toEqual([
"my",
"name",
"pair",
"[PAD]",
"[PAD]",
]);
});
const pairEncoding = await tokenizer.encode('my name', 'pair')
expect(pairEncoding.getTokens()).toEqual(['my', 'name', 'pair', '[PAD]', '[PAD]'])
})
it("pads to multiple of the given value", async () => {
tokenizer.setPadding({ padToMultipleOf: 8 });
it('pads to multiple of the given value', async () => {
tokenizer.setPadding({ padToMultipleOf: 8 })
const singleEncoding = await encode("my name", null);
expect(singleEncoding.getTokens()).toHaveLength(8);
const singleEncoding = await tokenizer.encode('my name', null)
expect(singleEncoding.getTokens()).toHaveLength(8)
const pairEncoding = await encode("my name", "pair");
expect(pairEncoding.getTokens()).toHaveLength(8);
});
});
});
const pairEncoding = await tokenizer.encode('my name', 'pair')
expect(pairEncoding.getTokens()).toHaveLength(8)
})
})
})
describe("decode", () => {
let tokenizer: Tokenizer;
describe('decode', () => {
let tokenizer: Tokenizer
beforeEach(() => {
const model = BPE.empty();
tokenizer = new Tokenizer(model);
tokenizer.addTokens(["my", "name", "is", "john", "pair"]);
});
const model = BPE.empty()
tokenizer = new Tokenizer(model)
tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
})
it("returns `undefined`", () => {
expect(tokenizer.decode([0, 1, 2, 3], true, () => {})).toBeUndefined();
});
it('has its callback called with the decoded string', async () => {
const decode = tokenizer.decode.bind(tokenizer)
expect(await decode([0, 1, 2, 3], true)).toEqual('my name is john')
})
})
it("has its callback called with the decoded string", async () => {
const decode = promisify(tokenizer.decode.bind(tokenizer));
await expect(decode([0, 1, 2, 3], true)).resolves.toEqual("my name is john");
});
});
describe("decodeBatch", () => {
let tokenizer: Tokenizer;
describe('decodeBatch', () => {
let tokenizer: Tokenizer
beforeEach(() => {
const model = BPE.empty();
tokenizer = new Tokenizer(model);
tokenizer.addTokens(["my", "name", "is", "john", "pair"]);
});
const model = BPE.empty()
tokenizer = new Tokenizer(model)
tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
})
it("returns `undefined`", () => {
expect(tokenizer.decodeBatch([[0, 1, 2, 3], [4]], true, () => {})).toBeUndefined();
});
it('has its callback called with the decoded string', async () => {
const decodeBatch = tokenizer.decodeBatch.bind(tokenizer)
expect(await decodeBatch([[0, 1, 2, 3], [4]], true)).toEqual(['my name is john', 'pair'])
})
})
it("has its callback called with the decoded string", async () => {
const decodeBatch = promisify(tokenizer.decodeBatch.bind(tokenizer));
await expect(decodeBatch([[0, 1, 2, 3], [4]], true)).resolves.toEqual([
"my name is john",
"pair",
]);
});
});
describe('getVocab', () => {
it('accepts `undefined` as parameter', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
describe("getVocab", () => {
it("accepts `undefined` as parameter", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
expect(tokenizer.getVocab(undefined)).toBeDefined()
})
expect(tokenizer.getVocab(undefined)).toBeDefined();
});
it("returns the vocabulary", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
tokenizer.addTokens(["my", "name", "is", "john"]);
it('returns the vocabulary', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
tokenizer.addTokens(['my', 'name', 'is', 'john'])
expect(tokenizer.getVocab(true)).toEqual({
my: 0,
name: 1,
is: 2,
john: 3,
});
});
});
})
})
})
describe("getVocabSize", () => {
it("accepts `undefined` as parameter", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
describe('getVocabSize', () => {
it('accepts `undefined` as parameter', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
expect(tokenizer.getVocabSize(undefined)).toBeDefined();
});
});
expect(tokenizer.getVocabSize(undefined)).toBeDefined()
})
})
describe("setTruncation", () => {
it("returns the full truncation configuration", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
describe('setTruncation', () => {
it('returns the full truncation configuration', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
const truncation = tokenizer.setTruncation(2);
const expectedConfig: TruncationConfiguration = {
maxLength: 2,
strategy: TruncationStrategy.LongestFirst,
stride: 0,
direction: TruncationDirection.Right,
};
expect(truncation).toEqual(expectedConfig);
});
});
tokenizer.setTruncation(2)
// TODO Return type is weird
// const expectedConfig: TruncationOptions = {
// maxLength: 2,
// strategy: TruncationStrategy.LongestFirst,
// stride: 0,
// direction: TruncationDirection.Right,
// };
// expect(truncation).toEqual(expectedConfig);
})
})
describe("setPadding", () => {
it("returns the full padding params", () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
describe('setPadding', () => {
it('returns the full padding params', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
const padding = tokenizer.setPadding();
const expectedConfig: PaddingConfiguration = {
direction: PaddingDirection.Right,
padId: 0,
padToken: "[PAD]",
padTypeId: 0,
};
expect(padding).toEqual(expectedConfig);
});
});
tokenizer.setPadding()
// TODO Return type is weird
// const expectedConfig: PaddingOptions = {
// direction: PaddingDirection.Right,
// padId: 0,
// padToken: "[PAD]",
// padTypeId: 0,
// };
// expect(padding).toEqual(expectedConfig);
})
})
describe("postProcess", () => {
let tokenizer: Tokenizer;
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null
) => Promise<RawEncoding>;
let firstEncoding: RawEncoding;
let secondEncoding: RawEncoding;
describe('postProcess', () => {
let tokenizer: Tokenizer
let firstEncoding: Encoding
let secondEncoding: Encoding
beforeAll(() => {
const model = BPE.empty();
tokenizer = new Tokenizer(model);
tokenizer.addTokens(["my", "name", "is", "john", "pair"]);
encode = promisify(tokenizer.encode.bind(tokenizer));
});
const model = BPE.empty()
tokenizer = new Tokenizer(model)
tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
})
beforeEach(async () => {
firstEncoding = await encode("my name is john", null);
secondEncoding = await encode("pair", null);
firstEncoding = await tokenizer.encode('my name is john', null)
secondEncoding = await tokenizer.encode('pair', null)
tokenizer.setTruncation(2);
tokenizer.setPadding({ maxLength: 5 });
});
tokenizer.setTruncation(2)
tokenizer.setPadding({ maxLength: 5 })
})
it("returns correctly with a single Encoding param", () => {
const encoding = tokenizer.postProcess(firstEncoding);
expect(encoding.getTokens()).toEqual(["my", "name", "[PAD]", "[PAD]", "[PAD]"]);
});
it('returns correctly with a single Encoding param', () => {
const encoding = tokenizer.postProcess(firstEncoding)
expect(encoding.getTokens()).toEqual(['my', 'name', '[PAD]', '[PAD]', '[PAD]'])
})
it("returns correctly with `undefined` as second and third parameters", () => {
const encoding = tokenizer.postProcess(firstEncoding, undefined, undefined);
expect(encoding.getTokens()).toEqual(["my", "name", "[PAD]", "[PAD]", "[PAD]"]);
});
it('returns correctly with `undefined` as second and third parameters', () => {
const encoding = tokenizer.postProcess(firstEncoding, undefined, undefined)
expect(encoding.getTokens()).toEqual(['my', 'name', '[PAD]', '[PAD]', '[PAD]'])
})
it("returns correctly with 2 encodings", () => {
const encoding = tokenizer.postProcess(firstEncoding, secondEncoding);
expect(encoding.getTokens()).toEqual(["my", "pair", "[PAD]", "[PAD]", "[PAD]"]);
});
});
});
it('returns correctly with 2 encodings', () => {
const encoding = tokenizer.postProcess(firstEncoding, secondEncoding)
expect(encoding.getTokens()).toEqual(['my', 'pair', '[PAD]', '[PAD]', '[PAD]'])
})
})
})

View File

@ -1,111 +0,0 @@
/**
* This interface is not meant to be instantiated directly. Instead, each of the
* trainer factory functions below returns an instance of it.
*/
import { AddedToken } from "./tokenizer";
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface Trainer {}
export interface TrainerOptions {
/**
* A prefix to be used for every subword that is not a beginning-of-word.
*/
continuingSubwordPrefix?: string;
/**
* A suffix to be used for every subword that is an end-of-word.
*/
endOfWordSuffix?: string;
/**
* A list of characters to include in the initial alphabet, even
* if not seen in the training dataset.
* If a string contains more than one character, only the first one
* is kept.
* @default []
*/
initialAlphabet?: string[];
/**
* The maximum different characters to keep in the alphabet.
*/
limitAlphabet?: number;
/**
* The minimum frequency a pair should have in order to be merged.
* @default 2
*/
minFrequency?: number;
/**
* Whether to show progress bars while training.
* @default true
*/
showProgress?: boolean;
/**
* A list of special tokens the model should know of.
* @default []
*/
specialTokens?: (string | AddedToken)[];
/**
* The size of the final vocabulary, including all tokens and alphabet.
* @default 30000
*/
vocabSize?: number;
}
/**
* Instantiate a new BPE Trainer
* @param [options] BPE Trainer options
*/
export function bpeTrainer(options?: TrainerOptions): Trainer;
/**
* Instantiate a new WordPiece Trainer
* @param [options] WordPiece Trainer options
*/
export function wordPieceTrainer(options?: TrainerOptions): Trainer;
export interface WordLevelTrainerOptions {
/**
* The minimum frequency a pair should have in order to be merged.
* @default 2
*/
minFrequency?: number;
/**
* Whether to show progress bars while training.
* @default true
*/
showProgress?: boolean;
/**
* A list of special tokens the model should know of.
* @default []
*/
specialTokens?: (string | AddedToken)[];
/**
* The size of the final vocabulary, including all tokens and alphabet.
* @default 30000
*/
vocabSize?: number;
}
/**
* Instantiate a new WordLevel Trainer
* @param [options] WordLevel Trainer options
*/
export function wordLevelTrainer(options?: WordLevelTrainerOptions): Trainer;
export interface UnigramTrainerOptions {
vocabSize?: number;
nSubIterations?: number;
shrinkingFactor?: number;
specialTokens?: string[];
initialAlphabet?: string[];
unkToken?: string;
maxPieceLength?: number;
seedSize?: number;
showProgress?: boolean;
}
/**
* Instantiate a new Unigram Trainer
* @param [options] Unigram Trainer options
*/
export function unigramTrainer(options?: UnigramTrainerOptions): Trainer;
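For context, a minimal sketch (not taken from the repository) of how these trainer factories were combined with the legacy Tokenizer binding; the import paths and the corpus file name are illustrative assumptions.
import { BPE } from "./models";
import { Tokenizer } from "./tokenizer";
import { bpeTrainer } from "./trainers";
// Train a fresh BPE model from scratch on a local text file.
const tokenizer = new Tokenizer(BPE.empty());
const trainer = bpeTrainer({
  vocabSize: 30000, // size of the final vocabulary, alphabet included
  minFrequency: 2, // pairs seen fewer times than this are never merged
  specialTokens: ["<unk>"],
  showProgress: false,
});
// The legacy binding trained directly from a list of files.
tokenizer.train(trainer, ["corpus.txt"]);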

View File

@ -1,8 +0,0 @@
const native = require("./native");
module.exports = {
bpeTrainer: native.trainers_BPETrainer,
wordPieceTrainer: native.trainers_WordPieceTrainer,
wordLevelTrainer: native.trainers_WordLevelTrainer,
unigramTrainer: native.trainers_UnigramTrainer,
};

View File

@ -1,24 +0,0 @@
import { RawEncoding } from "./raw-encoding";
/**
* Returns a subpart of a string according to the specified indexes, respecting unicode characters
*
* @param text The text for which to return a subpart
* @param [begin] The index from which to start (can be negative).
* @param [end] The index (excluded) at which to stop (can be negative).
* Defaults to the end of the string if not provided.
* @returns The full string if no start/end indexes are provided,
* otherwise the original string between `begin` (included) and `end` (excluded)
* @since 0.6.0
*/
export function slice(text: string, start?: number, end?: number): string;
/**
* Merge the list of RawEncoding into one final RawEncoding
* @param encodings The list of encodings to merge
* @param [growingOffsets=false] Whether the offsets should accumulate while merging
*/
export function mergeEncodings(
encodings: RawEncoding[],
growingOffsets?: boolean
): RawEncoding;
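A brief, hypothetical illustration of the unicode-aware behaviour described above: indexes count characters, so a multi-byte emoji is never split.
import { slice } from "./utils";
const text = "My name is John 👋";
slice(text, -1); // "👋" (negative indexes count from the end)
slice(text, 3, 7); // "name"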

View File

@ -1,6 +0,0 @@
const native = require("./native");
module.exports = {
mergeEncodings: native.utils_mergeEncodings,
slice: native.utils_slice,
};

View File

@ -1,175 +1,162 @@
import { promisify } from "util";
// import { promisify } from 'util'
import { BPE } from "./models";
import { RawEncoding } from "./raw-encoding";
import { EncodeOptions, InputSequence, Tokenizer } from "./tokenizer";
import { mergeEncodings, slice } from "./utils";
import { BPE, Tokenizer, mergeEncodings, slice } from '../../'
describe("slice", () => {
const text = "My name is John 👋";
const sliceText = slice.bind({}, text);
describe('slice', () => {
const text = 'My name is John 👋'
const sliceText = slice.bind({}, text)
it("returns the full text when no params", () => {
const sliced = sliceText();
expect(sliced).toEqual(text);
});
it('returns the full text when no params', () => {
const sliced = sliceText()
expect(sliced).toEqual(text)
})
it("accepts `undefined` as second parameter", () => {
const original = sliceText(undefined);
expect(original).toEqual(text);
});
it('accepts `undefined` as second parameter', () => {
const original = sliceText(undefined)
expect(original).toEqual(text)
})
it("accepts `undefined` as third parameter", () => {
const original = sliceText(0, undefined);
expect(original).toEqual(text);
});
it('accepts `undefined` as third parameter', () => {
const original = sliceText(0, undefined)
expect(original).toEqual(text)
})
it("throws an error when `begin` is out of range", () => {
expect(() => sliceText(1000)).toThrow();
});
it('throws an error when `begin` is out of range', () => {
expect(() => sliceText(1000)).toThrow()
})
it("returns slice starting at the specified index", () => {
const original = sliceText(3);
expect(original).toEqual("name is John 👋");
});
it('returns slice starting at the specified index', () => {
const original = sliceText(3)
expect(original).toEqual('name is John 👋')
})
it("throws an error when `end` is out of range", () => {
expect(() => sliceText(0, 1000)).toThrow();
});
it('throws an error when `end` is out of range', () => {
expect(() => sliceText(0, 1000)).toThrow()
})
it("returns the text between the two specified indexes", () => {
const original = sliceText(3, 7);
expect(original).toEqual("name");
});
it('returns the text between the two specified indexes', () => {
const original = sliceText(3, 7)
expect(original).toEqual('name')
})
describe("with only a negative `begin`", () => {
it("returns the original string counting from the end when in the range", () => {
const original = sliceText(-1);
expect(original).toEqual("👋");
});
describe('with only a negative `begin`', () => {
it('returns the original string counting from the end when in the range', () => {
const original = sliceText(-1)
expect(original).toEqual('👋')
})
it("throws an error when out of range", () => {
expect(() => sliceText(-1000)).toThrow();
});
});
it('throws an error when out of range', () => {
expect(() => sliceText(-1000)).toThrow()
})
})
describe("with a positive `begin` and a negative `end`", () => {
it("returns correct slice when resulting range is valid", () => {
const original = sliceText(3, -7);
expect(original).toEqual("name is");
});
describe('with a positive `begin` and a negative `end`', () => {
it('returns correct slice when resulting range is valid', () => {
const original = sliceText(3, -7)
expect(original).toEqual('name is')
})
it("throws an error when resulting `end` index is lower than `begin`", () => {
expect(() => sliceText(7, -12)).toThrow();
});
it('throws an error when resulting `end` index is lower than `begin`', () => {
expect(() => sliceText(7, -12)).toThrow()
})
it("throws an error when `begin` is out of range", () => {
expect(() => sliceText(1000, -12)).toThrow();
});
it('throws an error when `begin` is out of range', () => {
expect(() => sliceText(1000, -12)).toThrow()
})
it("throws an error when resulting `end` index is out of range", () => {
expect(() => sliceText(7, -1000)).toThrow();
});
});
it('throws an error when resulting `end` index is out of range', () => {
expect(() => sliceText(7, -1000)).toThrow()
})
})
describe("with a negative `begin` and a positive `end`", () => {
it("returns correct slice when resulting range is valid", () => {
const original = sliceText(-9, 10);
expect(original).toEqual("is");
});
describe('with a negative `begin` and a positive `end`', () => {
it('returns correct slice when resulting range is valid', () => {
const original = sliceText(-9, 10)
expect(original).toEqual('is')
})
it("throws an error when resulting `begin` index is greater than `end`", () => {
expect(() => sliceText(-3, 5)).toThrow();
});
it('throws an error when resulting `begin` index is greater than `end`', () => {
expect(() => sliceText(-3, 5)).toThrow()
})
it("throws an error when `end` is out of range", () => {
expect(() => sliceText(-5, 1000)).toThrow();
});
it('throws an error when `end` is out of range', () => {
expect(() => sliceText(-5, 1000)).toThrow()
})
it("throws an error when resulting `begin` index is out of range", () => {
expect(() => sliceText(-1000, 10)).toThrow();
});
});
it('throws an error when resulting `begin` index is out of range', () => {
expect(() => sliceText(-1000, 10)).toThrow()
})
})
describe("with negatives `begin` and `end`", () => {
it("returns correct slice when resulting range is valid", () => {
const original = sliceText(-9, -7);
expect(original).toEqual("is");
});
describe('with negatives `begin` and `end`', () => {
it('returns correct slice when resulting range is valid', () => {
const original = sliceText(-9, -7)
expect(original).toEqual('is')
})
it("throws an error when resulting `end` index is lower than `begin`", () => {
expect(() => sliceText(-5, -10)).toThrow();
});
it('throws an error when resulting `end` index is lower than `begin`', () => {
expect(() => sliceText(-5, -10)).toThrow()
})
it("throws an error when resulting `begin` index is out of range", () => {
expect(() => sliceText(-1000, -10)).toThrow();
});
it('throws an error when resulting `begin` index is out of range', () => {
expect(() => sliceText(-1000, -10)).toThrow()
})
it("throws an error when resulting `end` index is out of range", () => {
expect(() => sliceText(-10, -1000)).toThrow();
});
});
});
it('throws an error when resulting `end` index is out of range', () => {
expect(() => sliceText(-10, -1000)).toThrow()
})
})
})
describe("mergeEncodings", () => {
let encode: (
sequence: InputSequence,
pair?: InputSequence | null,
options?: EncodeOptions | null
) => Promise<RawEncoding>;
describe('mergeEncodings', () => {
const model = BPE.empty()
const tokenizer = new Tokenizer(model)
tokenizer.addTokens(['my', 'name', 'is', 'john'])
beforeAll(async () => {
const model = BPE.empty();
const tokenizer = new Tokenizer(model);
tokenizer.addTokens(["my", "name", "is", "john"]);
it('accepts `undefined` as a second parameter', () => {
const encoding = mergeEncodings([], undefined)
expect(encoding.constructor.name).toEqual('Encoding')
})
encode = promisify(tokenizer.encode.bind(tokenizer));
});
it('returns correct result with `growingOffsets` not provided', async () => {
const firstEncoding = await tokenizer.encode('my name is', null)
const secondEncoding = await tokenizer.encode('john', null)
const encoding = mergeEncodings([firstEncoding, secondEncoding])
it("accepts `undefined` as a second parameter", () => {
const encoding = mergeEncodings([], undefined);
expect(encoding.constructor.name).toEqual("Encoding");
});
it("returns correct result with `growingOffsets` not provided", async () => {
const firstEncoding = await encode("my name is", null);
const secondEncoding = await encode("john", null);
const encoding = mergeEncodings([firstEncoding, secondEncoding]);
expect(encoding.getTokens()).toEqual(["my", "name", "is", "john"]);
expect(encoding.getTokens()).toEqual(['my', 'name', 'is', 'john'])
expect(encoding.getOffsets()).toEqual([
[0, 2],
[3, 7],
[8, 10],
[0, 4],
]);
});
])
})
it("returns correct result when `growingOffsets` is `false`", async () => {
const firstEncoding = await encode("my name is", null);
const secondEncoding = await encode("john", null);
const encoding = mergeEncodings([firstEncoding, secondEncoding], false);
it('returns correct result when `growingOffsets` is `false`', async () => {
const firstEncoding = await tokenizer.encode('my name is', null)
const secondEncoding = await tokenizer.encode('john', null)
const encoding = mergeEncodings([firstEncoding, secondEncoding], false)
expect(encoding.getTokens()).toEqual(["my", "name", "is", "john"]);
expect(encoding.getTokens()).toEqual(['my', 'name', 'is', 'john'])
expect(encoding.getOffsets()).toEqual([
[0, 2],
[3, 7],
[8, 10],
[0, 4],
]);
});
])
})
it("returns correct result when `growingOffsets` is `true`", async () => {
const firstEncoding = await encode("my name is", null);
const secondEncoding = await encode("john", null);
const encoding = mergeEncodings([firstEncoding, secondEncoding], true);
it('returns correct result when `growingOffsets` is `true`', async () => {
const firstEncoding = await tokenizer.encode('my name is', null)
const secondEncoding = await tokenizer.encode('john', null)
const encoding = mergeEncodings([firstEncoding, secondEncoding], true)
expect(encoding.getTokens()).toEqual(["my", "name", "is", "john"]);
expect(encoding.getTokens()).toEqual(['my', 'name', 'is', 'john'])
expect(encoding.getOffsets()).toEqual([
[0, 2],
[3, 7],
[8, 10],
[10, 14],
]);
});
});
])
})
})

View File

@ -1,71 +0,0 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { RawEncoding } from "../bindings/raw-encoding";
import { Encoding } from "./encoding";
describe("Encoding", () => {
let encoding: Encoding;
const rawEncodingMock = jest.fn<Partial<RawEncoding>, any>();
describe("ids", () => {
const getIdsMock = jest.fn(() => [3]);
const m = rawEncodingMock.mockImplementation(() => ({
getIds: getIdsMock,
}));
encoding = new Encoding(m() as RawEncoding);
it("returns the ids from the raw encoding when not called before", () => {
const ids = encoding.ids;
expect(getIdsMock).toHaveBeenCalledTimes(1);
expect(ids).toEqual([3]);
});
it("returns the ids without using the raw encoding when already called before", () => {
getIdsMock.mockReset();
const ids = encoding.ids;
expect(getIdsMock).toHaveBeenCalledTimes(0);
expect(ids).toEqual([3]);
});
});
describe("pad", () => {
it('reset internal "cache" properties', () => {
const getIdsMock = jest.fn(() => [4]);
const m = rawEncodingMock.mockImplementation(() => ({
getIds: getIdsMock,
pad: jest.fn(),
}));
encoding = new Encoding(m() as RawEncoding);
encoding["_ids"] = [3];
encoding.pad(10);
const ids = encoding.ids;
expect(getIdsMock).toHaveBeenCalledTimes(1);
expect(ids).toEqual([4]);
});
});
describe("truncate", () => {
it('reset internal "cache" properties', () => {
const getIdsMock = jest.fn(() => [4]);
const m = rawEncodingMock.mockImplementation(() => ({
getIds: getIdsMock,
truncate: jest.fn(),
}));
encoding = new Encoding(m() as RawEncoding);
encoding["_ids"] = [3];
encoding.truncate(10);
const ids = encoding.ids;
expect(getIdsMock).toHaveBeenCalledTimes(1);
expect(ids).toEqual([4]);
});
});
});

View File

@ -1,279 +0,0 @@
import { PaddingOptions, RawEncoding } from "../bindings/raw-encoding";
import { mergeEncodings } from "../bindings/utils";
export class Encoding {
private _attentionMask?: number[];
private _ids?: number[];
private _length?: number;
private _offsets?: [number, number][];
private _overflowing?: Encoding[];
private _specialTokensMask?: number[];
private _tokens?: string[];
private _typeIds?: number[];
private _wordIndexes?: (number | undefined)[];
private _sequenceIndexes?: (number | undefined)[];
constructor(private _rawEncoding: RawEncoding) {}
/**
* Merge a list of Encoding into one final Encoding
* @param encodings The list of encodings to merge
* @param [growingOffsets=false] Whether the offsets should accumulate while merging
*/
static merge(encodings: Encoding[], growingOffsets?: boolean): Encoding {
const mergedRaw = mergeEncodings(
encodings.map((e) => e.rawEncoding),
growingOffsets
);
return new Encoding(mergedRaw);
}
/**
* Number of sequences
*/
get nSequences(): number {
return this._rawEncoding.getNSequences();
}
setSequenceId(seqId: number): void {
return this._rawEncoding.setSequenceId(seqId);
}
/**
* Attention mask
*/
get attentionMask(): number[] {
if (this._attentionMask) {
return this._attentionMask;
}
return (this._attentionMask = this._rawEncoding.getAttentionMask());
}
/**
* Tokenized ids
*/
get ids(): number[] {
if (this._ids) {
return this._ids;
}
return (this._ids = this._rawEncoding.getIds());
}
/**
* Number of tokens
*/
get length(): number {
if (this._length !== undefined) {
return this._length;
}
return (this._length = this._rawEncoding.getLength());
}
/**
* Offsets
*/
get offsets(): [number, number][] {
if (this._offsets) {
return this._offsets;
}
return (this._offsets = this._rawEncoding.getOffsets());
}
/**
* Overflowing encodings, after truncation
*/
get overflowing(): Encoding[] {
if (this._overflowing) {
return this._overflowing;
}
return (this._overflowing = this._rawEncoding
.getOverflowing()
.map((e) => new Encoding(e)));
}
/**
* __⚠ DANGER ZONE: do not touch unless you know what you're doing ⚠__
* Access to the `rawEncoding` returned by the internal Rust code.
* @private
* @ignore
* @since 0.6.0
*/
get rawEncoding(): Readonly<RawEncoding> {
return this._rawEncoding;
}
/**
* Special tokens mask
*/
get specialTokensMask(): number[] {
if (this._specialTokensMask) {
return this._specialTokensMask;
}
return (this._specialTokensMask = this._rawEncoding.getSpecialTokensMask());
}
/**
* Tokenized string
*/
get tokens(): string[] {
if (this._tokens) {
return this._tokens;
}
return (this._tokens = this._rawEncoding.getTokens());
}
/**
* Type ids
*/
get typeIds(): number[] {
if (this._typeIds) {
return this._typeIds;
}
return (this._typeIds = this._rawEncoding.getTypeIds());
}
/**
* The tokenized words indexes
*/
get wordIndexes(): (number | undefined)[] {
if (this._wordIndexes) {
return this._wordIndexes;
}
return (this._wordIndexes = this._rawEncoding.getWordIds());
}
get sequenceIndexes(): (number | undefined)[] {
if (this._sequenceIndexes) {
return this._sequenceIndexes;
}
return (this._sequenceIndexes = this._rawEncoding.getSequenceIds());
}
/**
* Get the encoded tokens corresponding to the word at the given index in one of the input
* sequences, with the form [startToken, endToken+1]
* @param word The position of a word in one of the input sequences
* @param seqId The index of the input sequence that contains said word
* @since 0.7.0
*/
wordToTokens(word: number, seqId?: number): [number, number] | undefined {
return this._rawEncoding.wordToTokens(word, seqId);
}
/**
* Get the offsets of the word at the given index in the input sequence
* @param word The index of the word in the input sequence
* @param seqId The index of the input sequence that contains said word
* @since 0.7.0
*/
wordToChars(word: number, seqId?: number): [number, number] | undefined {
return this._rawEncoding.wordToChars(word, seqId);
}
/**
* Get the index of the sequence that contains the given token
* @param token The index of the token in the encoded sequence
*/
tokenToSequence(token: number): number | undefined {
return this._rawEncoding.tokenToSequence(token);
}
/**
* Get the offsets of the token at the given index
*
* The returned offsets are related to the input sequence that contains the
* token. To determine which input sequence it belongs to, you
* must call `tokenToSequence`.
*
* @param token The index of the token in the encoded sequence
* @since 0.7.0
*/
tokenToChars(token: number): [number, number] | undefined {
return this._rawEncoding.tokenToChars(token);
}
/**
* Get the word that contains the token at the given index
*
* The returned index is related to the input sequence that contains the
* token. To determine which input sequence it belongs to, you
* must call `tokenToSequence`.
*
* @param token The index of the token in the encoded sequence
* @since 0.7.0
*/
tokenToWord(token: number): number | undefined {
return this._rawEncoding.tokenToWord(token);
}
/**
* Find the index of the token at the position of the given char
* @param pos The position of a char in one of the input strings
* @param seqId The index of the input sequence that contains said char
* @since 0.6.0
*/
charToToken(pos: number, seqId?: number): number | undefined {
return this._rawEncoding.charToToken(pos, seqId);
}
/**
* Get the word that contains the given char
* @param pos The position of a char in the input string
* @param seqId The index of the input sequence that contains said char
* @since 0.7.0
*/
charToWord(pos: number, seqId?: number): number | undefined {
return this._rawEncoding.charToWord(pos, seqId);
}
/**
* Pad the current Encoding at the given length
*
* @param length The length at which to pad
* @param [options] Padding options
*/
pad(length: number, options?: PaddingOptions): void {
this._rawEncoding.pad(length, options);
this.resetInternalProperties();
}
/**
* Truncate the current Encoding at the given max length
*
* @param length The maximum length to be kept
* @param [stride=0] The length of the previous first sequence
* to be included in the overflowing sequence
* @param [direction='right'] Truncate direction
*/
truncate(length: number, stride?: number, direction = "right"): void {
this._rawEncoding.truncate(length, stride, direction);
this.resetInternalProperties();
}
private resetInternalProperties(): void {
for (const prop of [
"_attentionMask",
"_ids",
"_length",
"_offsets",
"_overflowing",
"_specialTokensMask",
"_tokens",
"_typeIds",
"_wordIndexes",
]) {
delete this[prop as keyof this];
}
}
}
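As a rough usage sketch for this (now removed) wrapper: `Encoding.merge` combines wrapped encodings and `pad` clears the lazily cached getters. The two RawEncoding values and the relative import paths are illustrative assumptions.
import { RawEncoding } from "../bindings/raw-encoding";
import { Encoding } from "./encoding";
declare const firstRaw: RawEncoding; // stand-ins for values produced by the
declare const secondRaw: RawEncoding; // legacy callback-based Tokenizer.encode
const merged = Encoding.merge([new Encoding(firstRaw), new Encoding(secondRaw)]);
merged.pad(16); // resets the internal getter cache before re-reading
console.log(merged.tokens, merged.attentionMask);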

View File

@ -1,71 +0,0 @@
import {
PaddingDirection,
TruncationDirection,
TruncationStrategy,
} from "../../bindings/enums";
import { BPE } from "../../bindings/models";
import {
PaddingConfiguration,
Tokenizer,
TruncationConfiguration,
} from "../../bindings/tokenizer";
import { BaseTokenizer } from "./base.tokenizer";
describe("BaseTokenizer", () => {
let tokenizer: BaseTokenizer<Record<string, unknown>>;
beforeEach(() => {
// Clear all instances and calls to constructor and all methods:
// TokenizerMock.mockClear();
const model = BPE.empty();
const t = new Tokenizer(model);
tokenizer = new BaseTokenizer(t, {});
});
describe("truncation", () => {
it("returns `null` if no truncation is set", () => {
expect(tokenizer.truncation).toBeNull();
});
it("returns configuration when `setTruncation` has been called", () => {
tokenizer.setTruncation(2);
const expectedConfig: TruncationConfiguration = {
maxLength: 2,
strategy: TruncationStrategy.LongestFirst,
direction: TruncationDirection.Right,
stride: 0,
};
expect(tokenizer.truncation).toEqual(expectedConfig);
});
it("returns null when `disableTruncation` has been called", () => {
tokenizer.setTruncation(2);
tokenizer.disableTruncation();
expect(tokenizer.truncation).toBeNull();
});
});
describe("padding", () => {
it("returns `null` if no padding is set", () => {
expect(tokenizer.padding).toBeNull();
});
it("returns configuration when `setPadding` has been called", () => {
tokenizer.setPadding();
const expectedConfig: PaddingConfiguration = {
direction: PaddingDirection.Right,
padId: 0,
padToken: "[PAD]",
padTypeId: 0,
};
expect(tokenizer.padding).toEqual(expectedConfig);
});
it("returns null when `disablePadding` has been called", () => {
tokenizer.setPadding();
tokenizer.disablePadding();
expect(tokenizer.padding).toBeNull();
});
});
});

View File

@ -1,259 +0,0 @@
import { promisify } from "util";
import { PostProcessor } from "../../bindings/post-processors";
import {
AddedToken,
EncodeInput,
EncodeOptions,
InputSequence,
PaddingConfiguration,
PaddingOptions,
Tokenizer,
TruncationConfiguration,
TruncationOptions,
} from "../../bindings/tokenizer";
import { Encoding } from "../encoding";
export type Token = string | AddedToken;
// eslint-disable-next-line @typescript-eslint/ban-types
export class BaseTokenizer<TConfig extends object> {
private _truncation?: TruncationConfiguration;
private _padding?: PaddingConfiguration;
constructor(
protected tokenizer: Tokenizer,
/**
* @since 0.4.0
*/
readonly configuration: Readonly<TConfig>
) {}
/**
* Instantiate a new Tokenizer from the given file
* @param path Path to a file containing a Tokenizer
*/
static fromFile = Tokenizer.fromFile;
/**
* Instantiate a new Tokenizer from the given JSON string
* @param s A JSON string representation of the Tokenizer
*/
static fromString = Tokenizer.fromString;
/**
* Truncation configuration if enabled, `null` otherwise.
*
* @see {@link BaseTokenizer#setTruncation} to change truncation configuration
* @see {@link BaseTokenizer#disableTruncation} to disable truncation
* @since 0.4.0
*/
get truncation(): Readonly<TruncationConfiguration> | null {
return this._truncation ?? null;
}
/**
* Padding configuration if enabled, `null` otherwise
*
* @see {@link BaseTokenizer#setPadding} to change padding configuration
* @see {@link BaseTokenizer#disablePadding} to disable padding
* @since 0.4.0
*/
get padding(): Readonly<PaddingConfiguration> | null {
return this._padding ?? null;
}
/**
* Add the given tokens to the vocabulary
*
* @param tokens A list of tokens to add to the vocabulary.
* Each token can either be a string, or an instance of AddedToken.
*/
addTokens(tokens: Token[]): number {
return this.tokenizer.addTokens(tokens);
}
/**
* Add the given special tokens to the vocabulary, and treat them as special tokens.
* The special tokens will never be processed by the model, and will be removed while decoding.
*
* @param tokens The list of special tokens to add.
* Each token can either be a string, or an instance of AddedToken
* @returns The number of tokens that were added to the vocabulary
*/
addSpecialTokens(tokens: Token[]): number {
return this.tokenizer.addSpecialTokens(tokens);
}
/**
* Encode the given sequence
*
* @param sequence The sequence to encode
* @param [pair] The optional pair sequence
* @param [options] Some options to customize the encoding
*/
async encode(
sequence: InputSequence,
pair?: InputSequence,
options?: EncodeOptions
): Promise<Encoding> {
const encode = promisify(this.tokenizer.encode.bind(this.tokenizer));
const rawEncoding = await encode(sequence, pair ?? null, options ?? null);
return new Encoding(rawEncoding);
}
/**
* Encode the given sequences or pair of sequences
*
* @param sequences A list of sequences or pair of sequences.
* The list can contain both at the same time.
* @param [options] Some options to customize the encoding
*/
async encodeBatch(
sequences: EncodeInput[],
options?: EncodeOptions
): Promise<Encoding[]> {
const encodeBatch = promisify(this.tokenizer.encodeBatch.bind(this.tokenizer));
const rawEncodings = await encodeBatch(sequences, options);
return rawEncodings.map((e) => new Encoding(e));
}
/**
* Decode the given list of ids to a string sequence
*
* @param ids A list of ids to be decoded
* @param [skipSpecialTokens=true] Whether to remove all the special tokens from the output string
*/
decode(ids: number[], skipSpecialTokens = true): Promise<string> {
const decode = promisify(this.tokenizer.decode.bind(this.tokenizer));
return decode(ids, skipSpecialTokens);
}
/**
* Decode the list of sequences to a list of string sequences
*
* @param sequences A list of sequences of ids to be decoded
* @param [skipSpecialTokens=true] Whether to remove all the special tokens from the output strings
*/
decodeBatch(ids: number[][], skipSpecialTokens = true): Promise<string[]> {
const decodeBatch = promisify(this.tokenizer.decodeBatch.bind(this.tokenizer));
return decodeBatch(ids, skipSpecialTokens);
}
/**
* Enable/change truncation with specified options
*
* @param maxLength The maximum length at which to truncate
* @param [options] Additional truncation options
* @returns Full truncation configuration
*/
setTruncation(
maxLength: number,
options?: TruncationOptions
): Readonly<TruncationConfiguration> {
const result = this.tokenizer.setTruncation(maxLength, options);
return (this._truncation = result);
}
/**
* Disable truncation
*/
disableTruncation(): void {
this.tokenizer.disableTruncation();
delete this._truncation;
}
/**
* Enable/change padding with specified options
* @param [options] Padding options
* @returns Full padding configuration
*/
setPadding(options?: PaddingOptions): Readonly<PaddingConfiguration> {
const result = this.tokenizer.setPadding(options);
return (this._padding = result);
}
/**
* Disable padding
*/
disablePadding(): void {
this.tokenizer.disablePadding();
delete this._padding;
}
/**
* Convert the given token id to its corresponding string
*
* @param id The token id to convert
* @returns The corresponding string if it exists
*/
idToToken(id: number): string | undefined {
return this.tokenizer.idToToken(id);
}
/**
* Convert the given token to its corresponding id
*
* @param token The token to convert
* @returns The corresponding id if it exists
*/
tokenToId(token: string): number | undefined {
return this.tokenizer.tokenToId(token);
}
/**
* Apply all the post-processing steps to the given encodings.
* The various steps are:
* 1. Truncate according to global params (@see setTruncation)
* 2. Apply the PostProcessor
* 3. Pad according to global params (@see setPadding)
* @param encoding The main Encoding to post process
* @param [pair] An optional pair Encoding
* @param [addSpecialTokens=true] Whether to add special tokens. Default to `true`.
* @since 0.6.0
*/
postProcess(encoding: Encoding, pair?: Encoding, addSpecialTokens?: boolean): Encoding {
const rawEncoding = this.tokenizer.postProcess(
encoding.rawEncoding,
pair?.rawEncoding,
addSpecialTokens
);
return new Encoding(rawEncoding);
}
/**
* Change the post-processor to use with this Tokenizer
* @param postProcessor New post-processor to use
* @throws Will throw an error if any task is running
* @throws Will throw an error if the post-processor is already used in another Tokenizer
*/
setPostProcessor(processor: PostProcessor): void {
return this.tokenizer.setPostProcessor(processor);
}
/**
* Save the Tokenizer as JSON to the given path
* @param path Path to the JSON file to write
* @param [pretty=false] Whether the JSON string should be prettified
*/
save(path: string, pretty?: boolean): void {
return this.tokenizer.save(path, pretty);
}
/**
* Get a serialized JSON version of the Tokenizer as a string
* @param [pretty=false] Whether the JSON string should be prettified
*/
toString(pretty?: boolean): string {
return this.tokenizer.toString(pretty);
}
}
/**
* Get the string content from a token, which can be a string or AddedToken
* @param token The token from which get the content
*/
export function getTokenContent(token: Token): string {
return typeof token === "string" ? token : token.getContent();
}
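A minimal sketch, not from the codebase, of how this base class was typically driven; it assumes an empty BPE model, so tokens are added by hand before encoding, and the import paths are illustrative.
import { BPE } from "../../bindings/models";
import { Tokenizer } from "../../bindings/tokenizer";
import { BaseTokenizer } from "./base.tokenizer";
async function demo(): Promise<void> {
  const raw = new Tokenizer(BPE.empty());
  const tokenizer = new BaseTokenizer(raw, {});
  tokenizer.addTokens(["my", "name", "is", "john"]);
  const encoding = await tokenizer.encode("my name is john");
  console.log(encoding.tokens); // ["my", "name", "is", "john"]
  console.log(await tokenizer.decode(encoding.ids));
}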

View File

@ -1,34 +0,0 @@
import { BertWordPieceOptions, BertWordPieceTokenizer } from "./bert-wordpiece.tokenizer";
const MOCKS_DIR = __dirname + "/__mocks__";
describe("BertWordPieceTokenizer", () => {
describe("fromOptions", () => {
it("does not throw any error if no vocabFile is provided", async () => {
const tokenizer = await BertWordPieceTokenizer.fromOptions();
expect(tokenizer).toBeDefined();
});
describe("when a vocabFile is provided and `addSpecialTokens === true`", () => {
it("throws a `sepToken error` if no `sepToken` is provided", async () => {
const options: BertWordPieceOptions = {
vocabFile: MOCKS_DIR + "/bert-vocab-empty.txt",
};
await expect(BertWordPieceTokenizer.fromOptions(options)).rejects.toThrow(
"sepToken not found in the vocabulary"
);
});
it("throws a `clsToken error` if no `clsToken` is provided", async () => {
const options: BertWordPieceOptions = {
vocabFile: MOCKS_DIR + "/bert-vocab-without-cls.txt",
};
await expect(BertWordPieceTokenizer.fromOptions(options)).rejects.toThrow(
"clsToken not found in the vocabulary"
);
});
});
});
});

View File

@ -1,198 +0,0 @@
import { promisify } from "util";
import { wordPieceDecoder } from "../../bindings/decoders";
import { Model, WordPiece, WordPieceOptions } from "../../bindings/models";
import { bertNormalizer } from "../../bindings/normalizers";
import { bertProcessing } from "../../bindings/post-processors";
import { bertPreTokenizer } from "../../bindings/pre-tokenizers";
import { Tokenizer } from "../../bindings/tokenizer";
import { wordPieceTrainer } from "../../bindings/trainers";
import { BaseTokenizer, getTokenContent, Token } from "./base.tokenizer";
export interface BertWordPieceOptions {
/**
* @default true
*/
cleanText?: boolean;
/**
* @default "[CLS]"
*/
clsToken?: Token;
/**
* @default true
*/
handleChineseChars?: boolean;
/**
* @default true
*/
lowercase?: boolean;
/**
* @default "[MASK]"
*/
maskToken?: Token;
/**
* @default "[PAD]"
*/
padToken?: Token;
/**
* @default "[SEP]"
*/
sepToken?: Token;
/**
* @default true
*/
stripAccents?: boolean;
/**
* @default "[UNK]"
*/
unkToken?: Token;
vocabFile?: string;
/**
* The prefix to attach to subword units that don't represent a beginning of word
* @default "##"
*/
wordpiecesPrefix?: string;
}
export interface BertWordPieceTrainOptions {
/**
* @default []
*/
initialAlphabet?: string[];
/**
* @default 1000
*/
limitAlphabet?: number;
/**
* @default 2
*/
minFrequency?: number;
/**
* @default true
*/
showProgress?: boolean;
/**
* @default ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]
*/
specialTokens?: Token[];
/**
* @default 30000
*/
vocabSize?: number;
/**
* The prefix to attach to subword units that don't represent a beginning of word
* @default "##"
*/
wordpiecesPrefix?: string;
}
type BertTokenizerConfig = Required<Omit<BertWordPieceOptions, "vocabFile">> & {
vocabFile?: string;
};
/**
* Bert WordPiece Tokenizer
*/
export class BertWordPieceTokenizer extends BaseTokenizer<BertTokenizerConfig> {
private static readonly defaultBertOptions: BertTokenizerConfig = {
cleanText: true,
clsToken: "[CLS]",
handleChineseChars: true,
lowercase: true,
maskToken: "[MASK]",
padToken: "[PAD]",
sepToken: "[SEP]",
stripAccents: true,
unkToken: "[UNK]",
wordpiecesPrefix: "##",
};
private readonly defaultTrainOptions: Required<BertWordPieceTrainOptions> = {
initialAlphabet: [],
limitAlphabet: 1000,
minFrequency: 2,
showProgress: true,
specialTokens: ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"],
vocabSize: 30000,
wordpiecesPrefix: "##",
};
private constructor(tokenizer: Tokenizer, configuration: BertTokenizerConfig) {
super(tokenizer, configuration);
}
/**
* Instantiate and returns a new Bert WordPiece tokenizer
* @param [options] Optional tokenizer options
*/
static async fromOptions(
options?: BertWordPieceOptions
): Promise<BertWordPieceTokenizer> {
const opts = { ...this.defaultBertOptions, ...options };
let model: Model;
if (opts.vocabFile) {
const fromFile = promisify<string, WordPieceOptions, Model>(WordPiece.fromFile);
model = await fromFile(opts.vocabFile, {
unkToken: getTokenContent(opts.unkToken),
continuingSubwordPrefix: opts.wordpiecesPrefix,
});
} else {
model = WordPiece.empty();
}
const tokenizer = new Tokenizer(model);
for (const token of [
opts.clsToken,
opts.sepToken,
opts.unkToken,
opts.padToken,
opts.maskToken,
]) {
if (tokenizer.tokenToId(getTokenContent(token)) !== undefined) {
tokenizer.addSpecialTokens([token]);
}
}
const normalizer = bertNormalizer(opts);
tokenizer.setNormalizer(normalizer);
tokenizer.setPreTokenizer(bertPreTokenizer());
if (opts.vocabFile) {
const sepTokenId = tokenizer.tokenToId(getTokenContent(opts.sepToken));
if (sepTokenId === undefined) {
throw new Error("sepToken not found in the vocabulary");
}
const clsTokenId = tokenizer.tokenToId(getTokenContent(opts.clsToken));
if (clsTokenId === undefined) {
throw new Error("clsToken not found in the vocabulary");
}
const processor = bertProcessing(
[getTokenContent(opts.sepToken), sepTokenId],
[getTokenContent(opts.clsToken), clsTokenId]
);
tokenizer.setPostProcessor(processor);
}
const decoder = wordPieceDecoder(opts.wordpiecesPrefix);
tokenizer.setDecoder(decoder);
return new BertWordPieceTokenizer(tokenizer, opts);
}
/**
* Train the model using the given files
*
* @param files Files to use for training
* @param [options] Training options
*/
async train(files: string[], options?: BertWordPieceTrainOptions): Promise<void> {
const mergedOptions = { ...this.defaultTrainOptions, ...options };
const trainer = wordPieceTrainer(mergedOptions);
this.tokenizer.train(trainer, files);
}
}
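A rough, hypothetical sketch of the training path exposed by this class: without a vocabFile the model starts empty, then train builds the WordPiece vocabulary from local files. The corpus path and output file name are placeholders.
import { BertWordPieceTokenizer } from "./bert-wordpiece.tokenizer";
async function trainBert(): Promise<void> {
  const tokenizer = await BertWordPieceTokenizer.fromOptions({ lowercase: true });
  // Options not given here fall back to defaultTrainOptions above.
  await tokenizer.train(["corpus.txt"], { vocabSize: 10000 });
  tokenizer.save("bert-wordpiece.json");
}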

View File

@ -1,150 +0,0 @@
import { promisify } from "util";
import { bpeDecoder } from "../../bindings/decoders";
import { BPE, BPEOptions, Model } from "../../bindings/models";
import {
lowercaseNormalizer,
nfkcNormalizer,
sequenceNormalizer,
} from "../../bindings/normalizers";
import { whitespaceSplitPreTokenizer } from "../../bindings/pre-tokenizers";
import { Tokenizer } from "../../bindings/tokenizer";
import { bpeTrainer } from "../../bindings/trainers";
import { BaseTokenizer, getTokenContent, Token } from "./base.tokenizer";
export interface BPETokenizerOptions {
/**
* The BPE dropout to use. Must be a float between 0 and 1
*/
dropout?: number;
/**
* @default false
*/
lowercase?: boolean;
mergesFile?: string;
/**
* @default "</w>"
*/
suffix?: string;
/**
* The unknown token to be used by the model
* @default "<unk>"
*/
unkToken?: Token;
vocabFile?: string;
}
export interface BPETokenizerTrainOptions {
/**
* @default []
*/
initialAlphabet?: string[];
/**
* @default 1000
*/
limitAlphabet?: number;
/**
* @default 2
*/
minFrequency?: number;
/**
* @default true
*/
showProgress?: boolean;
/**
* @default ["<unk>"]
*/
specialTokens?: Token[];
/**
* @default "</w>"
*/
suffix?: string;
/**
* @default 30000
*/
vocabSize?: number;
}
type BPETokenizerConfig = BPETokenizerOptions &
Required<Pick<BPETokenizerOptions, "unkToken" | "suffix">>;
/**
* Original BPE Tokenizer.
* Represents the BPE algorithm, as introduced by Rico Sennrich (https://arxiv.org/abs/1508.07909)
*/
export class BPETokenizer extends BaseTokenizer<BPETokenizerConfig> {
private static readonly defaultBPEOptions: BPETokenizerConfig = {
suffix: "</w>",
unkToken: "<unk>",
};
private readonly defaultTrainOptions: Required<BPETokenizerTrainOptions> = {
initialAlphabet: [],
limitAlphabet: 1000,
minFrequency: 2,
showProgress: true,
specialTokens: ["<unk>"],
suffix: "</w>",
vocabSize: 30000,
};
private constructor(tokenizer: Tokenizer, configuration: BPETokenizerConfig) {
super(tokenizer, configuration);
}
/**
* Instantiate and returns a new BPE tokenizer
* @param [options] Optional tokenizer options
*/
static async fromOptions(options?: BPETokenizerOptions): Promise<BPETokenizer> {
const opts = { ...this.defaultBPEOptions, ...options };
const unkToken = getTokenContent(opts.unkToken);
let model: Model;
if (opts.vocabFile && opts.mergesFile) {
const modelOptions: BPEOptions = {
dropout: opts.dropout,
endOfWordSuffix: opts.suffix,
unkToken: unkToken,
};
const fromFile = promisify<string, string, BPEOptions, Model>(BPE.fromFile);
model = await fromFile(opts.vocabFile, opts.mergesFile, modelOptions);
} else {
model = BPE.empty();
}
const tokenizer = new Tokenizer(model);
if (tokenizer.tokenToId(unkToken) !== undefined) {
tokenizer.addSpecialTokens([opts.unkToken]);
}
if (opts.lowercase) {
tokenizer.setNormalizer(
sequenceNormalizer([nfkcNormalizer(), lowercaseNormalizer()])
);
} else {
tokenizer.setNormalizer(nfkcNormalizer());
}
tokenizer.setPreTokenizer(whitespaceSplitPreTokenizer());
const decoder = bpeDecoder(opts.suffix);
tokenizer.setDecoder(decoder);
return new BPETokenizer(tokenizer, opts);
}
/**
* Train the model using the given files
*
* @param files Files to use for training
* @param [options] Training options
*/
async train(files: string[], options?: BPETokenizerTrainOptions): Promise<void> {
const mergedOptions = { ...this.defaultTrainOptions, ...options };
const trainer = bpeTrainer(mergedOptions);
this.tokenizer.train(trainer, files);
}
}
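And a sketch of the file-loading branch of fromOptions described above, with placeholder paths for the vocabulary and merges files.
import { BPETokenizer } from "./bpe.tokenizer";
async function loadBpe(): Promise<void> {
  // Both files are required to hit the BPE.fromFile branch; otherwise the
  // tokenizer falls back to BPE.empty().
  const tokenizer = await BPETokenizer.fromOptions({
    vocabFile: "vocab.json",
    mergesFile: "merges.txt",
    lowercase: true,
  });
  const encoding = await tokenizer.encode("my name is john");
  console.log(encoding.tokens, encoding.offsets);
}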

View File

@ -1,135 +0,0 @@
import { promisify } from "util";
import { byteLevelDecoder } from "../../bindings/decoders";
import { BPE, BPEOptions, Model } from "../../bindings/models";
import {
lowercaseNormalizer,
nfkcNormalizer,
sequenceNormalizer,
} from "../../bindings/normalizers";
import { byteLevelProcessing } from "../../bindings/post-processors";
import { byteLevelAlphabet, byteLevelPreTokenizer } from "../../bindings/pre-tokenizers";
import { Tokenizer } from "../../bindings/tokenizer";
import { bpeTrainer } from "../../bindings/trainers";
import { BaseTokenizer, Token } from "./base.tokenizer";
export interface ByteLevelBPETokenizerOptions {
/**
* @default false
*/
addPrefixSpace?: boolean;
/**
* The prefix to attach to subword units that don't represent a beginning of word
*/
continuingSubwordPrefix?: string;
/**
* @default false
*/
lowercase?: boolean;
/**
* The BPE dropout to use. Must be a float between 0 and 1
*/
dropout?: number;
/**
* The suffix to attach to subword units that represent an end of word
*/
endOfWordSuffix?: string;
mergesFile?: string;
unicodeNormalizer?: string;
/**
* Whether to trim whitespace from the produced offsets
* @default false
*/
trimOffsets?: boolean;
vocabFile?: string;
}
export interface ByteLevelBPETrainOptions {
/**
* @default 2
*/
minFrequency?: number;
/**
* @default true
*/
showProgress?: boolean;
/**
* @default []
*/
specialTokens?: Token[];
/**
* @default 30000
*/
vocabSize?: number;
}
type ByteLevelBPETokenizerConfig = ByteLevelBPETokenizerOptions &
Required<Pick<ByteLevelBPETokenizerOptions, "addPrefixSpace">>;
/**
* Represents a Byte-level BPE as introduced by OpenAI with their GPT-2 model
*/
export class ByteLevelBPETokenizer extends BaseTokenizer<ByteLevelBPETokenizerConfig> {
private static readonly defaultOptions: ByteLevelBPETokenizerConfig = {
addPrefixSpace: false,
trimOffsets: false,
};
private readonly defaultTrainOptions: Required<ByteLevelBPETrainOptions> = {
minFrequency: 2,
showProgress: true,
specialTokens: ["<unk>"],
vocabSize: 30000,
};
private constructor(tokenizer: Tokenizer, configuration: ByteLevelBPETokenizerConfig) {
super(tokenizer, configuration);
}
static async fromOptions(
options?: ByteLevelBPETokenizerOptions
): Promise<ByteLevelBPETokenizer> {
const opts = { ...this.defaultOptions, ...options };
let model: Model;
if (opts.vocabFile && opts.mergesFile) {
const fromFile = promisify<string, string, BPEOptions, Model>(BPE.fromFile);
model = await fromFile(opts.vocabFile, opts.mergesFile, opts);
} else {
model = BPE.empty();
}
const tokenizer = new Tokenizer(model);
if (opts.lowercase) {
tokenizer.setNormalizer(
sequenceNormalizer([nfkcNormalizer(), lowercaseNormalizer()])
);
} else {
tokenizer.setNormalizer(nfkcNormalizer());
}
const preTokenizer = byteLevelPreTokenizer(opts.addPrefixSpace);
tokenizer.setPreTokenizer(preTokenizer);
tokenizer.setDecoder(byteLevelDecoder());
tokenizer.setPostProcessor(byteLevelProcessing(opts.trimOffsets));
return new ByteLevelBPETokenizer(tokenizer, opts);
}
/**
* Train the model using the given files
*
* @param files Files to use for training
* @param [options] Training options
*/
async train(files: string[], options?: ByteLevelBPETrainOptions): Promise<void> {
const mergedOptions = { ...this.defaultTrainOptions, ...options };
const trainer = bpeTrainer({
...mergedOptions,
initialAlphabet: byteLevelAlphabet(),
});
this.tokenizer.train(trainer, files);
}
}
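For context, a hedged sketch of the ByteLevelBPETokenizer helper removed above; the package import and corpus path are assumptions.
import { ByteLevelBPETokenizer } from "tokenizers";
// train() injects the byte-level alphabet itself, so only the corpus and the
// usual training knobs are needed.
async function trainByteLevelBpe(): Promise<ByteLevelBPETokenizer> {
  const tokenizer = await ByteLevelBPETokenizer.fromOptions({ addPrefixSpace: true });
  await tokenizer.train(["./corpus.txt"], { vocabSize: 30000 });
  return tokenizer;
}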

View File

@ -1,5 +0,0 @@
export * from "./bert-wordpiece.tokenizer";
export * from "./bpe.tokenizer";
export * from "./byte-level-bpe.tokenizer";
export * from "./sentence-piece-bpe.tokenizer";
export { getTokenContent, BaseTokenizer, Token } from "./base.tokenizer";

View File

@ -1,135 +0,0 @@
import { promisify } from "util";
import { metaspaceDecoder } from "../../bindings/decoders";
import { BPE, BPEOptions, Model } from "../../bindings/models";
import { nfkcNormalizer } from "../../bindings/normalizers";
import { metaspacePreTokenizer } from "../../bindings/pre-tokenizers";
import { Tokenizer } from "../../bindings/tokenizer";
import { bpeTrainer } from "../../bindings/trainers";
import { BaseTokenizer, getTokenContent, Token } from "./base.tokenizer";
export interface SentencePieceBPETokenizerOptions extends OptionsWithDefaults {
dropout?: number;
mergesFile?: string;
vocabFile?: string;
}
interface OptionsWithDefaults {
/**
* @default true
*/
addPrefixSpace?: boolean;
/**
* @default "▁"
*/
replacement?: string;
/**
* @default "<unk>"
*/
unkToken?: Token;
}
export interface SentencePieceBPETrainOptions {
/**
* @default []
*/
initialAlphabet?: string[];
/**
* @default 1000
*/
limitAlphabet?: number;
/**
* @default 2
*/
minFrequency?: number;
/**
* @default true
*/
showProgress?: boolean;
/**
* @default ["<unk>"]
*/
specialTokens?: Token[];
/**
* @default 30000
*/
vocabSize?: number;
}
type SentencePieceBPETokenizerConfig = SentencePieceBPETokenizerOptions &
Required<OptionsWithDefaults>;
/**
* Represents the BPE algorithm, with the pretokenization used by SentencePiece
*/
export class SentencePieceBPETokenizer extends BaseTokenizer<SentencePieceBPETokenizerConfig> {
private static readonly defaultOptions: SentencePieceBPETokenizerConfig = {
addPrefixSpace: true,
replacement: "▁",
unkToken: "<unk>",
};
private readonly defaultTrainOptions: Required<SentencePieceBPETrainOptions> = {
initialAlphabet: [],
limitAlphabet: 1000,
minFrequency: 2,
showProgress: true,
specialTokens: ["<unk>"],
vocabSize: 30000,
};
private constructor(
tokenizer: Tokenizer,
configuration: SentencePieceBPETokenizerConfig
) {
super(tokenizer, configuration);
}
static async fromOptions(
options?: SentencePieceBPETokenizerOptions
): Promise<SentencePieceBPETokenizer> {
const opts = { ...this.defaultOptions, ...options };
const unkToken = getTokenContent(opts.unkToken);
let model: Model;
if (opts.vocabFile && opts.mergesFile) {
const modelOptions: BPEOptions = {
dropout: opts.dropout,
unkToken: unkToken,
};
const fromFile = promisify<string, string, BPEOptions, Model>(BPE.fromFile);
model = await fromFile(opts.vocabFile, opts.mergesFile, modelOptions);
} else {
model = BPE.empty();
}
const tokenizer = new Tokenizer(model);
if (tokenizer.tokenToId(unkToken) !== undefined) {
tokenizer.addSpecialTokens([opts.unkToken]);
}
tokenizer.setNormalizer(nfkcNormalizer());
const preTokenizer = metaspacePreTokenizer(opts.replacement, opts.addPrefixSpace);
tokenizer.setPreTokenizer(preTokenizer);
const decoder = metaspaceDecoder(opts.replacement, opts.addPrefixSpace);
tokenizer.setDecoder(decoder);
return new SentencePieceBPETokenizer(tokenizer, opts);
}
/**
* Train the model using the given files
*
* @param files Files to use for training
* @param [options] Training options
*/
async train(files: string[], options?: SentencePieceBPETrainOptions): Promise<void> {
const mergedOptions = { ...this.defaultTrainOptions, ...options };
const trainer = bpeTrainer(mergedOptions);
this.tokenizer.train(trainer, files);
}
}
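For context, a minimal sketch of the SentencePieceBPETokenizer helper removed above; the file paths are hypothetical.
import { SentencePieceBPETokenizer } from "tokenizers";
// Loads a pre-trained vocab/merges pair; the "▁" replacement and prefix
// space come from the defaults shown above.
async function loadSentencePieceBpe(): Promise<SentencePieceBPETokenizer> {
  return SentencePieceBPETokenizer.fromOptions({
    vocabFile: "./vocab.json",
    mergesFile: "./merges.txt",
  });
}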

View File

@ -1,23 +0,0 @@
// export * from "./bindings";
export * from "./implementations/tokenizers";
export * from "./bindings/enums";
export { slice } from "./bindings/utils";
export {
AddedToken,
AddedTokenOptions,
PaddingConfiguration,
PaddingOptions,
InputSequence,
EncodeInput,
EncodeOptions,
Tokenizer,
TruncationConfiguration,
TruncationOptions,
} from "./bindings/tokenizer";
export * as models from "./bindings/models";
export * as normalizers from "./bindings/normalizers";
export * as pre_tokenizers from "./bindings/pre-tokenizers";
export * as decoders from "./bindings/decoders";
export * as post_processors from "./bindings/post-processors";
export * as trainers from "./bindings/trainers";
export { Encoding } from "./implementations/encoding";
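The removed root export above exposed both the high-level helper classes and the raw binding namespaces; a short sketch of the two import styles, assuming the published "tokenizers" package name.
import { BPETokenizer, models, normalizers } from "tokenizers";
const helper = BPETokenizer.fromOptions();   // high-level helper class
const emptyModel = models.BPE.empty();       // raw binding namespace
const nfkc = normalizers.nfkcNormalizer();   // raw binding namespace
console.log(typeof helper, typeof emptyModel, typeof nfkc);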

View File

@ -1,22 +0,0 @@
[package]
name = "node"
version = "0.13.4"
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
license = "Apache-2.0"
build = "build.rs"
exclude = ["artifacts.json", "index.node"]
[lib]
name = "node"
crate-type = ["cdylib"]
[build-dependencies]
neon-build = "0.3.3"
[dependencies]
neon = "0.3"
neon-runtime = "0.3"
neon-serde = "0.3"
serde = { version = "1.0", features = [ "rc", "derive" ] }
tokenizers = { path = "../../../tokenizers" }
serde_json = "1.0"

View File

@ -1,7 +0,0 @@
extern crate neon_build;
fn main() {
neon_build::setup(); // must be called in build.rs
// add project-specific build logic here...
}

View File

@ -1,204 +0,0 @@
extern crate tokenizers as tk;
use crate::extraction::*;
use neon::prelude::*;
use std::sync::Arc;
use tk::decoders::DecoderWrapper;
/// Decoder
#[derive(Clone, Serialize, Deserialize)]
pub struct Decoder {
#[serde(flatten)]
pub decoder: Option<Arc<DecoderWrapper>>,
}
impl tk::Decoder for Decoder {
fn decode_chain(&self, tokens: Vec<String>) -> tk::Result<Vec<String>> {
self.decoder
.as_ref()
.ok_or("Uninitialized Decoder")?
.decode_chain(tokens)
}
}
declare_types! {
pub class JsDecoder for Decoder {
init(_) {
// This should not be called from JS
Ok(Decoder { decoder: None })
}
method decode(mut cx) {
use tk::Decoder;
let tokens = cx.extract_vec::<String>(0)?;
let this = cx.this();
let guard = cx.lock();
let output = this.borrow(&guard)
.decoder.as_ref().unwrap()
.decode(tokens)
.map_err(|e| Error(format!("{}", e)))?;
Ok(cx.string(output).upcast())
}
}
}
/// byte_level()
fn byte_level(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(
tk::decoders::byte_level::ByteLevel::default().into(),
));
Ok(decoder)
}
/// replace()
fn replace(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let pattern: String = cx.extract::<String>(0)?;
let content: String = cx.extract::<String>(1)?;
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(
tk::normalizers::replace::Replace::new(pattern, content)
.map_err(|e| Error(e.to_string()))?
.into(),
));
Ok(decoder)
}
/// wordpiece(prefix: String = "##", cleanup: bool = true)
fn wordpiece(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let prefix = cx
.extract_opt::<String>(0)?
.unwrap_or_else(|| String::from("##"));
let cleanup = cx.extract_opt::<bool>(1)?.unwrap_or(true);
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(
tk::decoders::wordpiece::WordPiece::new(prefix, cleanup).into(),
));
Ok(decoder)
}
/// byte_fallback()
fn byte_fallback(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(
tk::decoders::byte_fallback::ByteFallback::new().into(),
));
Ok(decoder)
}
/// fuse()
fn fuse(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(tk::decoders::fuse::Fuse::new().into()));
Ok(decoder)
}
/// strip(content: char, left: usize, right: usize)
fn strip(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let content: char = cx.extract(0)?;
let left: usize = cx.extract(1)?;
let right: usize = cx.extract(2)?;
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(
tk::decoders::strip::Strip::new(content, left, right).into(),
));
Ok(decoder)
}
/// metaspace(replacement: String = "▁", add_prefix_space: bool = true)
fn metaspace(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let replacement = cx.extract_opt::<char>(0)?.unwrap_or('▁');
let add_prefix_space = cx.extract_opt::<bool>(1)?.unwrap_or(true);
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(
tk::decoders::metaspace::Metaspace::new(replacement, add_prefix_space).into(),
));
Ok(decoder)
}
/// bpe_decoder(suffix: String = "</w>")
fn bpe_decoder(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let suffix = cx
.extract_opt::<String>(0)?
.unwrap_or_else(|| String::from("</w>"));
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder =
Some(Arc::new(tk::decoders::bpe::BPEDecoder::new(suffix).into()));
Ok(decoder)
}
/// ctc_decoder(pad_token: String = "<pad>", word_delimiter_token: String = "|", cleanup = true)
fn ctc_decoder(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let pad_token = cx
.extract_opt::<String>(0)?
.unwrap_or_else(|| String::from("<pad>"));
let word_delimiter_token = cx
.extract_opt::<String>(1)?
.unwrap_or_else(|| String::from("|"));
let cleanup = cx.extract_opt::<bool>(2)?.unwrap_or(true);
let mut decoder = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
decoder.borrow_mut(&guard).decoder = Some(Arc::new(
tk::decoders::ctc::CTC::new(pad_token, word_delimiter_token, cleanup).into(),
));
Ok(decoder)
}
/// sequence()
fn sequence(mut cx: FunctionContext) -> JsResult<JsDecoder> {
let decoders = cx.argument::<JsArray>(0)?.to_vec(&mut cx)?;
let mut sequence = Vec::with_capacity(decoders.len());
decoders.into_iter().try_for_each(|decoder| {
match decoder.downcast::<JsDecoder>().or_throw(&mut cx) {
Ok(decoder) => {
let guard = cx.lock();
if let Some(decoder_arc) = &decoder.borrow(&guard).decoder {
let decoder: DecoderWrapper = (**decoder_arc).clone();
sequence.push(decoder);
}
Ok(())
}
Err(e) => Err(e),
}
})?;
let mut pretok = JsDecoder::new::<_, JsDecoder, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).decoder = Some(Arc::new(tk::DecoderWrapper::Sequence(
tk::decoders::sequence::Sequence::new(sequence),
)));
Ok(pretok)
}
/// Register everything here
pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
m.export_function(&format!("{}_ByteLevel", prefix), byte_level)?;
m.export_function(&format!("{}_Replace", prefix), replace)?;
m.export_function(&format!("{}_WordPiece", prefix), wordpiece)?;
m.export_function(&format!("{}_ByteFallback", prefix), byte_fallback)?;
m.export_function(&format!("{}_Fuse", prefix), fuse)?;
m.export_function(&format!("{}_Strip", prefix), strip)?;
m.export_function(&format!("{}_Metaspace", prefix), metaspace)?;
m.export_function(&format!("{}_BPEDecoder", prefix), bpe_decoder)?;
m.export_function(&format!("{}_CTC", prefix), ctc_decoder)?;
m.export_function(&format!("{}_Sequence", prefix), sequence)?;
Ok(())
}
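The removed decoder bindings above back camelCase factory functions on the JS side (byteLevelDecoder, metaspaceDecoder, bpeDecoder, ...); a hedged sketch of wiring one onto a tokenizer, with the root import assumed.
import { Tokenizer, models, decoders } from "tokenizers";
// Attach a Metaspace decoder matching the defaults registered above.
const tokenizer = new Tokenizer(models.BPE.empty());
tokenizer.setDecoder(decoders.metaspaceDecoder("▁", true));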

View File

@ -1,366 +0,0 @@
extern crate tokenizers as tk;
use crate::extraction::*;
use crate::tokenizer::PaddingParams;
use neon::prelude::*;
use tk::utils::truncation::TruncationDirection;
/// Encoding
pub struct Encoding {
pub encoding: Option<tk::tokenizer::Encoding>,
}
declare_types! {
pub class JsEncoding for Encoding {
init(_) {
// This should never be called from JavaScript
Ok(Encoding { encoding: None })
}
method getLength(mut cx) {
let this = cx.this();
let guard = cx.lock();
let length = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_ids()
.len();
Ok(cx.number(length as f64).upcast())
}
method getNSequences(mut cx) {
let this = cx.this();
let guard = cx.lock();
let n = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.n_sequences();
Ok(cx.number(n as f64).upcast())
}
method setSequenceId(mut cx) {
let seq_id = cx.extract::<usize>(0)?;
let mut this = cx.this();
let guard = cx.lock();
this.borrow_mut(&guard)
.encoding.as_mut().expect("Uninitialized Encoding")
.set_sequence_id(seq_id);
Ok(cx.undefined().upcast())
}
method getIds(mut cx) {
// getIds(): number[]
let this = cx.this();
let guard = cx.lock();
let ids = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_ids()
.to_vec();
Ok(neon_serde::to_value(&mut cx, &ids)?)
}
method getTypeIds(mut cx) {
// getTypeIds(): number[]
let this = cx.this();
let guard = cx.lock();
let ids = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_type_ids()
.to_vec();
Ok(neon_serde::to_value(&mut cx, &ids)?)
}
method getAttentionMask(mut cx) {
// getAttentionMask(): number[]
let this = cx.this();
let guard = cx.lock();
let ids = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_attention_mask()
.to_vec();
Ok(neon_serde::to_value(&mut cx, &ids)?)
}
method getSpecialTokensMask(mut cx) {
// getSpecialTokensMask(): number[]
let this = cx.this();
let guard = cx.lock();
let ids = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_special_tokens_mask()
.to_vec();
Ok(neon_serde::to_value(&mut cx, &ids)?)
}
method getTokens(mut cx) {
// getTokens(): string[]
let this = cx.this();
let guard = cx.lock();
let tokens = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_tokens()
.to_vec();
Ok(neon_serde::to_value(&mut cx, &tokens)?)
}
method getWordIds(mut cx) {
// getWordIds(): (number | undefined)[]
let this = cx.this();
let guard = cx.lock();
let ids = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_word_ids()
.to_vec();
Ok(neon_serde::to_value(&mut cx, &ids)?)
}
method getSequenceIds(mut cx) {
// getSequenceIds(): (number | undefined)[]
let this = cx.this();
let guard = cx.lock();
let ids = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_sequence_ids();
Ok(neon_serde::to_value(&mut cx, &ids)?)
}
method getOffsets(mut cx) {
// getOffsets(): [number, number][]
let this = cx.this();
let guard = cx.lock();
let offsets = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_offsets()
.to_vec();
let js_offsets = neon_serde::to_value(&mut cx, &offsets)?;
Ok(js_offsets)
}
method getOverflowing(mut cx) {
// getOverflowing(): Encoding[]
let this = cx.this();
let guard = cx.lock();
let overflowings = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.get_overflowing()
.clone();
let js_overflowings = JsArray::new(&mut cx, overflowings.len() as u32);
for (index, overflowing) in overflowings.iter().enumerate() {
let mut js_overflowing = JsEncoding::new::<_, JsEncoding, _>(&mut cx, vec![])?;
// Set the content
let guard = cx.lock();
js_overflowing.borrow_mut(&guard).encoding = Some(overflowing.clone());
js_overflowings.set(&mut cx, index as u32, js_overflowing)?;
}
Ok(js_overflowings.upcast())
}
method wordToTokens(mut cx) {
// wordToTokens(word: number, seqId: number = 0): [number, number] | undefined
let word = cx.extract::<u32>(0)?;
let seq_id = cx.extract_opt::<usize>(1)?.unwrap_or(0);
let this = cx.this();
let guard = cx.lock();
let res = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.word_to_tokens(word, seq_id);
if let Some(tokens) = res {
Ok(neon_serde::to_value(&mut cx, &tokens)?)
} else {
Ok(cx.undefined().upcast())
}
}
method wordToChars(mut cx) {
// wordToChars(word: number, seqId: number = 0): [number, number] | undefined
let word = cx.extract::<u32>(0)?;
let seq_id = cx.extract_opt::<usize>(1)?.unwrap_or(0);
let this = cx.this();
let guard = cx.lock();
let res = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.word_to_chars(word, seq_id);
if let Some(offsets) = res {
Ok(neon_serde::to_value(&mut cx, &offsets)?)
} else {
Ok(cx.undefined().upcast())
}
}
method tokenToSequence(mut cx) {
// tokenToSequence(token: number): number | undefined
let token = cx.extract::<usize>(0)?;
let this = cx.this();
let guard = cx.lock();
let res = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.token_to_sequence(token);
if let Some(seq) = res {
Ok(neon_serde::to_value(&mut cx, &seq)?)
} else {
Ok(cx.undefined().upcast())
}
}
method tokenToChars(mut cx) {
// tokenToChars(token: number): [number, number] | undefined
let token = cx.extract::<usize>(0)?;
let this = cx.this();
let guard = cx.lock();
let res = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.token_to_chars(token);
if let Some((_, offsets)) = res {
Ok(neon_serde::to_value(&mut cx, &offsets)?)
} else {
Ok(cx.undefined().upcast())
}
}
method tokenToWord(mut cx) {
// tokenToWord(token: number): number | undefined
let token = cx.argument::<JsNumber>(0)?.value() as usize;
let this = cx.this();
let guard = cx.lock();
let res = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.token_to_word(token);
if let Some((_, index)) = res {
Ok(cx.number(index as f64).upcast())
} else {
Ok(cx.undefined().upcast())
}
}
method charToToken(mut cx) {
// charToToken(pos: number, seqId: number = 0): number | undefined
let pos = cx.extract::<usize>(0)?;
let seq_id = cx.extract_opt::<usize>(1)?.unwrap_or(0);
let this = cx.this();
let guard = cx.lock();
let index = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.char_to_token(pos, seq_id);
if let Some(index) = index {
Ok(cx.number(index as f64).upcast())
} else {
Ok(cx.undefined().upcast())
}
}
method charToWord(mut cx) {
// charToWord(pos: number, seqId: number = 0): number | undefined
let pos = cx.extract::<usize>(0)?;
let seq_id = cx.extract_opt::<usize>(1)?.unwrap_or(0);
let this = cx.this();
let guard = cx.lock();
let index = this.borrow(&guard)
.encoding.as_ref().expect("Uninitialized Encoding")
.char_to_word(pos, seq_id);
if let Some(index) = index {
Ok(cx.number(index as f64).upcast())
} else {
Ok(cx.undefined().upcast())
}
}
method pad(mut cx) {
// pad(length: number, options?: {
// direction?: 'left' | 'right' = 'right',
// padId?: number = 0,
// padTypeId?: number = 0,
// padToken?: string = "[PAD]"
// }
let length = cx.extract::<usize>(0)?;
let params = cx.extract_opt::<PaddingParams>(1)?
.map_or_else(tk::PaddingParams::default, |p| p.0);
let mut this = cx.this();
let guard = cx.lock();
this.borrow_mut(&guard)
.encoding.as_mut().expect("Uninitialized Encoding")
.pad(
length,
params.pad_id,
params.pad_type_id,
&params.pad_token,
params.direction
);
Ok(cx.undefined().upcast())
}
method truncate(mut cx) {
// truncate(length: number, stride: number = 0, direction: string = 'right')
let length = cx.extract::<usize>(0)?;
let stride = cx.extract_opt::<usize>(1)?.unwrap_or(0);
let direction = cx.extract_opt::<String>(2)?.unwrap_or_else(|| String::from("right"));
let tdir = match direction.as_str() {
"left" => Ok(TruncationDirection::Left),
"right" => Ok(TruncationDirection::Right),
_ => cx.throw_error(format!("Invalid truncation direction value : {}", direction)),
}?;
let mut this = cx.this();
let guard = cx.lock();
this.borrow_mut(&guard)
.encoding.as_mut().expect("Uninitialized Encoding")
.truncate(length, stride, tdir);
Ok(cx.undefined().upcast())
}
}
}
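A hedged sketch of reading and adjusting a native Encoding produced by the removed class above; the accessor names mirror the methods declared above, and the untyped parameter is an assumption of this sketch.
// Summarize and post-process an encoding returned by the native bindings.
function summarizeEncoding(enc: any): void {
  const ids: number[] = enc.getIds();
  const tokens: string[] = enc.getTokens();
  const offsets: [number, number][] = enc.getOffsets();
  console.log(`${tokens.length} tokens, ${ids.length} ids, ${offsets.length} offsets`);
  enc.pad(128, { direction: "right", padToken: "[PAD]" }); // options as documented in pad() above
  enc.truncate(64, 0, "right");                            // length, stride, direction
}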

View File

@ -1,98 +0,0 @@
use neon::prelude::*;
use serde::de::DeserializeOwned;
/// Common Error that can be converted to a neon::result::Throw and put
/// the js engine in a throwing state. Makes it way easier to manage errors
pub struct Error(pub String);
impl<T> From<T> for Error
where
T: std::fmt::Display,
{
fn from(e: T) -> Self {
Self(format!("{}", e))
}
}
impl From<Error> for neon::result::Throw {
fn from(err: Error) -> Self {
let msg = err.0;
unsafe {
neon_runtime::error::throw_error_from_utf8(msg.as_ptr(), msg.len() as i32);
neon::result::Throw
}
}
}
pub type LibResult<T> = std::result::Result<T, Error>;
/// This trait is to be implemented for any type that we want to extract from
/// a JsValue.
pub trait FromJsValue: Sized {
fn from_value<'c, C: Context<'c>>(from: Handle<'c, JsValue>, cx: &mut C) -> LibResult<Self>;
}
/// Any type that implements DeserializeOwned from serde can easily be converted
impl<T> FromJsValue for T
where
T: DeserializeOwned,
{
fn from_value<'c, C: Context<'c>>(from: Handle<'c, JsValue>, cx: &mut C) -> LibResult<Self> {
let val: T = neon_serde::from_value(cx, from)?;
Ok(val)
}
}
/// This trait provides some extraction helpers, and we implement it for CallContext
/// so that we can easily extract any type that implements FromJsValue from the arguments.
pub trait Extract {
fn extract<T: FromJsValue>(&mut self, pos: i32) -> LibResult<T>;
fn extract_opt<T: FromJsValue>(&mut self, pos: i32) -> LibResult<Option<T>>;
fn extract_vec<T: FromJsValue>(&mut self, pos: i32) -> LibResult<Vec<T>>;
fn extract_vec_opt<T: FromJsValue>(&mut self, pos: i32) -> LibResult<Option<Vec<T>>>;
}
impl<'c, T: neon::object::This> Extract for CallContext<'c, T> {
fn extract<E: FromJsValue>(&mut self, pos: i32) -> LibResult<E> {
let val = self
.argument_opt(pos)
.ok_or_else(|| Error(format!("Argument {} is missing", pos)))?;
let ext = E::from_value(val, self)?;
Ok(ext)
}
fn extract_opt<E: FromJsValue>(&mut self, pos: i32) -> LibResult<Option<E>> {
let val = self.argument_opt(pos);
match val {
None => Ok(None),
Some(v) => {
// For any optional value, we accept both `undefined` and `null`
if v.downcast::<JsNull>().is_ok() || v.downcast::<JsUndefined>().is_ok() {
Ok(None)
} else if v.downcast::<JsFunction>().is_ok() {
// Could be parsed as an empty object, so we don't accept JsFunction here
Err(Error("Cannot extract from JsFunction".into()))
} else {
Ok(Some(E::from_value(v, self)?))
}
}
}
}
fn extract_vec<E: FromJsValue>(&mut self, pos: i32) -> LibResult<Vec<E>> {
let vec = self
.argument_opt(pos)
.ok_or_else(|| Error(format!("Argument {} is missing", pos)))?
.downcast::<JsArray>()?
.to_vec(self)?;
vec.into_iter().map(|v| E::from_value(v, self)).collect()
}
fn extract_vec_opt<E: FromJsValue>(&mut self, pos: i32) -> LibResult<Option<Vec<E>>> {
self.argument_opt(pos)
.map(|v| {
let vec = v.downcast::<JsArray>()?.to_vec(self)?;
vec.into_iter()
.map(|v| E::from_value(v, self))
.collect::<LibResult<Vec<_>>>()
})
.map_or(Ok(None), |v| v.map(Some))
}
}

View File

@ -1,47 +0,0 @@
#![warn(clippy::all)]
// We need to allow these to use !declare_types
#![allow(clippy::unnecessary_wraps)]
#![allow(clippy::upper_case_acronyms)]
extern crate neon;
extern crate neon_serde;
#[macro_use]
extern crate serde;
extern crate tokenizers as tk;
mod decoders;
mod encoding;
mod extraction;
mod models;
mod normalizers;
mod pre_tokenizers;
mod processors;
mod tasks;
mod tokenizer;
mod trainers;
mod utils;
use neon::prelude::*;
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
register_module!(mut m, {
// Tokenizer
tokenizer::register(&mut m, "tokenizer")?;
// Models
models::register(&mut m, "models")?;
// Decoders
decoders::register(&mut m, "decoders")?;
// Processors
processors::register(&mut m, "processors")?;
// Normalizers
normalizers::register(&mut m, "normalizers")?;
// PreTokenizers
pre_tokenizers::register(&mut m, "pre_tokenizers")?;
// Trainers
trainers::register(&mut m, "trainers")?;
// Utils
utils::register(&mut m, "utils")?;
Ok(())
});

View File

@ -1,423 +0,0 @@
extern crate tokenizers as tk;
use crate::extraction::*;
use crate::tasks::models::{BPEFromFilesTask, WordLevelFromFilesTask, WordPieceFromFilesTask};
use crate::trainers::Trainer;
use neon::prelude::*;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use tk::models::{
bpe::{BpeBuilder, Merges, Vocab},
wordlevel::WordLevelBuilder,
wordpiece::WordPieceBuilder,
ModelWrapper,
};
use tk::Model as ModelTrait;
use tk::Token;
/// Model
#[derive(Clone, Serialize, Deserialize)]
pub struct Model {
#[serde(flatten)]
pub model: Option<Arc<RwLock<ModelWrapper>>>,
}
impl<M> From<M> for Model
where
M: Into<ModelWrapper>,
{
fn from(wrapper: M) -> Self {
Self {
model: Some(Arc::new(RwLock::new(wrapper.into()))),
}
}
}
impl tk::Model for Model {
type Trainer = Trainer;
fn tokenize(&self, sequence: &str) -> tk::Result<Vec<Token>> {
self.model
.as_ref()
.ok_or("Uninitialized Model")?
.read()
.unwrap()
.tokenize(sequence)
}
fn token_to_id(&self, token: &str) -> Option<u32> {
self.model.as_ref()?.read().unwrap().token_to_id(token)
}
fn id_to_token(&self, id: u32) -> Option<String> {
self.model.as_ref()?.read().unwrap().id_to_token(id)
}
fn get_vocab(&self) -> HashMap<String, u32> {
self.model
.as_ref()
.expect("Uninitialized Model")
.read()
.unwrap()
.get_vocab()
}
fn get_vocab_size(&self) -> usize {
self.model
.as_ref()
.expect("Uninitialized Model")
.read()
.unwrap()
.get_vocab_size()
}
fn save(&self, folder: &Path, name: Option<&str>) -> tk::Result<Vec<PathBuf>> {
self.model
.as_ref()
.ok_or("Uninitialized Model")?
.read()
.unwrap()
.save(folder, name)
}
fn get_trainer(&self) -> Self::Trainer {
self.model
.as_ref()
.expect("Uninitialized Model")
.read()
.unwrap()
.get_trainer()
.into()
}
}
declare_types! {
pub class JsModel for Model {
init(_) {
// This should not be called from JS
Ok(Model { model: None })
}
method save(mut cx) {
// save(folder: string, name?: string)
let folder = cx.extract::<String>(0)?;
let name = cx.extract_opt::<String>(1)?;
let this = cx.this();
let guard = cx.lock();
let files = this.borrow(&guard)
.model.as_ref().expect("Uninitialized Model")
.read().unwrap()
.save(
Path::new(&folder),
name.as_deref()
)
.map_err(|e| Error(format!("{}", e)))?;
Ok(neon_serde::to_value(&mut cx, &files)?)
}
}
}
#[derive(Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct BpeOptions {
cache_capacity: Option<usize>,
dropout: Option<f32>,
unk_token: Option<String>,
continuing_subword_prefix: Option<String>,
end_of_word_suffix: Option<String>,
fuse_unk: Option<bool>,
byte_fallback: Option<bool>,
}
impl BpeOptions {
fn apply_to_bpe_builder(self, mut builder: BpeBuilder) -> BpeBuilder {
if let Some(cache_capacity) = self.cache_capacity {
builder = builder.cache_capacity(cache_capacity);
}
if let Some(dropout) = self.dropout {
builder = builder.dropout(dropout);
}
if let Some(unk_token) = self.unk_token {
builder = builder.unk_token(unk_token);
}
if let Some(continuing_subword_prefix) = self.continuing_subword_prefix {
builder = builder.continuing_subword_prefix(continuing_subword_prefix);
}
if let Some(end_of_word_suffix) = self.end_of_word_suffix {
builder = builder.end_of_word_suffix(end_of_word_suffix);
}
if let Some(fuse_unk) = self.fuse_unk {
builder = builder.fuse_unk(fuse_unk);
}
if let Some(byte_fallback) = self.byte_fallback {
builder = builder.byte_fallback(byte_fallback);
}
builder
}
}
/// bpe_init(vocab: {[token: string]: number}, merges: [string, string][], options: {
/// cacheCapacity?: number,
/// dropout?: number,
/// unkToken?: string,
/// continuingSubwordPrefix?: string,
/// endOfWordSuffix?: string
/// })
fn bpe_init(mut cx: FunctionContext) -> JsResult<JsModel> {
let vocab = cx.extract::<Vocab>(0)?;
let merges = cx.extract::<Merges>(1)?;
let options = cx.extract_opt::<BpeOptions>(2)?.unwrap_or_default();
let mut builder = tk::models::bpe::BPE::builder().vocab_and_merges(vocab, merges);
builder = options.apply_to_bpe_builder(builder);
let model = builder.build().map_err(|e| Error(e.to_string()))?;
let mut js_model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(model.into())));
Ok(js_model)
}
/// bpe_from_file(vocab: string, merges: string, options: {
/// cacheCapacity?: number,
/// dropout?: number,
/// unkToken?: string,
/// continuingSubwordPrefix?: string,
/// endOfWordSuffix?: string
/// byteFallback?: bool
/// }, callback)
fn bpe_from_file(mut cx: FunctionContext) -> JsResult<JsUndefined> {
let (options, callback) = match cx.extract_opt::<BpeOptions>(2) {
// Options were there, and extracted
Ok(Some(options)) => (options, cx.argument::<JsFunction>(3)?),
// Options were undefined or null
Ok(None) => (BpeOptions::default(), cx.argument::<JsFunction>(3)?),
// Options not specified, callback instead
Err(_) => (BpeOptions::default(), cx.argument::<JsFunction>(2)?),
};
let vocab = cx.extract::<String>(0)?;
let merges = cx.extract::<String>(1)?;
let mut builder = tk::models::bpe::BPE::from_file(&vocab, &merges);
builder = options.apply_to_bpe_builder(builder);
let task = BPEFromFilesTask::new(builder);
task.schedule(callback);
Ok(cx.undefined())
}
/// bpe_empty()
fn bpe_empty(mut cx: FunctionContext) -> JsResult<JsModel> {
let mut model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let bpe = tk::models::bpe::BPE::default();
let guard = cx.lock();
model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(bpe.into())));
Ok(model)
}
#[derive(Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct WordPieceOptions {
unk_token: Option<String>,
continuing_subword_prefix: Option<String>,
max_input_chars_per_word: Option<usize>,
}
impl WordPieceOptions {
fn apply_to_wordpiece_builder(self, mut builder: WordPieceBuilder) -> WordPieceBuilder {
if let Some(token) = self.unk_token {
builder = builder.unk_token(token);
}
if let Some(prefix) = self.continuing_subword_prefix {
builder = builder.continuing_subword_prefix(prefix);
}
if let Some(max) = self.max_input_chars_per_word {
builder = builder.max_input_chars_per_word(max);
}
builder
}
}
/// wordpiece_init(vocab: {[token: string]: number}, options: {
/// unkToken?: string = "[UNK]",
/// maxInputCharsPerWord?: number = 100,
/// continuingSubwordPrefix?: "##",
/// })
fn wordpiece_init(mut cx: FunctionContext) -> JsResult<JsModel> {
let vocab = cx.extract::<HashMap<String, u32>>(0)?;
let options = cx.extract_opt::<WordPieceOptions>(1)?.unwrap_or_default();
let mut builder = tk::models::wordpiece::WordPiece::builder().vocab(vocab);
builder = options.apply_to_wordpiece_builder(builder);
let model = builder.build().map_err(|e| Error(e.to_string()))?;
let mut js_model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(model.into())));
Ok(js_model)
}
/// wordpiece_from_file(vocab: string, options: {
/// unkToken?: string = "[UNK]",
/// maxInputCharsPerWord?: number = 100,
/// continuingSubwordPrefix?: "##",
/// }, callback)
fn wordpiece_from_file(mut cx: FunctionContext) -> JsResult<JsUndefined> {
let (options, callback) = match cx.extract_opt::<WordPieceOptions>(1) {
// Options were there, and extracted
Ok(Some(options)) => (options, cx.argument::<JsFunction>(2)?),
// Options were undefined or null
Ok(None) => (WordPieceOptions::default(), cx.argument::<JsFunction>(2)?),
// Options not specified, callback instead
Err(_) => (WordPieceOptions::default(), cx.argument::<JsFunction>(1)?),
};
let vocab = cx.extract::<String>(0)?;
let mut builder = tk::models::wordpiece::WordPiece::from_file(&vocab);
builder = options.apply_to_wordpiece_builder(builder);
let task = WordPieceFromFilesTask::new(builder);
task.schedule(callback);
Ok(cx.undefined())
}
/// wordpiece_empty()
fn wordpiece_empty(mut cx: FunctionContext) -> JsResult<JsModel> {
let mut model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let wordpiece = tk::models::wordpiece::WordPiece::default();
let guard = cx.lock();
model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(wordpiece.into())));
Ok(model)
}
#[derive(Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct WordLevelOptions {
unk_token: Option<String>,
}
impl WordLevelOptions {
fn apply_to_wordlevel_builder(self, mut builder: WordLevelBuilder) -> WordLevelBuilder {
if let Some(token) = self.unk_token {
builder = builder.unk_token(token);
}
builder
}
}
/// wordlevel_init(vocab: {[token: string]: number}, options: {
/// unkToken?: String,
/// })
fn wordlevel_init(mut cx: FunctionContext) -> JsResult<JsModel> {
let vocab = cx.extract::<HashMap<String, u32>>(0)?;
let options = cx.extract_opt::<WordLevelOptions>(1)?.unwrap_or_default();
let mut builder = tk::models::wordlevel::WordLevel::builder().vocab(vocab);
builder = options.apply_to_wordlevel_builder(builder);
let model = builder.build().map_err(|e| Error(e.to_string()))?;
let mut js_model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(model.into())));
Ok(js_model)
}
/// wordlevel_from_file(vocab: string, options: {
/// unkToken?: String,
/// }, callback)
fn wordlevel_from_file(mut cx: FunctionContext) -> JsResult<JsUndefined> {
let (options, callback) = match cx.extract_opt::<WordLevelOptions>(1) {
// Options were there, and extracted
Ok(Some(options)) => (options, cx.argument::<JsFunction>(2)?),
// Options were undefined or null
Ok(None) => (WordLevelOptions::default(), cx.argument::<JsFunction>(2)?),
// Options not specified, callback instead
Err(_) => (WordLevelOptions::default(), cx.argument::<JsFunction>(1)?),
};
let vocab = cx.extract::<String>(0)?;
let mut builder = tk::models::wordlevel::WordLevel::builder().files(vocab);
builder = options.apply_to_wordlevel_builder(builder);
let task = WordLevelFromFilesTask::new(builder);
task.schedule(callback);
Ok(cx.undefined())
}
/// wordlevel_empty()
fn wordlevel_empty(mut cx: FunctionContext) -> JsResult<JsModel> {
let mut model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let wordlevel = tk::models::wordlevel::WordLevel::default();
let guard = cx.lock();
model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(wordlevel.into())));
Ok(model)
}
#[derive(Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct UnigramOptions {
unk_id: Option<usize>,
byte_fallback: Option<bool>,
}
/// unigram_init(vocab: [string, number][], options?: {
/// unkId?: number,
/// byteFallback?: bool
/// })
fn unigram_init(mut cx: FunctionContext) -> JsResult<JsModel> {
let vocab = cx.extract::<Vec<(String, f64)>>(0)?;
let options = cx.extract_opt::<UnigramOptions>(1)?.unwrap_or_default();
let byte_fallback = options.byte_fallback.unwrap_or(false);
let unigram = tk::models::unigram::Unigram::from(vocab, options.unk_id, byte_fallback)
.map_err(|e| Error(e.to_string()))?;
let mut js_model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(unigram.into())));
Ok(js_model)
}
/// unigram_empty()
fn unigram_empty(mut cx: FunctionContext) -> JsResult<JsModel> {
let mut model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let unigram = tk::models::unigram::Unigram::default();
let guard = cx.lock();
model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(unigram.into())));
Ok(model)
}
/// Register everything here
pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
m.export_function(&format!("{}_BPE_init", prefix), bpe_init)?;
m.export_function(&format!("{}_BPE_from_file", prefix), bpe_from_file)?;
m.export_function(&format!("{}_BPE_empty", prefix), bpe_empty)?;
m.export_function(&format!("{}_WordPiece_init", prefix), wordpiece_init)?;
m.export_function(
&format!("{}_WordPiece_from_file", prefix),
wordpiece_from_file,
)?;
m.export_function(&format!("{}_WordPiece_empty", prefix), wordpiece_empty)?;
m.export_function(&format!("{}_WordLevel_init", prefix), wordlevel_init)?;
m.export_function(
&format!("{}_WordLevel_from_file", prefix),
wordlevel_from_file,
)?;
m.export_function(&format!("{}_WordLevel_empty", prefix), wordlevel_empty)?;
m.export_function(&format!("{}_Unigram_init", prefix), unigram_init)?;
m.export_function(&format!("{}_Unigram_empty", prefix), unigram_empty)?;
Ok(())
}
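The *_from_file constructors registered above take a Node-style callback; a hedged sketch of how callers promisified them (the models namespace import and file paths are assumptions).
import { promisify } from "util";
import { models } from "tokenizers";
// Load a BPE model asynchronously, exactly as the removed helper classes did.
async function loadBpeModel() {
  const fromFile = promisify(models.BPE.fromFile);
  return fromFile("./vocab.json", "./merges.txt", { unkToken: "<unk>" });
}
// Empty/init constructors are synchronous.
const emptyBpe = models.BPE.empty();
console.log(typeof emptyBpe);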

View File

@ -1,331 +0,0 @@
extern crate tokenizers as tk;
use crate::extraction::*;
use neon::prelude::*;
use serde::{ser::SerializeStruct, Serialize, Serializer};
use std::sync::Arc;
use tk::normalizers::NormalizerWrapper;
use tk::NormalizedString;
#[derive(Clone, Debug, Deserialize)]
#[serde(untagged)]
pub enum JsNormalizerWrapper {
Sequence(Vec<Arc<NormalizerWrapper>>),
Wrapped(Arc<NormalizerWrapper>),
}
impl Serialize for JsNormalizerWrapper {
fn serialize<S>(&self, serializer: S) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where
S: Serializer,
{
match self {
JsNormalizerWrapper::Sequence(seq) => {
let mut ser = serializer.serialize_struct("Sequence", 2)?;
ser.serialize_field("type", "Sequence")?;
ser.serialize_field("normalizers", seq)?;
ser.end()
}
JsNormalizerWrapper::Wrapped(inner) => inner.serialize(serializer),
}
}
}
impl<I> From<I> for JsNormalizerWrapper
where
I: Into<NormalizerWrapper>,
{
fn from(norm: I) -> Self {
JsNormalizerWrapper::Wrapped(Arc::new(norm.into()))
}
}
/// Normalizer
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Normalizer {
#[serde(flatten)]
pub normalizer: Option<JsNormalizerWrapper>,
}
impl tk::Normalizer for Normalizer {
fn normalize(&self, normalized: &mut NormalizedString) -> tk::Result<()> {
match self.normalizer.as_ref().ok_or("Uninitialized Normalizer")? {
JsNormalizerWrapper::Sequence(seq) => {
for norm in seq {
norm.normalize(normalized)?;
}
}
JsNormalizerWrapper::Wrapped(norm) => norm.normalize(normalized)?,
};
Ok(())
}
}
declare_types! {
pub class JsNormalizer for Normalizer {
init(_) {
// This should not be called from JS
Ok(Normalizer { normalizer: None })
}
method normalizeString(mut cx) {
use tk::Normalizer;
let sequence = cx.extract::<String>(0)?;
let mut normalized = NormalizedString::from(sequence);
let this = cx.this();
let guard = cx.lock();
this.borrow(&guard)
.normalize(&mut normalized)
.map_err(|e| Error(format!("{}", e)))?;
Ok(cx.string(normalized.get()).upcast())
}
}
}
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct BertNormalizerOptions {
clean_text: bool,
handle_chinese_chars: bool,
strip_accents: Option<bool>,
lowercase: bool,
}
impl Default for BertNormalizerOptions {
fn default() -> Self {
Self {
clean_text: true,
handle_chinese_chars: true,
strip_accents: None,
lowercase: true,
}
}
}
/// bert_normalizer(options?: {
/// cleanText?: bool = true,
/// handleChineseChars?: bool = true,
/// stripAccents?: bool,
/// lowercase?: bool = true
/// })
fn bert_normalizer(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let options = cx
.extract_opt::<BertNormalizerOptions>(0)?
.unwrap_or_default();
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(
tk::normalizers::bert::BertNormalizer::new(
options.clean_text,
options.handle_chinese_chars,
options.strip_accents,
options.lowercase,
)
.into(),
);
Ok(normalizer)
}
/// nfd()
fn nfd(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(tk::normalizers::unicode::NFD.into());
Ok(normalizer)
}
/// nfkd()
fn nfkd(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(tk::normalizers::unicode::NFKD.into());
Ok(normalizer)
}
/// nfc()
fn nfc(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(tk::normalizers::unicode::NFC.into());
Ok(normalizer)
}
/// nfkc()
fn nfkc(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(tk::normalizers::unicode::NFKC.into());
Ok(normalizer)
}
/// strip(left?: boolean, right?: boolean)
fn strip(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let left = cx.extract_opt::<bool>(0)?.unwrap_or(true);
let right = cx.extract_opt::<bool>(1)?.unwrap_or(true);
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer =
Some(tk::normalizers::strip::Strip::new(left, right).into());
Ok(normalizer)
}
/// prepend(prepend: string)
fn prepend(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let prepend: String = cx.extract::<String>(0)?;
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer =
Some(tk::normalizers::prepend::Prepend::new(prepend).into());
Ok(normalizer)
}
/// strip_accents()
fn strip_accents(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(tk::normalizers::strip::StripAccents.into());
Ok(normalizer)
}
/// sequence(normalizers: Normalizer[])
fn sequence(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let normalizers = cx.argument::<JsArray>(0)?.to_vec(&mut cx)?;
let mut sequence = Vec::with_capacity(normalizers.len());
normalizers.into_iter().try_for_each(|normalizer| {
match normalizer.downcast::<JsNormalizer>().or_throw(&mut cx) {
Ok(normalizer) => {
let guard = cx.lock();
let normalizer = normalizer.borrow(&guard).normalizer.clone();
if let Some(normalizer) = normalizer {
match normalizer {
JsNormalizerWrapper::Sequence(seq) => sequence.extend(seq),
JsNormalizerWrapper::Wrapped(inner) => sequence.push(inner),
}
Ok(())
} else {
cx.throw_error("Uninitialized Normalizer")
}
}
Err(e) => Err(e),
}
})?;
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(JsNormalizerWrapper::Sequence(sequence));
Ok(normalizer)
}
/// lowercase()
fn lowercase(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(tk::normalizers::utils::Lowercase.into());
Ok(normalizer)
}
/// replace()
fn replace(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let pattern: String = cx.extract::<String>(0)?;
let content: String = cx.extract::<String>(1)?;
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(
tk::normalizers::replace::Replace::new(pattern, content)
.map_err(|e| Error(e.to_string()))?
.into(),
);
Ok(normalizer)
}
/// nmt()
fn nmt(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(tk::normalizers::unicode::Nmt.into());
Ok(normalizer)
}
/// precompiled()
fn precompiled(mut cx: FunctionContext) -> JsResult<JsNormalizer> {
let bytes = cx.extract::<Vec<u8>>(0)?;
let mut normalizer = JsNormalizer::new::<_, JsNormalizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
normalizer.borrow_mut(&guard).normalizer = Some(
tk::normalizers::precompiled::Precompiled::from(&bytes)
.map_err(|e| Error(e.to_string()))?
.into(),
);
Ok(normalizer)
}
/// Register everything here
pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
m.export_function(&format!("{}_BertNormalizer", prefix), bert_normalizer)?;
m.export_function(&format!("{}_NFD", prefix), nfd)?;
m.export_function(&format!("{}_NFKD", prefix), nfkd)?;
m.export_function(&format!("{}_NFC", prefix), nfc)?;
m.export_function(&format!("{}_NFKC", prefix), nfkc)?;
m.export_function(&format!("{}_Sequence", prefix), sequence)?;
m.export_function(&format!("{}_Lowercase", prefix), lowercase)?;
m.export_function(&format!("{}_Strip", prefix), strip)?;
m.export_function(&format!("{}_Prepend", prefix), prepend)?;
m.export_function(&format!("{}_StripAccents", prefix), strip_accents)?;
m.export_function(&format!("{}_Nmt", prefix), nmt)?;
m.export_function(&format!("{}_Precompiled", prefix), precompiled)?;
m.export_function(&format!("{}_Replace", prefix), replace)?;
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use tk::normalizers::unicode::{NFC, NFKC};
use tk::normalizers::utils::Sequence;
use tk::normalizers::NormalizerWrapper;
#[test]
fn serialize() {
let js_wrapped: JsNormalizerWrapper = NFKC.into();
let js_ser = serde_json::to_string(&js_wrapped).unwrap();
let rs_wrapped = NormalizerWrapper::NFKC(NFKC);
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
assert_eq!(js_ser, rs_ser);
let js_norm: Normalizer = serde_json::from_str(&rs_ser).unwrap();
match js_norm.normalizer.unwrap() {
JsNormalizerWrapper::Wrapped(nfc) => match nfc.as_ref() {
NormalizerWrapper::NFKC(_) => {}
_ => panic!("Expected NFKC"),
},
_ => panic!("Expected wrapped, not sequence."),
}
let js_seq: JsNormalizerWrapper = Sequence::new(vec![NFC.into(), NFKC.into()]).into();
let js_wrapper_ser = serde_json::to_string(&js_seq).unwrap();
let rs_wrapped = NormalizerWrapper::Sequence(Sequence::new(vec![NFC.into(), NFKC.into()]));
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
assert_eq!(js_wrapper_ser, rs_ser);
let js_seq = Normalizer {
normalizer: Some(js_seq),
};
let js_ser = serde_json::to_string(&js_seq).unwrap();
assert_eq!(js_wrapper_ser, js_ser);
let rs_seq = Sequence::new(vec![NFC.into(), NFKC.into()]);
let rs_ser = serde_json::to_string(&rs_seq).unwrap();
assert_eq!(js_wrapper_ser, rs_ser);
}
}
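A hedged sketch of composing the removed normalizer bindings from the JS side, matching the sequence used by the helper classes earlier in this diff; the root import is assumed.
import { Tokenizer, models, normalizers } from "tokenizers";
// NFKC + lowercasing, applied in order via a Sequence normalizer.
const tokenizer = new Tokenizer(models.BPE.empty());
tokenizer.setNormalizer(
  normalizers.sequenceNormalizer([normalizers.nfkcNormalizer(), normalizers.lowercaseNormalizer()])
);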

View File

@ -1,341 +0,0 @@
extern crate tokenizers as tk;
use crate::extraction::*;
use neon::prelude::*;
use std::sync::Arc;
use serde::{ser::SerializeStruct, Serialize, Serializer};
use tk::normalizer::SplitDelimiterBehavior;
use tk::pre_tokenizers::PreTokenizerWrapper;
use tk::PreTokenizedString;
#[derive(Clone)]
struct JsSplitDelimiterBehavior(SplitDelimiterBehavior);
impl FromJsValue for JsSplitDelimiterBehavior {
fn from_value<'c, C: Context<'c>>(from: Handle<'c, JsValue>, _cx: &mut C) -> LibResult<Self> {
let s = from.downcast::<JsString>()?.value();
Ok(Self(match s.as_ref() {
"removed" => Ok(SplitDelimiterBehavior::Removed),
"isolated" => Ok(SplitDelimiterBehavior::Isolated),
"mergedWithPrevious" => Ok(SplitDelimiterBehavior::MergedWithPrevious),
"mergedWithNext" => Ok(SplitDelimiterBehavior::MergedWithNext),
"contiguous" => Ok(SplitDelimiterBehavior::Contiguous),
_ => Err(Error(
"Wrong value for SplitDelimiterBehavior, expected one of: \
`removed, isolated, mergedWithPrevious, mergedWithNext, contiguous`"
.into(),
)),
}?))
}
}
impl From<JsSplitDelimiterBehavior> for SplitDelimiterBehavior {
fn from(v: JsSplitDelimiterBehavior) -> Self {
v.0
}
}
#[derive(Clone, Debug, Deserialize)]
#[serde(untagged)]
pub enum JsPreTokenizerWrapper {
Sequence(Vec<Arc<PreTokenizerWrapper>>),
Wrapped(Arc<PreTokenizerWrapper>),
}
impl Serialize for JsPreTokenizerWrapper {
fn serialize<S>(&self, serializer: S) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where
S: Serializer,
{
match self {
JsPreTokenizerWrapper::Sequence(seq) => {
let mut ser = serializer.serialize_struct("Sequence", 2)?;
ser.serialize_field("type", "Sequence")?;
ser.serialize_field("pretokenizers", seq)?;
ser.end()
}
JsPreTokenizerWrapper::Wrapped(inner) => inner.serialize(serializer),
}
}
}
impl<I> From<I> for JsPreTokenizerWrapper
where
I: Into<PreTokenizerWrapper>,
{
fn from(norm: I) -> Self {
JsPreTokenizerWrapper::Wrapped(Arc::new(norm.into()))
}
}
/// PreTokenizers
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct PreTokenizer {
#[serde(flatten)]
pub pretok: Option<JsPreTokenizerWrapper>,
}
impl tk::PreTokenizer for PreTokenizer {
fn pre_tokenize(&self, pretokenized: &mut PreTokenizedString) -> tk::Result<()> {
match self.pretok.as_ref().ok_or("Uninitialized PreTokenizer")? {
JsPreTokenizerWrapper::Sequence(seq) => {
for pretokenizer in seq {
pretokenizer.pre_tokenize(pretokenized)?;
}
}
JsPreTokenizerWrapper::Wrapped(pretokenizer) => {
pretokenizer.pre_tokenize(pretokenized)?
}
};
Ok(())
}
}
declare_types! {
pub class JsPreTokenizer for PreTokenizer {
init(_) {
// This should not be called from JS
Ok(PreTokenizer { pretok: None })
}
method preTokenizeString(mut cx) {
use tk::PreTokenizer;
let sequence = cx.extract::<String>(0)?;
let mut pretokenized = PreTokenizedString::from(sequence);
let this = cx.this();
let guard = cx.lock();
this.borrow(&guard)
.pre_tokenize(&mut pretokenized)
.map_err(|e| Error(format!("{}", e)))?;
let splits = pretokenized
.get_splits(tk::OffsetReferential::Original, tk::OffsetType::Char)
.into_iter()
.map(|(s, o, _)| (s.to_owned(), o))
.collect::<Vec<_>>();
Ok(neon_serde::to_value(&mut cx, &splits)?.upcast())
}
}
}
/// byte_level(addPrefixSpace: bool = true, useRegex: bool = true)
fn byte_level(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let mut byte_level = tk::pre_tokenizers::byte_level::ByteLevel::default();
if let Some(add_prefix_space) = cx.extract_opt::<bool>(0)? {
byte_level = byte_level.add_prefix_space(add_prefix_space);
}
if let Some(use_regex) = cx.extract_opt::<bool>(1)? {
byte_level = byte_level.use_regex(use_regex);
}
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok = Some(byte_level.into());
Ok(pretok)
}
/// byte_level_alphabet()
fn byte_level_alphabet(mut cx: FunctionContext) -> JsResult<JsValue> {
let chars = tk::pre_tokenizers::byte_level::ByteLevel::alphabet()
.into_iter()
.map(|c| c.to_string())
.collect::<Vec<_>>();
Ok(neon_serde::to_value(&mut cx, &chars)?)
}
/// whitespace()
fn whitespace(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok = Some(tk::pre_tokenizers::whitespace::Whitespace {}.into());
Ok(pretok)
}
/// whitespace_split()
fn whitespace_split(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok = Some(tk::pre_tokenizers::whitespace::WhitespaceSplit.into());
Ok(pretok)
}
/// bert_pre_tokenizer()
fn bert_pre_tokenizer(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok = Some(tk::pre_tokenizers::bert::BertPreTokenizer.into());
Ok(pretok)
}
/// metaspace(replacement: string = '▁', addPrefixSpace: bool = true)
fn metaspace(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let replacement = cx.extract_opt::<char>(0)?.unwrap_or('▁');
let add_prefix_space = cx.extract_opt::<bool>(1)?.unwrap_or(true);
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok =
Some(tk::pre_tokenizers::metaspace::Metaspace::new(replacement, add_prefix_space).into());
Ok(pretok)
}
/// split(pattern: string, behavior: string, invert: bool = false)
fn split(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let pattern: String = cx.extract::<String>(0)?;
let behavior: JsSplitDelimiterBehavior = cx.extract::<JsSplitDelimiterBehavior>(1)?;
let invert: bool = cx.extract_opt::<bool>(2)?.unwrap_or(false);
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok = Some(
tk::pre_tokenizers::split::Split::new(pattern, behavior.into(), invert)
.map_err(|e| Error(e.to_string()))?
.into(),
);
Ok(pretok)
}
/// punctuation()
fn punctuation(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let behavior: JsSplitDelimiterBehavior = cx
.extract_opt::<JsSplitDelimiterBehavior>(0)?
.unwrap_or(JsSplitDelimiterBehavior(SplitDelimiterBehavior::Isolated));
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok =
Some(tk::pre_tokenizers::punctuation::Punctuation::new(behavior.into()).into());
Ok(pretok)
}
/// sequence()
fn sequence(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let pretokenizers = cx.argument::<JsArray>(0)?.to_vec(&mut cx)?;
let mut sequence = Vec::with_capacity(pretokenizers.len());
pretokenizers.into_iter().try_for_each(|pretokenizer| {
match pretokenizer.downcast::<JsPreTokenizer>().or_throw(&mut cx) {
Ok(pretokenizer) => {
let guard = cx.lock();
let pretok = pretokenizer.borrow(&guard).pretok.clone();
if let Some(pretokenizer) = pretok {
match pretokenizer {
JsPreTokenizerWrapper::Sequence(seq) => sequence.extend(seq),
JsPreTokenizerWrapper::Wrapped(inner) => sequence.push(inner),
}
Ok(())
} else {
cx.throw_error("Uninitialized PreTokenizer")
}
}
Err(e) => Err(e),
}
})?;
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok = Some(JsPreTokenizerWrapper::Sequence(sequence));
Ok(pretok)
}
/// char_delimiter_split(delimiter: string)
fn char_delimiter_split(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let delimiter = cx.extract::<char>(0)?;
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok =
Some(tk::pre_tokenizers::delimiter::CharDelimiterSplit::new(delimiter).into());
Ok(pretok)
}
/// digits(individualDigits: bool)
fn digits(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let individual_digits = cx.extract_opt::<bool>(0)?.unwrap_or(false);
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).pretok =
Some(tk::pre_tokenizers::digits::Digits::new(individual_digits).into());
Ok(pretok)
}
/// Register everything here
pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
m.export_function(&format!("{}_ByteLevel", prefix), byte_level)?;
m.export_function(
&format!("{}_ByteLevel_Alphabet", prefix),
byte_level_alphabet,
)?;
m.export_function(&format!("{}_Whitespace", prefix), whitespace)?;
m.export_function(&format!("{}_WhitespaceSplit", prefix), whitespace_split)?;
m.export_function(&format!("{}_BertPreTokenizer", prefix), bert_pre_tokenizer)?;
m.export_function(&format!("{}_Metaspace", prefix), metaspace)?;
m.export_function(&format!("{}_Split", prefix), split)?;
m.export_function(
&format!("{}_CharDelimiterSplit", prefix),
char_delimiter_split,
)?;
m.export_function(&format!("{}_Punctuation", prefix), punctuation)?;
m.export_function(&format!("{}_Sequence", prefix), sequence)?;
m.export_function(&format!("{}_Digits", prefix), digits)?;
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use tk::pre_tokenizers::sequence::Sequence;
use tk::pre_tokenizers::whitespace::{Whitespace, WhitespaceSplit};
use tk::pre_tokenizers::PreTokenizerWrapper;
#[test]
fn serialize() {
let js_wrapped: JsPreTokenizerWrapper = Whitespace {}.into();
let js_ser = serde_json::to_string(&js_wrapped).unwrap();
let rs_wrapped = PreTokenizerWrapper::Whitespace(Whitespace {});
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
assert_eq!(js_ser, rs_ser);
let js_pretok: PreTokenizer = serde_json::from_str(&rs_ser).unwrap();
match js_pretok.pretok.unwrap() {
JsPreTokenizerWrapper::Wrapped(pretok) => match pretok.as_ref() {
PreTokenizerWrapper::Whitespace(_) => {}
_ => panic!("Expected Whitespace"),
},
_ => panic!("Expected wrapped, not sequence."),
}
let js_seq: JsPreTokenizerWrapper =
Sequence::new(vec![WhitespaceSplit.into(), Whitespace {}.into()]).into();
let js_wrapper_ser = serde_json::to_string(&js_seq).unwrap();
let rs_wrapped = PreTokenizerWrapper::Sequence(Sequence::new(vec![
WhitespaceSplit.into(),
Whitespace {}.into(),
]));
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
assert_eq!(js_wrapper_ser, rs_ser);
let js_seq = PreTokenizer {
pretok: Some(js_seq),
};
let js_ser = serde_json::to_string(&js_seq).unwrap();
assert_eq!(js_wrapper_ser, js_ser);
let rs_seq = Sequence::new(vec![WhitespaceSplit.into(), Whitespace {}.into()]);
let rs_ser = serde_json::to_string(&rs_seq).unwrap();
assert_eq!(js_wrapper_ser, rs_ser);
}
}
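The constructors above are only reachable through the names built in `register` (for example `{prefix}_Punctuation`). As a rough illustration of the JS-facing shape, here is a minimal TypeScript sketch; the `pre_tokenizers` prefix and the addon path are assumptions, not something this file defines.

// Sketch only: assumes register() was called with the prefix "pre_tokenizers"
// and that the compiled addon is reachable at this (hypothetical) path.
const native = require("../../native");

const punct = native.pre_tokenizers_Punctuation();          // behavior defaults to "isolated"
const digits = native.pre_tokenizers_Digits(true);          // one token per digit
const bySpace = native.pre_tokenizers_CharDelimiterSplit(" ");

// sequence() flattens nested Sequence pre-tokenizers and throws on uninitialized ones.
const seq = native.pre_tokenizers_Sequence([punct, digits, bySpace]);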

View File

@ -1,170 +0,0 @@
extern crate tokenizers as tk;
use crate::extraction::*;
use neon::prelude::*;
use std::sync::Arc;
use tk::processors::PostProcessorWrapper;
use tk::Encoding;
/// Processor
#[derive(Clone, Serialize, Deserialize)]
pub struct Processor {
#[serde(flatten)]
pub processor: Option<Arc<PostProcessorWrapper>>,
}
impl tk::PostProcessor for Processor {
fn added_tokens(&self, is_pair: bool) -> usize {
self.processor
.as_ref()
.expect("Uninitialized PostProcessor")
.added_tokens(is_pair)
}
fn process_encodings(
&self,
encodings: Vec<Encoding>,
add_special_tokens: bool,
) -> tk::Result<Vec<Encoding>> {
self.processor
.as_ref()
.ok_or("Uninitialized PostProcessor")?
.process_encodings(encodings, add_special_tokens)
}
}
declare_types! {
pub class JsPostProcessor for Processor {
init(_) {
// This should not be called from JS
Ok(Processor { processor: None })
}
}
}
/// bert_processing(sep: [String, number], cls: [String, number])
fn bert_processing(mut cx: FunctionContext) -> JsResult<JsPostProcessor> {
let sep = cx.extract::<(String, u32)>(0)?;
let cls = cx.extract::<(String, u32)>(1)?;
let mut processor = JsPostProcessor::new::<_, JsPostProcessor, _>(&mut cx, vec![])?;
let guard = cx.lock();
processor.borrow_mut(&guard).processor = Some(Arc::new(
tk::processors::bert::BertProcessing::new(sep, cls).into(),
));
Ok(processor)
}
/// roberta_processing(
/// sep: [String, number],
/// cls: [String, number],
/// trimOffsets: boolean = true,
/// addPrefixSpace: boolean = true
/// )
fn roberta_processing(mut cx: FunctionContext) -> JsResult<JsPostProcessor> {
let sep = cx.extract::<(String, u32)>(0)?;
let cls = cx.extract::<(String, u32)>(1)?;
let mut processor = tk::processors::roberta::RobertaProcessing::new(sep, cls);
if let Some(trim_offsets) = cx.extract_opt::<bool>(2)? {
processor = processor.trim_offsets(trim_offsets);
}
if let Some(add_prefix_space) = cx.extract_opt::<bool>(3)? {
processor = processor.add_prefix_space(add_prefix_space);
}
let mut js_processor = JsPostProcessor::new::<_, JsPostProcessor, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_processor.borrow_mut(&guard).processor = Some(Arc::new(processor.into()));
Ok(js_processor)
}
/// bytelevel(trimOffsets?: boolean)
fn bytelevel(mut cx: FunctionContext) -> JsResult<JsPostProcessor> {
let mut byte_level = tk::processors::byte_level::ByteLevel::default();
if let Some(trim_offsets) = cx.extract_opt::<bool>(0)? {
byte_level = byte_level.trim_offsets(trim_offsets);
}
let mut processor = JsPostProcessor::new::<_, JsPostProcessor, _>(&mut cx, vec![])?;
let guard = cx.lock();
processor.borrow_mut(&guard).processor = Some(Arc::new(byte_level.into()));
Ok(processor)
}
/// template_processing(
/// single: String,
/// pair?: String,
/// special_tokens?: [String, number][] = [],
/// )
fn template_processing(mut cx: FunctionContext) -> JsResult<JsPostProcessor> {
let mut i = 1;
let special_tokens = loop {
if let Ok(Some(spe)) = cx.extract_opt::<Vec<(String, u32)>>(i) {
break spe;
}
i += 1;
if i == 3 {
break vec![];
}
};
let single = cx.extract::<String>(0)?;
let pair = cx.extract_opt::<String>(1)?;
let mut builder = tk::processors::template::TemplateProcessing::builder();
builder.try_single(single).map_err(Error)?;
builder.special_tokens(special_tokens);
if let Some(pair) = pair {
builder.try_pair(pair).map_err(Error)?;
}
let processor = builder.build().map_err(|e| Error(e.to_string()))?;
let mut js_processor = JsPostProcessor::new::<_, JsPostProcessor, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_processor.borrow_mut(&guard).processor = Some(Arc::new(processor.into()));
Ok(js_processor)
}
/// sequence(processors: Processor[])
fn sequence(mut cx: FunctionContext) -> JsResult<JsPostProcessor> {
let processors = cx.argument::<JsArray>(0)?.to_vec(&mut cx)?;
let mut sequence = Vec::with_capacity(processors.len());
processors.into_iter().try_for_each(|processor| {
match processor.downcast::<JsPostProcessor>().or_throw(&mut cx) {
Ok(processor) => {
let guard = cx.lock();
if let Some(processor_arc) = &processor.borrow(&guard).processor {
let processor: PostProcessorWrapper = (**processor_arc).clone();
sequence.push(processor);
}
Ok(())
}
Err(e) => Err(e),
}
})?;
let mut pretok = JsPostProcessor::new::<_, JsPostProcessor, _>(&mut cx, vec![])?;
let guard = cx.lock();
pretok.borrow_mut(&guard).processor = Some(Arc::new(PostProcessorWrapper::Sequence(
tk::processors::sequence::Sequence::new(sequence),
)));
Ok(pretok)
}
/// Register everything here
pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
m.export_function(&format!("{}_BertProcessing", prefix), bert_processing)?;
m.export_function(&format!("{}_RobertaProcessing", prefix), roberta_processing)?;
m.export_function(&format!("{}_ByteLevel", prefix), bytelevel)?;
m.export_function(
&format!("{}_TemplateProcessing", prefix),
template_processing,
)?;
m.export_function(&format!("{}_Sequence", prefix), sequence)?;
Ok(())
}
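For the template processor above, the `single`/`pair` strings use the library's `$A` / `$B` template syntax and `special_tokens` is a list of `[token, id]` pairs, as the doc comment states. A minimal sketch, with the `processors` prefix and addon path assumed rather than taken from this file:

// Sketch only: export name assumes register() was called with the prefix "processors".
const native = require("../../native");

const templateProcessor = native.processors_TemplateProcessing(
  "[CLS] $A [SEP]",                 // single-sequence template
  "[CLS] $A [SEP] $B:1 [SEP]",      // optional pair template
  [["[CLS]", 101], ["[SEP]", 102]]  // specialTokens as [string, number] pairs
);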

View File

@ -1,107 +0,0 @@
extern crate tokenizers as tk;
use crate::models::*;
use neon::prelude::*;
use std::sync::{Arc, RwLock};
use tk::models::bpe::{BpeBuilder, BPE};
use tk::models::wordlevel::{WordLevel, WordLevelBuilder};
use tk::models::wordpiece::{WordPiece, WordPieceBuilder};
pub struct WordPieceFromFilesTask(Option<WordPieceBuilder>);
impl WordPieceFromFilesTask {
pub fn new(builder: WordPieceBuilder) -> Self {
Self(Some(builder))
}
}
impl Task for WordPieceFromFilesTask {
type Output = WordPiece;
type Error = String;
type JsEvent = JsValue;
fn perform(&self) -> Result<Self::Output, Self::Error> {
let builder: Option<WordPieceBuilder> =
unsafe { std::ptr::replace(&self.0 as *const _ as *mut _, None) };
builder.unwrap().build().map_err(|e| format!("{}", e))
}
fn complete(
self,
mut cx: TaskContext,
result: Result<Self::Output, Self::Error>,
) -> JsResult<Self::JsEvent> {
let wordpiece = result.map_err(|e| cx.throw_error::<_, ()>(e).unwrap_err())?;
let mut js_model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(wordpiece.into())));
Ok(js_model.upcast())
}
}
pub struct WordLevelFromFilesTask(Option<WordLevelBuilder>);
impl WordLevelFromFilesTask {
pub fn new(builder: WordLevelBuilder) -> Self {
Self(Some(builder))
}
}
impl Task for WordLevelFromFilesTask {
type Output = WordLevel;
type Error = String;
type JsEvent = JsValue;
fn perform(&self) -> Result<Self::Output, Self::Error> {
let builder: Option<WordLevelBuilder> =
unsafe { std::ptr::replace(&self.0 as *const _ as *mut _, None) };
builder.unwrap().build().map_err(|e| format!("{}", e))
}
fn complete(
self,
mut cx: TaskContext,
result: Result<Self::Output, Self::Error>,
) -> JsResult<Self::JsEvent> {
let wordlevel = result.map_err(|e| cx.throw_error::<_, ()>(e).unwrap_err())?;
let mut js_model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(wordlevel.into())));
Ok(js_model.upcast())
}
}
pub struct BPEFromFilesTask(Option<BpeBuilder>);
impl BPEFromFilesTask {
pub fn new(builder: BpeBuilder) -> Self {
Self(Some(builder))
}
}
impl Task for BPEFromFilesTask {
type Output = BPE;
type Error = String;
type JsEvent = JsValue;
fn perform(&self) -> Result<Self::Output, Self::Error> {
let builder: Option<BpeBuilder> =
unsafe { std::ptr::replace(&self.0 as *const _ as *mut _, None) };
builder.unwrap().build().map_err(|e| format!("{}", e))
}
fn complete(
self,
mut cx: TaskContext,
result: Result<Self::Output, Self::Error>,
) -> JsResult<Self::JsEvent> {
let bpe = result.map_err(|e| cx.throw_error::<_, ()>(e).unwrap_err())?;
let mut js_model = JsModel::new::<_, JsModel, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_model.borrow_mut(&guard).model = Some(Arc::new(RwLock::new(bpe.into())));
Ok(js_model.upcast())
}
}
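All three tasks follow the same Neon `Task` pattern: `perform` consumes the builder on a background thread and `complete` wraps the result in a `JsModel`. On the JS side that surfaces as a Node-style callback, typically promisified by the wrapping layer; the export name and argument shape in this sketch are assumptions:

import { promisify } from "util";

// Hypothetical export name and arguments for the WordPiece from-files task.
const native = require("../../native");
const wordPieceFromFiles = promisify(native.models_WordPiece_from_files);

async function loadWordPiece(vocabPath: string) {
  // perform() builds the model off the main thread; complete() hands back the JsModel handle.
  return wordPieceFromFiles(vocabPath);
}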

View File

@ -1,142 +0,0 @@
extern crate tokenizers as tk;
use crate::encoding::*;
use crate::tokenizer::Tokenizer;
use neon::prelude::*;
use tk::tokenizer::{EncodeInput, Encoding};
pub enum EncodeTask<'s> {
Single(Tokenizer, Option<EncodeInput<'s>>, bool),
Batch(Tokenizer, Option<Vec<EncodeInput<'s>>>, bool),
}
pub enum EncodeOutput {
Single(Box<Encoding>),
Batch(Vec<Encoding>),
}
impl Task for EncodeTask<'static> {
type Output = EncodeOutput;
type Error = String;
type JsEvent = JsValue;
fn perform(&self) -> Result<Self::Output, Self::Error> {
match self {
EncodeTask::Single(worker, input, add_special_tokens) => {
let mut input: Option<EncodeInput> =
unsafe { std::ptr::replace(input as *const _ as *mut _, None) };
worker
.tokenizer
.read()
.unwrap()
.encode_char_offsets(
input.take().ok_or("No provided input")?,
*add_special_tokens,
)
.map_err(|e| format!("{}", e))
.map(|item| EncodeOutput::Single(Box::new(item)))
}
EncodeTask::Batch(worker, input, add_special_tokens) => {
let mut input: Option<Vec<EncodeInput>> =
unsafe { std::ptr::replace(input as *const _ as *mut _, None) };
worker
.tokenizer
.read()
.unwrap()
.encode_batch_char_offsets(
input.take().ok_or("No provided input")?,
*add_special_tokens,
)
.map_err(|e| format!("{}", e))
.map(EncodeOutput::Batch)
}
}
}
fn complete(
self,
mut cx: TaskContext,
result: Result<Self::Output, Self::Error>,
) -> JsResult<Self::JsEvent> {
match result.map_err(|e| cx.throw_error::<_, ()>(e).unwrap_err())? {
EncodeOutput::Single(encoding) => {
let mut js_encoding = JsEncoding::new::<_, JsEncoding, _>(&mut cx, vec![])?;
// Set the actual encoding
let guard = cx.lock();
js_encoding.borrow_mut(&guard).encoding = Some(*encoding);
Ok(js_encoding.upcast())
}
EncodeOutput::Batch(encodings) => {
let result = JsArray::new(&mut cx, encodings.len() as u32);
for (i, encoding) in encodings.into_iter().enumerate() {
let mut js_encoding = JsEncoding::new::<_, JsEncoding, _>(&mut cx, vec![])?;
// Set the actual encoding
let guard = cx.lock();
js_encoding.borrow_mut(&guard).encoding = Some(encoding);
result.set(&mut cx, i as u32, js_encoding)?;
}
Ok(result.upcast())
}
}
}
}
pub enum DecodeTask {
Single(Tokenizer, Vec<u32>, bool),
Batch(Tokenizer, Vec<Vec<u32>>, bool),
}
pub enum DecodeOutput {
Single(String),
Batch(Vec<String>),
}
impl Task for DecodeTask {
type Output = DecodeOutput;
type Error = String;
type JsEvent = JsValue;
fn perform(&self) -> Result<Self::Output, Self::Error> {
match self {
DecodeTask::Single(worker, ids, skip_special_tokens) => worker
.tokenizer
.read()
.unwrap()
.decode(ids.as_slice(), *skip_special_tokens)
.map_err(|e| format!("{}", e))
.map(DecodeOutput::Single),
DecodeTask::Batch(worker, ids, skip_special_tokens) => worker
.tokenizer
.read()
.unwrap()
.decode_batch(
&ids.iter().map(|v| v.as_slice()).collect::<Vec<&[u32]>>(),
*skip_special_tokens,
)
.map_err(|e| format!("{}", e))
.map(DecodeOutput::Batch),
}
}
fn complete(
self,
mut cx: TaskContext,
result: Result<Self::Output, Self::Error>,
) -> JsResult<Self::JsEvent> {
match result.map_err(|e| cx.throw_error::<_, ()>(e).unwrap_err())? {
DecodeOutput::Single(string) => Ok(cx.string(string).upcast()),
DecodeOutput::Batch(strings) => {
let result = JsArray::new(&mut cx, strings.len() as u32);
for (i, string) in strings.into_iter().enumerate() {
let js_string = cx.string(string);
result.set(&mut cx, i as u32, js_string)?;
}
Ok(result.upcast())
}
}
}
}
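Both task types hand their result back through `complete`, so the JS side sees asynchronous, callback-based `encode`/`decode` methods; a typical consumer wraps them in promises. The method shapes below are assumptions for illustration only:

import { promisify } from "util";

// Assumed callback-style surface produced by the binding layer around these tasks.
declare const tokenizer: {
  encode(input: string, addSpecialTokens: boolean, cb: (err: Error | null, enc: unknown) => void): void;
  decode(ids: number[], skipSpecialTokens: boolean, cb: (err: Error | null, text: string) => void): void;
};

const encode = promisify(tokenizer.encode.bind(tokenizer));
const decode = promisify(tokenizer.decode.bind(tokenizer));

async function roundTrip(text: string) {
  const encoding = await encode(text, true);        // EncodeTask::Single
  const decoded = await decode([101, 2000], true);  // DecodeTask::Single, skipping special tokens
  return { encoding, decoded };
}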

File diff suppressed because it is too large

View File

@ -1,444 +0,0 @@
extern crate tokenizers as tk;
use crate::extraction::*;
use crate::models::Model;
use crate::tokenizer::AddedToken;
use neon::prelude::*;
use std::sync::{Arc, RwLock};
use tk::models::{
bpe::BpeTrainer, unigram::UnigramTrainer, wordlevel::WordLevelTrainer,
wordpiece::WordPieceTrainer, TrainerWrapper,
};
/// Trainer
#[derive(Clone)]
pub struct Trainer {
pub trainer: Option<Arc<RwLock<TrainerWrapper>>>,
}
impl From<TrainerWrapper> for Trainer {
fn from(trainer: TrainerWrapper) -> Self {
Self {
trainer: Some(Arc::new(RwLock::new(trainer))),
}
}
}
impl tk::Trainer for Trainer {
type Model = Model;
fn should_show_progress(&self) -> bool {
self.trainer
.as_ref()
.expect("Uninitialized Trainer")
.read()
.unwrap()
.should_show_progress()
}
fn train(&self, model: &mut Self::Model) -> tk::Result<Vec<tk::AddedToken>> {
let special_tokens = self
.trainer
.as_ref()
.ok_or("Uninitialized Trainer")?
.read()
.unwrap()
.train(
&mut model
.model
.as_ref()
.ok_or("Uninitialized Model")?
.write()
.unwrap(),
)?;
Ok(special_tokens)
}
fn feed<I, S, F>(&mut self, iterator: I, process: F) -> tk::Result<()>
where
I: Iterator<Item = S> + Send,
S: AsRef<str> + Send,
F: Fn(&str) -> tk::Result<Vec<String>> + Sync,
{
self.trainer
.as_ref()
.ok_or("Uninitialized Trainer")?
.write()
.unwrap()
.feed(iterator, process)
}
}
declare_types! {
pub class JsTrainer for Trainer {
init(_) {
// This should not be called from JS
Ok(Trainer { trainer: None })
}
}
}
// BPE
struct BpeTrainerOptions(BpeTrainer);
impl From<BpeTrainerOptions> for BpeTrainer {
fn from(v: BpeTrainerOptions) -> Self {
v.0
}
}
impl FromJsValue for BpeTrainerOptions {
fn from_value<'c, C: Context<'c>>(from: Handle<'c, JsValue>, cx: &mut C) -> LibResult<Self> {
if let Ok(options) = from.downcast::<JsObject>() {
let mut builder = BpeTrainer::builder();
if let Ok(size) = options.get(cx, "vocabSize") {
if let Some(size) = Option::from_value(size, cx)? {
builder = builder.vocab_size(size);
}
}
if let Ok(freq) = options.get(cx, "minFrequency") {
if let Some(freq) = Option::from_value(freq, cx)? {
builder = builder.min_frequency(freq);
}
}
if let Ok(tokens) = options.get(cx, "specialTokens") {
if tokens.downcast::<JsNull>().is_err() && tokens.downcast::<JsUndefined>().is_err()
{
builder = builder.special_tokens(
tokens
.downcast::<JsArray>()
.map_err(|e| Error(format!("{}", e)))?
.to_vec(cx)?
.into_iter()
.map(|token| Ok(AddedToken::from_value(token, cx)?.into()))
.collect::<Result<Vec<_>, Error>>()?,
);
}
}
if let Ok(limit) = options.get(cx, "limitAlphabet") {
if let Some(limit) = Option::from_value(limit, cx)? {
builder = builder.limit_alphabet(limit);
}
}
if let Ok(alphabet) = options.get(cx, "initialAlphabet") {
if let Some(alphabet) = Option::from_value(alphabet, cx)? {
builder = builder.initial_alphabet(alphabet);
}
}
if let Ok(show) = options.get(cx, "showProgress") {
if let Some(show) = Option::from_value(show, cx)? {
builder = builder.show_progress(show);
}
}
if let Ok(prefix) = options.get(cx, "continuingSubwordPrefix") {
if let Some(prefix) = Option::from_value(prefix, cx)? {
builder = builder.continuing_subword_prefix(prefix);
}
}
if let Ok(suffix) = options.get(cx, "endOfWordSuffix") {
if let Some(suffix) = Option::from_value(suffix, cx)? {
builder = builder.end_of_word_suffix(suffix);
}
}
Ok(Self(builder.build()))
} else {
Err(Error("Expected options type: object".into()))
}
}
}
/// bpe_trainer(options?: {
/// vocabSize?: number = 30000,
/// minFrequency?: number = 2,
/// specialTokens?: (string | AddedToken)[] = [],
/// limitAlphabet?: number = undefined,
/// initialAlphabet?: string[] = [],
/// showProgress?: bool = true,
/// continuingSubwordPrefix?: string = undefined,
/// endOfWordSuffix?: string = undefined,
/// })
fn bpe_trainer(mut cx: FunctionContext) -> JsResult<JsTrainer> {
let trainer = cx
.extract_opt::<BpeTrainerOptions>(0)?
.map_or_else(|| BpeTrainer::builder().build(), |o| o.into());
let mut js_trainer = JsTrainer::new::<_, JsTrainer, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_trainer.borrow_mut(&guard).trainer = Some(Arc::new(RwLock::new(trainer.into())));
Ok(js_trainer)
}
// WordPiece
struct WordPieceTrainerOptions(WordPieceTrainer);
impl From<WordPieceTrainerOptions> for WordPieceTrainer {
fn from(v: WordPieceTrainerOptions) -> Self {
v.0
}
}
impl FromJsValue for WordPieceTrainerOptions {
fn from_value<'c, C: Context<'c>>(from: Handle<'c, JsValue>, cx: &mut C) -> LibResult<Self> {
if let Ok(options) = from.downcast::<JsObject>() {
let mut builder = WordPieceTrainer::builder();
if let Ok(size) = options.get(cx, "vocabSize") {
if let Some(size) = Option::from_value(size, cx)? {
builder = builder.vocab_size(size);
}
}
if let Ok(freq) = options.get(cx, "minFrequency") {
if let Some(freq) = Option::from_value(freq, cx)? {
builder = builder.min_frequency(freq);
}
}
if let Ok(tokens) = options.get(cx, "specialTokens") {
if tokens.downcast::<JsNull>().is_err() && tokens.downcast::<JsUndefined>().is_err()
{
builder = builder.special_tokens(
tokens
.downcast::<JsArray>()
.map_err(|e| Error(format!("{}", e)))?
.to_vec(cx)?
.into_iter()
.map(|token| Ok(AddedToken::from_value(token, cx)?.into()))
.collect::<Result<Vec<_>, Error>>()?,
);
}
}
if let Ok(limit) = options.get(cx, "limitAlphabet") {
if let Some(limit) = Option::from_value(limit, cx)? {
builder = builder.limit_alphabet(limit);
}
}
if let Ok(alphabet) = options.get(cx, "initialAlphabet") {
if let Some(alphabet) = Option::from_value(alphabet, cx)? {
builder = builder.initial_alphabet(alphabet);
}
}
if let Ok(show) = options.get(cx, "showProgress") {
if let Some(show) = Option::from_value(show, cx)? {
builder = builder.show_progress(show);
}
}
if let Ok(prefix) = options.get(cx, "continuingSubwordPrefix") {
if let Some(prefix) = Option::from_value(prefix, cx)? {
builder = builder.continuing_subword_prefix(prefix);
}
}
if let Ok(suffix) = options.get(cx, "endOfWordSuffix") {
if let Some(suffix) = Option::from_value(suffix, cx)? {
builder = builder.end_of_word_suffix(suffix);
}
}
Ok(Self(builder.build()))
} else {
Err(Error("Expected options type: object".into()))
}
}
}
/// wordpiece_trainer(options?: {
/// vocabSize?: number = 30000,
/// minFrequency?: number = 2,
/// specialTokens?: string[] = [],
/// limitAlphabet?: number = undefined,
/// initialAlphabet?: string[] = [],
/// showProgress?: bool = true,
/// continuingSubwordPrefix?: string = undefined,
/// endOfWordSuffix?: string = undefined,
/// })
fn wordpiece_trainer(mut cx: FunctionContext) -> JsResult<JsTrainer> {
let trainer = cx
.extract_opt::<WordPieceTrainerOptions>(0)?
.map_or_else(|| WordPieceTrainer::builder().build(), |o| o.into());
let mut js_trainer = JsTrainer::new::<_, JsTrainer, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_trainer.borrow_mut(&guard).trainer = Some(Arc::new(RwLock::new(trainer.into())));
Ok(js_trainer)
}
// WordLevel
struct WordLevelTrainerOptions(WordLevelTrainer);
impl From<WordLevelTrainerOptions> for WordLevelTrainer {
fn from(v: WordLevelTrainerOptions) -> Self {
v.0
}
}
impl FromJsValue for WordLevelTrainerOptions {
fn from_value<'c, C: Context<'c>>(from: Handle<'c, JsValue>, cx: &mut C) -> LibResult<Self> {
if let Ok(options) = from.downcast::<JsObject>() {
let mut builder = WordLevelTrainer::builder();
if let Ok(size) = options.get(cx, "vocabSize") {
if let Some(size) = Option::from_value(size, cx)? {
builder.vocab_size(size);
}
}
if let Ok(freq) = options.get(cx, "minFrequency") {
if let Some(freq) = Option::from_value(freq, cx)? {
builder.min_frequency(freq);
}
}
if let Ok(tokens) = options.get(cx, "specialTokens") {
if tokens.downcast::<JsNull>().is_err() && tokens.downcast::<JsUndefined>().is_err()
{
builder.special_tokens(
tokens
.downcast::<JsArray>()
.map_err(|e| Error(format!("{}", e)))?
.to_vec(cx)?
.into_iter()
.map(|token| Ok(AddedToken::from_value(token, cx)?.into()))
.collect::<Result<Vec<_>, Error>>()?,
);
}
}
if let Ok(show) = options.get(cx, "showProgress") {
if let Some(show) = Option::from_value(show, cx)? {
builder.show_progress(show);
}
}
Ok(Self(
builder
.build()
.expect("WordLevelTrainerBuilder cannot fail"),
))
} else {
Err(Error("Expected options type: object".into()))
}
}
}
/// wordlevel_trainer(options?: {
/// vocabSize?: number = 30000,
/// minFrequency?: number = 0,
/// specialTokens?: string[] = [],
/// showProgress?: bool = true,
/// })
fn wordlevel_trainer(mut cx: FunctionContext) -> JsResult<JsTrainer> {
let trainer = cx.extract_opt::<WordLevelTrainerOptions>(0)?.map_or_else(
|| WordLevelTrainer::builder().build().unwrap(),
|o| o.into(),
);
let mut js_trainer = JsTrainer::new::<_, JsTrainer, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_trainer.borrow_mut(&guard).trainer = Some(Arc::new(RwLock::new(trainer.into())));
Ok(js_trainer)
}
// Unigram
struct UnigramTrainerOptions(UnigramTrainer);
impl From<UnigramTrainerOptions> for UnigramTrainer {
fn from(v: UnigramTrainerOptions) -> Self {
v.0
}
}
impl FromJsValue for UnigramTrainerOptions {
fn from_value<'c, C: Context<'c>>(from: Handle<'c, JsValue>, cx: &mut C) -> LibResult<Self> {
if let Ok(options) = from.downcast::<JsObject>() {
let mut builder = UnigramTrainer::builder();
if let Ok(size) = options.get(cx, "vocabSize") {
if let Some(size) = Option::from_value(size, cx)? {
builder.vocab_size(size);
}
}
if let Ok(nsub) = options.get(cx, "nSubIterations") {
if let Some(nsub) = Option::from_value(nsub, cx)? {
builder.n_sub_iterations(nsub);
}
}
if let Ok(factor) = options.get(cx, "shrinkingFactor") {
if let Some(factor) = Option::from_value(factor, cx)? {
builder.shrinking_factor(factor);
}
}
if let Ok(tokens) = options.get(cx, "specialTokens") {
if tokens.downcast::<JsNull>().is_err() && tokens.downcast::<JsUndefined>().is_err()
{
builder.special_tokens(
tokens
.downcast::<JsArray>()
.map_err(|e| Error(format!("{}", e)))?
.to_vec(cx)?
.into_iter()
.map(|token| Ok(AddedToken::from_value(token, cx)?.into()))
.collect::<Result<Vec<_>, Error>>()?,
);
}
}
if let Ok(alphabet) = options.get(cx, "initialAlphabet") {
if let Some(alphabet) = Option::from_value(alphabet, cx)? {
builder.initial_alphabet(alphabet);
}
}
if let Ok(unk) = options.get(cx, "unkToken") {
let unk = Option::from_value(unk, cx)?;
builder.unk_token(unk);
}
if let Ok(max) = options.get(cx, "maxPieceLength") {
if let Some(max) = Option::from_value(max, cx)? {
builder.max_piece_length(max);
}
}
if let Ok(size) = options.get(cx, "seedSize") {
if let Some(size) = Option::from_value(size, cx)? {
builder.seed_size(size);
}
}
if let Ok(show) = options.get(cx, "showProgress") {
if let Some(show) = Option::from_value(show, cx)? {
builder.show_progress(show);
}
}
Ok(Self(builder.build()?))
} else {
Err(Error("Expected options type: object".into()))
}
}
}
/// unigram_trainer(options?: {
/// vocabSize?: number = 8000,
/// nSubIterations?: number = 2,
/// shrinkingFactor?: number = 0.75,
/// specialTokens?: string[] = [],
/// initialAlphabet?: string[] = [],
/// unkToken?: string = undefined,
/// maxPieceLength?: number = 16,
/// seedSize?: number = 1000000,
/// showProgress?: boolean = true,
/// })
fn unigram_trainer(mut cx: FunctionContext) -> JsResult<JsTrainer> {
let trainer = cx
.extract_opt::<UnigramTrainerOptions>(0)?
.map_or_else(|| UnigramTrainer::builder().build().unwrap(), |o| o.into());
let mut js_trainer = JsTrainer::new::<_, JsTrainer, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_trainer.borrow_mut(&guard).trainer = Some(Arc::new(RwLock::new(trainer.into())));
Ok(js_trainer)
}
/// Register everything here
pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
m.export_function(&format!("{}_BPETrainer", prefix), bpe_trainer)?;
m.export_function(&format!("{}_WordPieceTrainer", prefix), wordpiece_trainer)?;
m.export_function(&format!("{}_WordLevelTrainer", prefix), wordlevel_trainer)?;
m.export_function(&format!("{}_UnigramTrainer", prefix), unigram_trainer)?;
Ok(())
}
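The option keys accepted by these constructors are the ones listed in each doc comment. For instance, for `bpe_trainer` (the export name assumes a `trainers` prefix and an addon path that this file does not define):

// Sketch only: keys mirror the bpe_trainer doc comment above.
const native = require("../../native");

const bpeTrainer = native.trainers_BPETrainer({
  vocabSize: 30000,
  minFrequency: 2,
  specialTokens: ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"],
  limitAlphabet: 1000,
  initialAlphabet: [],
  showProgress: true,
  // continuingSubwordPrefix / endOfWordSuffix are left undefined here
});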

View File

@ -1,54 +0,0 @@
extern crate tokenizers as tk;
use crate::encoding::JsEncoding;
use crate::extraction::*;
use crate::tokenizer::Encoding;
use neon::prelude::*;
/// slice(s: string, start?: number, end?: number)
fn slice(mut cx: FunctionContext) -> JsResult<JsString> {
let s = cx.extract::<String>(0)?;
let len = s.chars().count();
let get_index = |x: i32| -> usize {
if x >= 0 {
x as usize
} else {
(len as i32 + x) as usize
}
};
let begin_index = get_index(cx.extract_opt::<i32>(1)?.unwrap_or(0));
let end_index = get_index(cx.extract_opt::<i32>(2)?.unwrap_or(len as i32));
if let Some(slice) = tk::tokenizer::normalizer::get_range_of(&s, begin_index..end_index) {
Ok(cx.string(slice))
} else {
cx.throw_error("Error in offsets")
}
}
/// merge_encodings(encodings: Encoding[], growing_offsets: boolean = false): Encoding
fn merge_encodings(mut cx: FunctionContext) -> JsResult<JsEncoding> {
let encodings: Vec<tk::Encoding> = cx
.extract_vec::<Encoding>(0)?
.into_iter()
.map(|e| e.into())
.collect();
let growing_offsets = cx.extract_opt::<bool>(1)?.unwrap_or(false);
let new_encoding = tk::tokenizer::Encoding::merge(encodings, growing_offsets);
let mut js_encoding = JsEncoding::new::<_, JsEncoding, _>(&mut cx, vec![])?;
let guard = cx.lock();
js_encoding.borrow_mut(&guard).encoding = Some(new_encoding);
Ok(js_encoding)
}
/// Register everything here
pub fn register(m: &mut ModuleContext, prefix: &str) -> NeonResult<()> {
m.export_function(&format!("{}_slice", prefix), slice)?;
m.export_function(&format!("{}_mergeEncodings", prefix), merge_encodings)?;
Ok(())
}
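`slice` counts characters and accepts negative indices measured from the end, while `merge_encodings` concatenates encodings and can grow offsets across the pieces. A small sketch, with the `utils` prefix and addon path assumed:

// Sketch only: export names follow the register() calls above ("{prefix}_slice",
// "{prefix}_mergeEncodings"), with the prefix assumed to be "utils".
const native = require("../../native");

native.utils_slice("tokenizers", 0, 5);  // "token"
native.utils_slice("tokenizers", -5);    // "izers"

declare const encodingA: unknown;  // placeholder Encodings, for illustration only
declare const encodingB: unknown;
const merged = native.utils_mergeEncodings([encodingA, encodingB], true);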

View File

@ -0,0 +1,3 @@
# `tokenizers-android-arm-eabi`
This is the **armv7-linux-androideabi** binary for `tokenizers`

View File

@ -0,0 +1,32 @@
{
"name": "tokenizers-android-arm-eabi",
"version": "0.13.4-rc1",
"os": [
"android"
],
"cpu": [
"arm"
],
"main": "tokenizers.android-arm-eabi.node",
"files": [
"tokenizers.android-arm-eabi.node"
],
"description": "Tokenizers platform specific bindings",
"keywords": [
"napi-rs",
"NAPI",
"N-API",
"Rust",
"node-addon",
"node-addon-api"
],
"license": "MIT",
"engines": {
"node": ">= 10"
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public"
},
"repository": "tokenizers"
}
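These platform packages carry nothing but the prebuilt addon: the `os` and `cpu` fields restrict where npm installs them, and `main` points at the `.node` binary. A sketch of how a napi-rs style entry point typically resolves them (the loader itself is not part of this file):

// Sketch of the usual napi-rs resolution order for the android-arm-eabi build.
function loadAndroidArmEabiAddon() {
  try {
    // Local build: the addon sits next to the JS entry point.
    return require("./tokenizers.android-arm-eabi.node");
  } catch {
    // Published install: fall back to the platform-specific package.
    return require("tokenizers-android-arm-eabi");
  }
}
// A real loader would branch on process.platform / process.arch before this.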

View File

@ -0,0 +1,3 @@
# `tokenizers-android-arm64`
This is the **aarch64-linux-android** binary for `tokenizers`

View File

@ -0,0 +1,32 @@
{
"name": "tokenizers-android-arm64",
"version": "0.13.4-rc1",
"os": [
"android"
],
"cpu": [
"arm64"
],
"main": "tokenizers.android-arm64.node",
"files": [
"tokenizers.android-arm64.node"
],
"description": "Tokenizers platform specific bindings",
"keywords": [
"napi-rs",
"NAPI",
"N-API",
"Rust",
"node-addon",
"node-addon-api"
],
"license": "MIT",
"engines": {
"node": ">= 10"
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public"
},
"repository": "tokenizers"
}

View File

@ -0,0 +1,3 @@
# `tokenizers-darwin-arm64`
This is the **aarch64-apple-darwin** binary for `tokenizers`

View File

@ -0,0 +1,32 @@
{
"name": "tokenizers-darwin-arm64",
"version": "0.13.4-rc1",
"os": [
"darwin"
],
"cpu": [
"arm64"
],
"main": "tokenizers.darwin-arm64.node",
"files": [
"tokenizers.darwin-arm64.node"
],
"description": "Tokenizers platform specific bindings",
"keywords": [
"napi-rs",
"NAPI",
"N-API",
"Rust",
"node-addon",
"node-addon-api"
],
"license": "MIT",
"engines": {
"node": ">= 10"
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public"
},
"repository": "tokenizers"
}

View File

@ -0,0 +1,3 @@
# `tokenizers-darwin-x64`
This is the **x86_64-apple-darwin** binary for `tokenizers`

View File

@ -0,0 +1,32 @@
{
"name": "tokenizers-darwin-x64",
"version": "0.13.4-rc1",
"os": [
"darwin"
],
"cpu": [
"x64"
],
"main": "tokenizers.darwin-x64.node",
"files": [
"tokenizers.darwin-x64.node"
],
"description": "Tokenizers platform specific bindings",
"keywords": [
"napi-rs",
"NAPI",
"N-API",
"Rust",
"node-addon",
"node-addon-api"
],
"license": "MIT",
"engines": {
"node": ">= 10"
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public"
},
"repository": "tokenizers"
}

View File

@ -0,0 +1,3 @@
# `tokenizers-freebsd-x64`
This is the **x86_64-unknown-freebsd** binary for `tokenizers`

View File

@ -0,0 +1,32 @@
{
"name": "tokenizers-freebsd-x64",
"version": "0.13.4-rc1",
"os": [
"freebsd"
],
"cpu": [
"x64"
],
"main": "tokenizers.freebsd-x64.node",
"files": [
"tokenizers.freebsd-x64.node"
],
"description": "Tokenizers platform specific bindings",
"keywords": [
"napi-rs",
"NAPI",
"N-API",
"Rust",
"node-addon",
"node-addon-api"
],
"license": "MIT",
"engines": {
"node": ">= 10"
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public"
},
"repository": "tokenizers"
}

Some files were not shown because too many files have changed in this diff