.github/workflows/docs-check.yml
@@ -35,7 +35,7 @@ jobs:
         run: make clean && make html_all O="-W --keep-going"

       - name: Upload built doc
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: documentation
           path: ./docs/build/*
(second changed file: a Jupyter notebook demonstrating EncodingVisualizer; its path is not shown in this view)

@@ -35,7 +35,7 @@
    "outputs": [],
    "source": [
     "from tokenizers import BertWordPieceTokenizer\n",
-    "from tokenizers.tools import EncodingVisualizer\n"
+    "from tokenizers.tools import EncodingVisualizer"
    ]
   },
   {
@@ -305,7 +305,7 @@
     "anno2 = Annotation(start=2, end=4, label=\"bar\")\n",
     "anno3 = Annotation(start=6, end=8, label=\"poo\")\n",
     "anno4 = Annotation(start=9, end=12, label=\"shoe\")\n",
-    "annotations=[\n",
+    "annotations = [\n",
     "    anno1,\n",
     "    anno2,\n",
     "    anno3,\n",
@@ -315,8 +315,7 @@
     "    Annotation(start=80, end=95, label=\"bar\"),\n",
     "    Annotation(start=120, end=128, label=\"bar\"),\n",
     "    Annotation(start=152, end=155, label=\"poo\"),\n",
-    "]\n",
-    "\n"
+    "]"
    ]
   },
   {
@@ -521,7 +520,7 @@
     }
    ],
    "source": [
-    "visualizer(text,annotations=annotations)"
+    "visualizer(text, annotations=annotations)"
    ]
   },
   {
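Read together, the notebook hunks above reduce to a simple pattern: build a tokenizer, wrap it in an EncodingVisualizer, and call it with text plus a list of Annotation objects. A minimal sketch of that pattern; the vocab path and sample text are placeholder assumptions, while the Annotation offsets and labels echo the cells in the diff:

    from tokenizers import BertWordPieceTokenizer
    from tokenizers.tools import Annotation, EncodingVisualizer

    # Placeholder vocab path -- any WordPiece vocab file works here.
    tokenizer = BertWordPieceTokenizer.from_file("/tmp/bert-base-uncased-vocab.txt")

    text = "a sample sentence long enough to cover the annotation offsets below"
    annotations = [
        Annotation(start=2, end=4, label="bar"),
        Annotation(start=6, end=8, label="poo"),
        Annotation(start=9, end=12, label="shoe"),
    ]

    visualizer = EncodingVisualizer(tokenizer=tokenizer, default_to_notebook=True)
    visualizer(text, annotations=annotations)  # renders the annotated tokens inline in the notebook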
@@ -553,7 +552,7 @@
     }
    ],
    "source": [
-    "funnyAnnotations = [dict(startPlace=i,endPlace=i+3,theTag=str(i)) for i in range(0,20,4)]\n",
+    "funnyAnnotations = [dict(startPlace=i, endPlace=i + 3, theTag=str(i)) for i in range(0, 20, 4)]\n",
     "funnyAnnotations"
    ]
   },
@@ -563,7 +562,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "converter = lambda funny: Annotation(start=funny['startPlace'], end=funny['endPlace'], label=funny['theTag'])\n",
+    "def converter(funny):\n",
+    "    return Annotation(start=funny[\"startPlace\"], end=funny[\"endPlace\"], label=funny[\"theTag\"])\n",
+    "\n",
+    "\n",
     "visualizer = EncodingVisualizer(tokenizer=tokenizer, default_to_notebook=True, annotation_converter=converter)"
    ]
   },
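The hunk above swaps a lambda for an equivalent named function; either way, annotation_converter is just a callable mapping one entry of a custom format onto an Annotation. A small self-contained sketch of that mapping, using only names from the cells above:

    from tokenizers.tools import Annotation

    def converter(funny):
        # Map one custom dict onto the Annotation type the visualizer expects.
        return Annotation(start=funny["startPlace"], end=funny["endPlace"], label=funny["theTag"])

    funnyAnnotations = [dict(startPlace=i, endPlace=i + 3, theTag=str(i)) for i in range(0, 20, 4)]
    # Applying the converter by hand shows the transformation that passing
    # annotation_converter=converter lets the visualizer perform for you.
    converted = [converter(f) for f in funnyAnnotations]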
@@ -817,7 +819,7 @@
    ],
    "source": [
     "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json\" -O /tmp/roberta-base-vocab.json\n",
-    "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt\n"
+    "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt"
    ]
   },
   {
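If wget is unavailable, a stdlib-only equivalent of the two download cells (same URLs and /tmp destinations as in the diff):

    import urllib.request

    # Fetch the RoBERTa vocab and merges files to the paths the notebook uses.
    for name in ("roberta-base-vocab.json", "roberta-base-merges.txt"):
        url = "https://s3.amazonaws.com/models.huggingface.co/bert/" + name
        urllib.request.urlretrieve(url, "/tmp/" + name)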
@@ -1023,7 +1025,8 @@
    ],
    "source": [
     "from tokenizers import ByteLevelBPETokenizer\n",
-    "roberta_tokenizer = ByteLevelBPETokenizer.from_file('/tmp/roberta-base-vocab.json', '/tmp/roberta-base-merges.txt')\n",
+    "\n",
+    "roberta_tokenizer = ByteLevelBPETokenizer.from_file(\"/tmp/roberta-base-vocab.json\", \"/tmp/roberta-base-merges.txt\")\n",
     "roberta_visualizer = EncodingVisualizer(tokenizer=roberta_tokenizer, default_to_notebook=True)\n",
     "roberta_visualizer(text, annotations=annotations)"
    ]
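Consolidated, the RoBERTa cells run as the sequence below; a sketch assuming the two files downloaded above and a text/annotations pair defined as in the earlier cells:

    from tokenizers import ByteLevelBPETokenizer
    from tokenizers.tools import EncodingVisualizer

    # Load a pretrained byte-level BPE from the downloaded vocab/merges files.
    roberta_tokenizer = ByteLevelBPETokenizer.from_file(
        "/tmp/roberta-base-vocab.json", "/tmp/roberta-base-merges.txt"
    )
    roberta_visualizer = EncodingVisualizer(tokenizer=roberta_tokenizer, default_to_notebook=True)
    roberta_visualizer(text, annotations=annotations)  # text, annotations as defined in earlier cells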