From 3d51a1695f33a498430059ea5d149d1bc94d5fa0 Mon Sep 17 00:00:00 2001
From: Arthur <48595927+ArthurZucker@users.noreply.github.com>
Date: Tue, 1 Oct 2024 14:48:02 +0200
Subject: [PATCH] Fix documentation build (#1642)

* use v4

* fix ruff

* style
---
 .github/workflows/docs-check.yml              |  2 +-
 .../examples/using_the_visualizer.ipynb       | 21 +++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml
index d828294eb..6b1a830f8 100644
--- a/.github/workflows/docs-check.yml
+++ b/.github/workflows/docs-check.yml
@@ -35,7 +35,7 @@ jobs:
         run: make clean && make html_all O="-W --keep-going"
 
       - name: Upload built doc
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: documentation
           path: ./docs/build/*
diff --git a/bindings/python/examples/using_the_visualizer.ipynb b/bindings/python/examples/using_the_visualizer.ipynb
index 2840d2e07..61d6fb845 100644
--- a/bindings/python/examples/using_the_visualizer.ipynb
+++ b/bindings/python/examples/using_the_visualizer.ipynb
@@ -35,7 +35,7 @@
    "outputs": [],
    "source": [
     "from tokenizers import BertWordPieceTokenizer\n",
-    "from tokenizers.tools import EncodingVisualizer\n"
+    "from tokenizers.tools import EncodingVisualizer"
    ]
   },
   {
@@ -305,7 +305,7 @@
     "anno2 = Annotation(start=2, end=4, label=\"bar\")\n",
     "anno3 = Annotation(start=6, end=8, label=\"poo\")\n",
     "anno4 = Annotation(start=9, end=12, label=\"shoe\")\n",
-    "annotations=[\n",
+    "annotations = [\n",
     "    anno1,\n",
     "    anno2,\n",
     "    anno3,\n",
@@ -315,8 +315,7 @@
     "    Annotation(start=80, end=95, label=\"bar\"),\n",
     "    Annotation(start=120, end=128, label=\"bar\"),\n",
     "    Annotation(start=152, end=155, label=\"poo\"),\n",
-    "]\n",
-    "\n"
+    "]"
    ]
   },
   {
@@ -521,7 +520,7 @@
     }
    ],
    "source": [
-    "visualizer(text,annotations=annotations)"
+    "visualizer(text, annotations=annotations)"
    ]
   },
   {
@@ -553,7 +552,7 @@
     }
    ],
    "source": [
-    "funnyAnnotations = [dict(startPlace=i,endPlace=i+3,theTag=str(i)) for i in range(0,20,4)]\n",
+    "funnyAnnotations = [dict(startPlace=i, endPlace=i + 3, theTag=str(i)) for i in range(0, 20, 4)]\n",
     "funnyAnnotations"
    ]
   },
   {
@@ -563,7 +562,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "converter = lambda funny: Annotation(start=funny['startPlace'], end=funny['endPlace'], label=funny['theTag'])\n",
+    "def converter(funny):\n",
+    "    return Annotation(start=funny[\"startPlace\"], end=funny[\"endPlace\"], label=funny[\"theTag\"])\n",
+    "\n",
+    "\n",
     "visualizer = EncodingVisualizer(tokenizer=tokenizer, default_to_notebook=True, annotation_converter=converter)"
    ]
   },
   {
@@ -817,7 +819,7 @@
    ],
    "source": [
     "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json\" -O /tmp/roberta-base-vocab.json\n",
-    "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt\n"
+    "!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt"
    ]
   },
   {
@@ -1023,7 +1025,8 @@
    ],
    "source": [
     "from tokenizers import ByteLevelBPETokenizer\n",
-    "roberta_tokenizer = ByteLevelBPETokenizer.from_file('/tmp/roberta-base-vocab.json', '/tmp/roberta-base-merges.txt')\n",
+    "\n",
+    "roberta_tokenizer = ByteLevelBPETokenizer.from_file(\"/tmp/roberta-base-vocab.json\", \"/tmp/roberta-base-merges.txt\")\n",
     "roberta_visualizer = EncodingVisualizer(tokenizer=roberta_tokenizer, default_to_notebook=True)\n",
     "roberta_visualizer(text, annotations=annotations)"
    ]
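
For context on the converter cell this patch reformats, here is a minimal, self-contained sketch of the same annotation-converter pattern, runnable outside the notebook. The vocab path and sample text are illustrative assumptions; the converter, the custom annotation format, and the EncodingVisualizer call mirror the notebook cells in the diff above.

from tokenizers import BertWordPieceTokenizer
from tokenizers.tools import Annotation, EncodingVisualizer

# Assumed vocab location (any BERT WordPiece vocab file works here).
tokenizer = BertWordPieceTokenizer("/tmp/bert-base-uncased-vocab.txt", lowercase=True)

# Annotations in a custom dict format, as in the notebook's funnyAnnotations cell.
funny_annotations = [dict(startPlace=i, endPlace=i + 3, theTag=str(i)) for i in range(0, 20, 4)]


def converter(funny):
    # Map the custom keys onto the Annotation fields the visualizer understands.
    return Annotation(start=funny["startPlace"], end=funny["endPlace"], label=funny["theTag"])


# The converter is applied to each raw annotation before rendering.
visualizer = EncodingVisualizer(tokenizer=tokenizer, default_to_notebook=True, annotation_converter=converter)
visualizer("Some text to tokenize and visualize in a notebook.", annotations=funny_annotations)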