Skip to content

Commit

Permalink
style
Browse files: browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Oct 1, 2024
1 parent a5973ce commit 598620c
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions bindings/python/examples/using_the_visualizer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
"outputs": [],
"source": [
"from tokenizers import BertWordPieceTokenizer\n",
"from tokenizers.tools import EncodingVisualizer\n"
"from tokenizers.tools import EncodingVisualizer"
]
},
{
Expand Down Expand Up @@ -305,7 +305,7 @@
"anno2 = Annotation(start=2, end=4, label=\"bar\")\n",
"anno3 = Annotation(start=6, end=8, label=\"poo\")\n",
"anno4 = Annotation(start=9, end=12, label=\"shoe\")\n",
"annotations=[\n",
"annotations = [\n",
" anno1,\n",
" anno2,\n",
" anno3,\n",
Expand All @@ -315,8 +315,7 @@
" Annotation(start=80, end=95, label=\"bar\"),\n",
" Annotation(start=120, end=128, label=\"bar\"),\n",
" Annotation(start=152, end=155, label=\"poo\"),\n",
"]\n",
"\n"
"]"
]
},
{
Expand Down Expand Up @@ -521,7 +520,7 @@
}
],
"source": [
"visualizer(text,annotations=annotations)"
"visualizer(text, annotations=annotations)"
]
},
{
Expand Down Expand Up @@ -553,7 +552,7 @@
}
],
"source": [
"funnyAnnotations = [dict(startPlace=i,endPlace=i+3,theTag=str(i)) for i in range(0,20,4)]\n",
"funnyAnnotations = [dict(startPlace=i, endPlace=i + 3, theTag=str(i)) for i in range(0, 20, 4)]\n",
"funnyAnnotations"
]
},
Expand All @@ -565,6 +564,8 @@
"source": [
"def converter(funny):\n",
" return Annotation(start=funny[\"startPlace\"], end=funny[\"endPlace\"], label=funny[\"theTag\"])\n",
"\n",
"\n",
"visualizer = EncodingVisualizer(tokenizer=tokenizer, default_to_notebook=True, annotation_converter=converter)"
]
},
Expand Down Expand Up @@ -818,7 +819,7 @@
],
"source": [
"!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json\" -O /tmp/roberta-base-vocab.json\n",
"!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt\n"
"!wget \"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt\" -O /tmp/roberta-base-merges.txt"
]
},
{
Expand Down Expand Up @@ -1024,7 +1025,8 @@
],
"source": [
"from tokenizers import ByteLevelBPETokenizer\n",
"roberta_tokenizer = ByteLevelBPETokenizer.from_file('/tmp/roberta-base-vocab.json', '/tmp/roberta-base-merges.txt')\n",
"\n",
"roberta_tokenizer = ByteLevelBPETokenizer.from_file(\"/tmp/roberta-base-vocab.json\", \"/tmp/roberta-base-merges.txt\")\n",
"roberta_visualizer = EncodingVisualizer(tokenizer=roberta_tokenizer, default_to_notebook=True)\n",
"roberta_visualizer(text, annotations=annotations)"
]
Expand Down

0 comments on commit 598620c

Please sign in to comment.