diff --git a/notebooks/spelling/Spellchecking.ipynb b/notebooks/spelling/Spellchecking.ipynb index b452354..af0d027 100644 --- a/notebooks/spelling/Spellchecking.ipynb +++ b/notebooks/spelling/Spellchecking.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -79,7 +79,7 @@ "915" ] }, - "execution_count": 4, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -110,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -133,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -166,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -184,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -208,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -226,7 +226,7 @@ " (('это', 'ето'), 9)]" ] }, - "execution_count": 10, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -251,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -267,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -277,7 +277,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -287,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -317,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -366,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -384,7 +384,7 @@ " ('не', 23228)]" ] }, - "execution_count": 22, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -402,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -435,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -479,15 +479,15 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 582 µs, sys: 58 µs, total: 640 µs\n", - "Wall time: 908 µs\n" + "CPU times: user 211 µs, sys: 740 µs, total: 951 µs\n", + "Wall time: 955 µs\n" ] }, { @@ -496,7 +496,7 @@ "'солнце'" ] }, - "execution_count": 35, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -508,15 +508,15 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 820 µs, sys: 43 µs, total: 863 µs\n", - "Wall time: 1.06 ms\n" + "CPU times: user 152 µs, sys: 53 µs, total: 205 µs\n", + "Wall time: 208 µs\n" ] }, { @@ -525,7 +525,7 @@ "'чаще'" ] }, - "execution_count": 32, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -537,15 +537,15 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 648 µs, sys: 1 µs, total: 649 µs\n", - "Wall time: 655 µs\n" + "CPU times: user 205 µs, sys: 29 µs, total: 234 µs\n", + "Wall time: 234 µs\n" ] }, { @@ -554,7 +554,7 @@ "'апофеоз'" ] }, - "execution_count": 33, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -583,13 +583,13 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ebbb425047ff455997758fbf2cc686e8", + "model_id": "913b76306c7e484f9d99e764d2caddc4", "version_major": 2, "version_minor": 0 }, @@ -645,15 +645,15 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.8708354177088544\n", - "0.5116459627329193\n", + "0.870935467733867\n", + "0.5124223602484472\n", "0.07603077983231882\n" ] } @@ -673,15 +673,15 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 492 µs, sys: 32 µs, total: 524 µs\n", - "Wall time: 716 µs\n" + "CPU times: user 237 µs, sys: 1.41 ms, total: 1.65 ms\n", + "Wall time: 1.72 ms\n" ] }, { @@ -690,7 +690,7 @@ "'солнце'" ] }, - "execution_count": 36, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -702,15 +702,15 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.49 s, sys: 4.36 ms, total: 1.5 s\n", - "Wall time: 1.52 s\n" + "CPU times: user 1.29 s, sys: 10.5 ms, total: 1.3 s\n", + "Wall time: 1.49 s\n" ] }, { @@ -719,7 +719,7 @@ "'насмехатьсяаававттававаываываы'" ] }, - "execution_count": 37, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -738,7 +738,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -756,7 +756,7 @@ " ('это', 'ето', 'что')]" ] }, - "execution_count": 41, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -800,7 +800,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -808,9 +808,12 @@ "output_type": "stream", "text": [ "Collecting textdistance\n", - " Downloading textdistance-4.5.0-py3-none-any.whl (31 kB)\n", + " Downloading textdistance-4.6.3-py3-none-any.whl (31 kB)\n", "Installing collected packages: textdistance\n", - "Successfully installed textdistance-4.5.0\n" + "Successfully installed textdistance-4.6.3\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], @@ -820,7 +823,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -829,7 +832,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -845,15 +848,15 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 11.8 s, sys: 322 ms, total: 12.1 s\n", - "Wall time: 12 s\n" + "CPU times: user 1.06 s, sys: 154 ms, total: 1.21 s\n", + "Wall time: 1.45 s\n" ] }, { @@ -881,7 +884,7 @@ " ('донцем', 0.6666666666666667)]" ] }, - "execution_count": 45, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -893,15 +896,15 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 15.7 s, sys: 25.6 ms, total: 15.7 s\n", - "Wall time: 15.8 s\n" + "CPU times: user 1.11 s, sys: 11.7 ms, total: 1.12 s\n", + "Wall time: 1.19 s\n" ] }, { @@ -929,7 +932,7 @@ " ('донцем', 0.6666666666666667)]" ] }, - "execution_count": 46, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -948,7 +951,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 35, "metadata": { "scrolled": true }, @@ -957,8 +960,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 20.1 s, sys: 39.2 ms, total: 20.2 s\n", - "Wall time: 20.2 s\n" + "CPU times: user 1.12 s, sys: 15.1 ms, total: 1.13 s\n", + "Wall time: 1.2 s\n" ] }, { @@ -971,7 +974,7 @@ " ('опорной', 0.5714285714285714)]" ] }, - "execution_count": 46, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -983,15 +986,15 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 9.39 s, sys: 18.5 ms, total: 9.41 s\n", - "Wall time: 9.41 s\n" + "CPU times: user 1.07 s, sys: 32 ms, total: 1.1 s\n", + "Wall time: 1.16 s\n" ] }, { @@ -1000,7 +1003,7 @@ "[('кул', 1.0), ('акул', 0.75), ('коул', 0.75), ('куль', 0.75), ('кулл', 0.75)]" ] }, - "execution_count": 47, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1019,7 +1022,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -1028,15 +1031,15 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2.92 s, sys: 6 ms, total: 2.93 s\n", - "Wall time: 2.96 s\n" + "CPU times: user 220 ms, sys: 4.05 ms, total: 224 ms\n", + "Wall time: 233 ms\n" ] }, { @@ -1064,7 +1067,7 @@ " ('синие', 0.6)]" ] }, - "execution_count": 48, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1090,7 +1093,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -1099,15 +1102,15 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 155 ms, sys: 2.25 ms, total: 157 ms\n", - "Wall time: 161 ms\n" + "CPU times: user 135 ms, sys: 2.09 ms, total: 137 ms\n", + "Wall time: 139 ms\n" ] }, { @@ -1116,7 +1119,7 @@ "['солнце', 'соне', 'солнцем', 'сотне']" ] }, - "execution_count": 52, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1160,7 +1163,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -1171,7 +1174,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -1187,16 +1190,1027 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'н': 6052,\n", + " 'о': 6414,\n", + " 'в': 3192,\n", + " 'с': 7493,\n", + " 'т': 7812,\n", + " 'р': 7092,\n", + " 'й': 5093,\n", + " 'к': 5224,\n", + " 'а': 2480,\n", + " 'но': 6223,\n", + " 'ов': 6453,\n", + " 'во': 3319,\n", + " 'ос': 6737,\n", + " 'ст': 7690,\n", + " 'тр': 7991,\n", + " 'ро': 7281,\n", + " 'ой': 6573,\n", + " 'йк': 5132,\n", + " 'ка': 5225,\n", + " 'нов': 6226,\n", + " 'ово': 6463,\n", + " 'вос': 3337,\n", + " 'ост': 6751,\n", + " 'стр': 7702,\n", + " 'тро': 7995,\n", + " 'рой': 7291,\n", + " 'ойк': 6576,\n", + " 'йка': 5133,\n", + " 'и': 4698,\n", + " 'ж': 4355,\n", + " 'е': 3921,\n", + " 'г': 3454,\n", + " 'д': 3640,\n", + " 'я': 9726,\n", + " 'ни': 6165,\n", + " 'иж': 4785,\n", + " 'же': 4390,\n", + " 'ег': 3969,\n", + " 'го': 3558,\n", + " 'ор': 6706,\n", + " 'од': 6491,\n", + " 'дс': 3824,\n", + " 'ск': 7597,\n", + " 'ая': 2940,\n", + " 'ниж': 6172,\n", + " 'иже': 4787,\n", + " 'жег': 4393,\n", + " 'его': 3978,\n", + " 'гор': 3574,\n", + " 'оро': 6720,\n", + " 'род': 7286,\n", + " 'одс': 6508,\n", + " 'дск': 3828,\n", + " 'ска': 7598,\n", + " 'кая': 5253,\n", + " 'б': 2951,\n", + " 'л': 5461,\n", + " 'ь': 9332,\n", + " 'об': 6428,\n", + " 'бл': 3050,\n", + " 'ла': 5462,\n", + " 'ас': 2799,\n", + " 'ть': 8087,\n", + " 'обл': 6438,\n", + " 'бла': 3051,\n", + " 'лас': 5480,\n", + " 'аст': 2813,\n", + " 'сть': 7707,\n", + " 'се': 7544,\n", + " 'ел': 4093,\n", + " 'ль': 5720,\n", + " 'ьс': 9417,\n", + " 'ки': 5285,\n", + " 'ий': 4820,\n", + " 'сел': 7555,\n", + " 'ель': 4110,\n", + " 'льс': 5735,\n", + " 'ьск': 9422,\n", + " 'ски': 7601,\n", + " 'кий': 5293,\n", + " 'п': 6890,\n", + " 'ё': 9881,\n", + " 'по': 6988,\n", + " 'сё': 7808,\n", + " 'ёл': 9904,\n", + " 'ло': 5615,\n", + " 'ок': 6586,\n", + " 'пос': 7005,\n", + " 'осё': 6762,\n", + " 'сёл': 7809,\n", + " 'ёло': 9906,\n", + " 'лок': 5626,\n", + " 'м': 5800,\n", + " 'ди': 3729,\n", + " 'ив': 4728,\n", + " 'ве': 3233,\n", + " 'ее': 4007,\n", + " 'ев': 3946,\n", + " 'вс': 3359,\n", + " 'ко': 5338,\n", + " 'ом': 6629,\n", + " 'див': 3732,\n", + " 'иве': 4730,\n", + " 'вее': 3238,\n", + " 'еев': 4008,\n", + " 'евс': 3959,\n", + " 'вск': 3362,\n", + " 'ско': 7604,\n", + " 'ком': 5351,\n", + " 'ра': 7093,\n", + " 'ай': 2620,\n", + " 'йо': 5162,\n", + " 'он': 6649,\n", + " 'не': 6125,\n", + " 'рай': 7103,\n", + " 'айо': 2633,\n", + " 'йон': 5166,\n", + " 'оне': 6655,\n", + " 'кой': 5348,\n", + " 'ти': 7897,\n", + " 'сти': 7696,\n", + " 'х': 8586,\n", + " 'вх': 3392,\n", + " 'хо': 8674,\n", + " 'ит': 4990,\n", + " 'вхо': 3393,\n", + " 'ход': 8679,\n", + " 'оди': 6500,\n", + " 'дит': 3747,\n", + " 'со': 7632,\n", + " 'та': 7813,\n", + " 'ав': 2505,\n", + " 'сос': 7650,\n", + " 'ста': 7691,\n", + " 'тав': 7815,\n", + " 'са': 7494,\n", + " 'ат': 2823,\n", + " 'ис': 4968,\n", + " 'сс': 7674,\n", + " 'ог': 6478,\n", + " 'сат': 7511,\n", + " 'ати': 2827,\n", + " 'тис': 7915,\n", + " 'исс': 4980,\n", + " 'сск': 7678,\n", + " 'ког': 5342,\n", + " 'ого': 6487,\n", + " 'ет': 4248,\n", + " 'ьсо': 9423,\n", + " 'сов': 7635,\n", + " 'ове': 6457,\n", + " 'вет': 3247,\n", + " 'ета': 4249,\n", + " 'сп': 7659,\n", + " 'ол': 6601,\n", + " 'ож': 6528,\n", + " 'ен': 4133,\n", + " 'рас': 7111,\n", + " 'асп': 2810,\n", + " 'спо': 7664,\n", + " 'пол': 6999,\n", + " 'оло': 6615,\n", + " 'лож': 5622,\n", + " 'оже': 6531,\n", + " 'жен': 4399,\n", + " '1': 42,\n", + " '2': 129,\n", + " '12': 63,\n", + " '5': 261,\n", + " 'км': 5327,\n", + " 'ю': 9606,\n", + " 'у': 8122,\n", + " 'юг': 9618,\n", + " 'гу': 3604,\n", + " 'от': 6763,\n", + " 'ела': 4094,\n", + " 'ва': 3193,\n", + " 'ева': 3947,\n", + " 'з': 4482,\n", + " 'за': 4483,\n", + " 'ап': 2755,\n", + " 'па': 6891,\n", + " 'ад': 2541,\n", + " 'ду': 3839,\n", + " 'зап': 4498,\n", + " 'апа': 2756,\n", + " 'пад': 6894,\n", + " 'аду': 2560,\n", + " 'да': 3641,\n", + " 'ода': 6492,\n", + " 'ар': 2769,\n", + " 'сар': 7509,\n", + " 'аро': 2783,\n", + " 'ров': 7284,\n", + " 'ова': 6454,\n", + " 'на': 6053,\n", + " 'пр': 7022,\n", + " 'пра': 7023,\n", + " 'рав': 7096,\n", + " 'аво': 2516,\n", + " 'вом': 3332,\n", + " 'бе': 2994,\n", + " 'ер': 4195,\n", + " 'ре': 7164,\n", + " 'бер': 3007,\n", + " 'ере': 4201,\n", + " 'рег': 7168,\n", + " 'егу': 3980,\n", + " 'ек': 4079,\n", + " 'рек': 7175,\n", + " 'еки': 4083,\n", + " 'ч': 8838,\n", + " 'ви': 3264,\n", + " 'ич': 5048,\n", + " 'чк': 8904,\n", + " 'ин': 4880,\n", + " 'нз': 6159,\n", + " 'вич': 3285,\n", + " 'ичк': 5053,\n", + " 'чки': 8907,\n", + " 'кин': 5297,\n", + " 'инз': 4888,\n", + " 'нза': 6160,\n", + " 'ы': 9184,\n", + " 'вы': 3412,\n", + " 'ый': 9228,\n", + " 'авы': 2525,\n", + " 'вый': 3421,\n", + " 'ри': 7208,\n", + " 'то': 7957,\n", + " 'при': 7025,\n", + " 'рит': 7227,\n", + " 'ито': 4999,\n", + " 'ток': 7968,\n", + " 'кр': 5368,\n", + " 'ру': 7355,\n", + " 'уж': 8198,\n", + " 'жё': 4478,\n", + " 'ён': 9913,\n", + " 'окр': 6596,\n", + " 'кру': 5373,\n", + " 'руж': 7362,\n", + " 'ужё': 8208,\n", + " 'жён': 4480,\n", + " 'ш': 8970,\n", + " 'см': 7616,\n", + " 'ме': 5843,\n", + " 'еш': 4327,\n", + " 'ша': 8971,\n", + " 'ан': 2718,\n", + " 'нн': 6214,\n", + " 'ны': 6359,\n", + " 'ым': 9246,\n", + " 'ми': 5871,\n", + " 'сме': 7618,\n", + " 'меш': 5864,\n", + " 'еша': 4328,\n", + " 'шан': 8981,\n", + " 'анн': 2731,\n", + " 'нны': 6221,\n", + " 'ным': 6362,\n", + " 'ыми': 9249,\n", + " 'ле': 5515,\n", + " 'ес': 4225,\n", + " 'ам': 2692,\n", + " 'лес': 5533,\n", + " 'еса': 4226,\n", + " 'сам': 7506,\n", + " 'ами': 2699,\n", + " 'ое': 6521,\n", + " 'ед': 3982,\n", + " 'нё': 6406,\n", + " 'сое': 7638,\n", + " 'оед': 6523,\n", + " 'еди': 3989,\n", + " 'дин': 3742,\n", + " 'инё': 4908,\n", + " 'нён': 6409,\n", + " 'ф': 8459,\n", + " 'сф': 7739,\n", + " 'фа': 8460,\n", + " 'ал': 2667,\n", + " 'ьт': 9426,\n", + " 'асф': 2815,\n", + " 'сфа': 7740,\n", + " 'фал': 8468,\n", + " 'аль': 2688,\n", + " 'льт': 5736,\n", + " 'ьто': 9430,\n", + " 'тов': 7960,\n", + " 'вой': 3329,\n", + " 'до': 3785,\n", + " 'дор': 3801,\n", + " 'рог': 7285,\n", + " 'гой': 3567,\n", + " 'лк': 5582,\n", + " 'ёлк': 9905,\n", + " 'лко': 5587,\n", + " 'ц': 8746,\n", + " 'цы': 8831,\n", + " 'ыг': 9203,\n", + " 'га': 3455,\n", + " 'вк': 3290,\n", + " 'цыг': 8832,\n", + " 'ыга': 9204,\n", + " 'ган': 3468,\n", + " 'ано': 2732,\n", + " 'овк': 6459,\n", + " 'вка': 3291,\n", + " 'гр': 3587,\n", + " 'ун': 8286,\n", + " 'нт': 6280,\n", + " 'гру': 3592,\n", + " 'рун': 7369,\n", + " 'унт': 8297,\n", + " 'нто': 6289,\n", + " 'овы': 6474,\n", + " 'вым': 3424,\n", + " 'оч': 6836,\n", + " 'чн': 8915,\n", + " 'про': 7026,\n", + " 'рос': 7299,\n", + " 'лоч': 5639,\n", + " 'очн': 6843,\n", + " 'чны': 8921,\n", + " 'ога': 6479,\n", + " 'гам': 3467,\n", + " '3': 193,\n", + " 'аз': 2586,\n", + " 'зв': 4519,\n", + " 'ие': 4772,\n", + " 'наз': 6060,\n", + " 'азв': 2589,\n", + " 'зва': 4520,\n", + " 'ван': 3207,\n", + " 'ани': 2727,\n", + " 'ние': 6171,\n", + " 'яв': 9731,\n", + " 'вл': 3297,\n", + " 'ля': 5768,\n", + " 'яе': 9756,\n", + " 'тс': 8003,\n", + " 'ся': 7798,\n", + " 'явл': 9735,\n", + " 'вля': 3304,\n", + " 'ляе': 5773,\n", + " 'яет': 9759,\n", + " 'етс': 4261,\n", + " 'тся': 8014,\n", + " 'су': 7712,\n", + " 'уг': 8161,\n", + " 'уб': 8134,\n", + " 'бо': 3072,\n", + " 'суг': 7716,\n", + " 'угу': 8171,\n", + " 'губ': 3606,\n", + " 'убо': 8143,\n", + " 'оф': 6808,\n", + " 'фи': 8491,\n", + " 'иц': 5040,\n", + " 'ци': 8782,\n", + " 'иа': 4699,\n", + " 'ьн': 9401,\n", + " 'офи': 6811,\n", + " 'фиц': 8508,\n", + " 'ици': 5043,\n", + " 'циа': 8783,\n", + " 'иал': 4705,\n", + " 'льн': 5731,\n", + " 'ьны': 9407,\n", + " 'тн': 7947,\n", + " 'мес': 5859,\n", + " 'ест': 4238,\n", + " 'стн': 7700,\n", + " 'тно': 7951,\n", + " 'ное': 6229,\n", + " 'нас': 6069,\n", + " 'асе': 2803,\n", + " 'еле': 4097,\n", + " 'лен': 5529,\n", + " 'ени': 4141,\n", + " 'ьз': 9374,\n", + " 'зу': 4648,\n", + " 'уе': 8192,\n", + " 'исп': 4979,\n", + " 'оль': 6625,\n", + " 'льз': 5727,\n", + " 'ьзу': 9378,\n", + " 'зуе': 4651,\n", + " 'ует': 8197,\n", + " 'кл': 5316,\n", + " 'лю': 5749,\n", + " 'юч': 9707,\n", + " 'чи': 8883,\n", + " 'те': 7868,\n", + " 'иск': 4974,\n", + " 'скл': 7602,\n", + " 'клю': 5324,\n", + " 'люч': 5765,\n", + " 'ючи': 9710,\n", + " 'чит': 8897,\n", + " 'ите': 4993,\n", + " 'тел': 7880,\n", + " 'ьно': 9405,\n", + " 'рн': 7271,\n", + " 'вн': 3311,\n", + " 'ьте': 9428,\n", + " 'тер': 7885,\n", + " 'ерн': 4208,\n", + " 'рна': 7272,\n", + " 'нат': 6070,\n", + " 'тив': 7900,\n", + " 'ивн': 4734,\n", + " 'вно': 3315,\n", + " 'хи': 8633,\n", + " 'ры': 7421,\n", + " 'хит': 8648,\n", + " 'итр': 5000,\n", + " 'тры': 7998,\n", + " 'рый': 7428,\n", + " 'уп': 8313,\n", + " 'еб': 3932,\n", + " 'упо': 8320,\n", + " 'пот': 7006,\n", + " 'отр': 6778,\n", + " 'тре': 7993,\n", + " 'реб': 7166,\n", + " 'ебл': 3936,\n", + " 'бля': 3058,\n", + " 'яз': 9768,\n", + " 'зы': 4672,\n", + " 'ык': 9229,\n", + " 'язы': 9776,\n", + " 'зык': 4676,\n", + " 'ыко': 9234,\n", + " 'ков': 5341,\n", + " 'обо': 6441,\n", + " 'бор': 3087,\n", + " 'рот': 7300,\n", + " 'ром': 7294,\n", + " 'ран': 7107,\n", + " 'ане': 2724,\n", + " 'нее': 6131,\n", + " 'ем': 4114,\n", + " 'мы': 6015,\n", + " 'ые': 9214,\n", + " 'уем': 8196,\n", + " 'емы': 4129,\n", + " 'мые': 6017,\n", + " 'ия': 5085,\n", + " 'ния': 6194,\n", + " 'пе': 6919,\n", + " 'сч': 7757,\n", + " 'ча': 8839,\n", + " 'пес': 6932,\n", + " 'есч': 4242,\n", + " 'сча': 7758,\n", + " 'чан': 8849,\n", + " 'аны': 2744,\n", + " 'ный': 6361,\n", + " 'из': 4795,\n", + " 'тк': 7927,\n", + " 'изв': 4798,\n", + " 'зве': 4521,\n", + " 'вес': 3246,\n", + " 'стк': 7697,\n", + " 'тко': 7932,\n", + " 'сн': 7624,\n", + " 'осн': 6746,\n", + " 'сно': 7628,\n", + " '9': 402,\n", + " '0': 0,\n", + " '19': 121,\n", + " '92': 407,\n", + " '20': 130,\n", + " '192': 124,\n", + " 'ах': 2878,\n", + " 'год': 3562,\n", + " 'дах': 3660,\n", + " 'нц': 6332,\n", + " 'ца': 8747,\n", + " 'пер': 6931,\n", + " 'рес': 7182,\n", + " 'есе': 4229,\n", + " 'енц': 4153,\n", + " 'нца': 6333,\n", + " 'цам': 8750,\n", + " 'дн': 3777,\n", + " 'их': 5029,\n", + " 'осе': 6741,\n", + " 'сед': 7549,\n", + " 'едн': 3993,\n", + " 'дни': 3780,\n", + " 'них': 6187,\n", + " 'ма': 5801,\n", + " 'ала': 2668,\n", + " 'лам': 5475,\n", + " 'ама': 2693,\n", + " 'мас': 5817,\n", + " 'асо': 2809,\n", + " 'ыш': 9319,\n", + " 'шк': 9039,\n", + " 'нар': 6068,\n", + " 'ары': 2794,\n", + " 'рыш': 7438,\n", + " 'ышк': 9323,\n", + " 'шки': 9042,\n", + " 'ино': 4894,\n", + " 'ых': 9307,\n", + " 'енн': 4144,\n", + " 'ных': 6365,\n", + " 'оо': 6677,\n", + " 'тв': 7848,\n", + " 'соо': 7647,\n", + " 'оот': 6684,\n", + " 'отв': 6766,\n", + " 'тве': 7850,\n", + " 'тст': 8011,\n", + " 'ств': 7693,\n", + " 'вен': 3244,\n", + " 'нно': 6218,\n", + " '8': 344,\n", + " '4': 227,\n", + " '14': 80,\n", + " 'оз': 6540,\n", + " 'зн': 4609,\n", + " 'нс': 6259,\n", + " 'воз': 3327,\n", + " 'озн': 6552,\n", + " 'зне': 4611,\n", + " 'нес': 6143,\n", + " 'сен': 7557,\n", + " 'енс': 4148,\n", + " 'нск': 6266,\n", + " 'ио': 4909,\n", + " 'тра': 7992,\n", + " 'рад': 7098,\n", + " 'ади': 2550,\n", + " 'диц': 3751,\n", + " 'цио': 8795,\n", + " 'ион': 4920,\n", + " 'онн': 6660,\n", + " 'ке': 5263,\n", + " 'лке': 5584,\n", + " 'жи': 4408,\n", + " 'ил': 4842,\n", + " 'ли': 5553,\n", + " 'жил': 4417,\n", + " 'или': 4849,\n", + " 'аб': 2488,\n", + " 'раб': 7095,\n", + " 'або': 2499,\n", + " 'боч': 3093,\n", + " 'очи': 6840,\n", + " 'чие': 8887,\n", + " 'овх': 6469,\n", + " 'хоз': 8681,\n", + " 'оза': 6541,\n", + " 'вп': 3347,\n", + " 'рё': 7482,\n", + " 'ёд': 9892,\n", + " 'впе': 3349,\n", + " 'ерё': 4224,\n", + " 'рёд': 7485,\n", + " 'це': 8761,\n", + " 'цен': 8769,\n", + " 'ент': 4149,\n", + " 'нтр': 6290,\n", + " 'зл': 4594,\n", + " 'озл': 6550,\n", + " 'зле': 4596,\n", + " 'лка': 5583,\n", + " 'рь': 7439,\n", + " 'ье': 9361,\n", + " 'кар': 5242,\n", + " 'арь': 2795,\n", + " 'рье': 7440,\n", + " 'ьер': 9369,\n", + " 'гд': 3491,\n", + " 'де': 3689,\n", + " 'где': 3493,\n", + " 'ак': 2645,\n", + " 'кт': 5393,\n", + " 'акт': 2660,\n", + " 'кти': 5396,\n", + " 'бы': 3160,\n", + " 'ыв': 9194,\n", + " 'доб': 3786,\n", + " 'обы': 6450,\n", + " 'быв': 3161,\n", + " 'ыва': 9195,\n", + " 'вал': 3205,\n", + " 'али': 2674,\n", + " 'ву': 3373,\n", + " 'ую': 8451,\n", + " 'дол': 3796,\n", + " 'лом': 5628,\n", + " 'оми': 6633,\n", + " 'мит': 5887,\n", + " 'ову': 6468,\n", + " 'вую': 3389,\n", + " 'му': 5977,\n", + " 'ук': 8239,\n", + " 'ку': 5404,\n", + " 'мук': 5983,\n", + " 'уку': 8250,\n", + " 'бу': 3119,\n", + " 'ут': 8372,\n", + " 'бут': 3133,\n", + " 'уто': 8382,\n", + " 'нь': 6367,\n", + " 'кам': 5238,\n", + " 'аме': 2697,\n", + " 'мен': 5856,\n", + " 'ень': 4158,\n", + " 'щ': 9114,\n", + " 'оя': 6876,\n", + " 'ящ': 9870,\n", + " 'ще': 9126,\n", + " 'сто': 7701,\n", + " 'тоя': 7984,\n", + " 'оящ': 6886,\n", + " 'яще': 9872,\n", + " 'щее': 9131,\n", + " 'вр': 3352,\n", + " 'мя': 6040,\n", + " 'вре': 3354,\n", + " 'рем': 7177,\n", + " 'емя': 4131,\n", + " 'ыт': 9296,\n", + " 'зак': 4493,\n", + " 'акр': 2658,\n", + " 'кры': 5374,\n", + " 'рыт': 7434,\n", + " 'дан': 3653,\n", + " '7': 317,\n", + " '97': 412,\n", + " '78': 333,\n", + " 'ха': 8587,\n", + " 'зо': 4618,\n", + " 'лс': 5648,\n", + " 'хар': 8603,\n", + " 'ара': 2770,\n", + " 'рак': 7104,\n", + " 'кте': 5395,\n", + " 'ери': 4204,\n", + " 'риз': 7216,\n", + " 'изо': 4807,\n", + " 'зов': 4620,\n", + " 'алс': 2681,\n", + " 'лся': 5655,\n", + " 'как': 5236,\n", + " 'еп': 4179,\n", + " 'рс': 7325,\n", + " 'неп': 6141,\n", + " 'епе': 4181,\n", + " 'ерс': 4212,\n", + " 'рсп': 7333,\n", + " 'спе': 7661,\n", + " 'пек': 6926,\n", + " 'ект': 4089,\n", + " 'вны': 3317,\n", + " 'зд': 4538,\n", + " 'сь': 7783,\n", + " 'зде': 4541,\n", + " 'дес': 3707,\n", + " 'есь': 4245,\n", + " 'ты': 8073,\n", + " 'асч': 2818,\n", + " 'счи': 7760,\n", + " 'иты': 5006,\n", + " 'тыв': 8074,\n", + " 'ало': 2679,\n", + " 'лос': 5633,\n", + " 'ось': 6760,\n", + " '24': 161,\n", + " 'зя': 4689,\n", + " 'яй': 9779,\n", + " 'йс': 5179,\n", + " 'озя': 6558,\n", + " 'зяй': 4690,\n", + " 'яйс': 9780,\n", + " 'йст': 5186,\n", + " 'тва': 7849,\n", + " '43': 238,\n", + " 'жит': 4423,\n", + " 'еля': 4112,\n", + " 'бж': 3014,\n", + " 'вод': 3324,\n", + " 'одо': 6505,\n", + " 'дос': 3802,\n", + " 'сна': 7625,\n", + " 'наб': 6054,\n", + " 'абж': 2494,\n", + " 'бже': 3016,\n", + " 'ущ': 8439,\n", + " 'ял': 9788,\n", + " 'осу': 6752,\n", + " 'сущ': 7736,\n", + " 'уще': 8441,\n", + " 'щес': 9139,\n", + " 'твл': 7852,\n", + " 'лял': 5778,\n", + " 'яло': 9791,\n", + " 'дц': 3867,\n", + " 'кол': 5350,\n", + " 'лод': 5620,\n", + " 'одц': 6512,\n", + " 'дце': 3869,\n", + " 'цев': 8762,\n", + " 'ик': 4825,\n", + " 'одн': 6504,\n", + " 'ник': 6176,\n", + " 'ико': 4834,\n", + " 'уч': 8417,\n", + " 'чр': 8928,\n", + " 'еж': 4015,\n", + " 'жд': 4382,\n", + " 'учр': 8423,\n", + " 'чре': 8929,\n", + " 'реж': 7171,\n", + " 'ежд': 4017,\n", + " 'жде': 4384,\n", + " 'ден': 3703,\n", + " 'оц': 6831,\n", + " 'цк': 8803,\n", + " 'ул': 8253,\n", + " 'тб': 7841,\n", + " 'соц': 7655,\n", + " 'оцк': 6835,\n", + " 'цку': 8808,\n", + " 'кул': 5415,\n", + " 'уль': 8266,\n", + " 'тбы': 7847,\n", + " 'быт': 3166,\n", + " 'ыта': 9297,\n", + " 'отс': 6779,\n", + " 'тсу': 8012,\n", + " 'сут': 7731,\n", + " 'утс': 8384,\n", + " 'тво': 7853,\n", + " 'вов': 3322,\n", + " '99': 414,\n", + " '199': 126,\n", + " 'оду': 6510,\n", + " '6': 295,\n", + " '16': 97,\n", + " 'ей': 4059,\n", + " 'лей': 5525,\n", + " 'кот': 5357,\n", + " 'ото': 6776,\n", + " 'тор': 7974,\n", + " 'оры': 6732,\n", + " 'рых': 7435,\n", + " 'уд': 8172,\n", + " 'бн': 3064,\n", + " 'тру': 7997,\n", + " 'руд': 7360,\n", + " 'удо': 8184,\n", + " 'осп': 6748,\n", + " 'осо': 6747,\n", + " 'соб': 7634,\n", + " 'обн': 6440,\n", + " 'бно': 3068,\n", + " 'ног': 6227,\n", + " 'зр': 4637,\n", + " 'озр': 6554,\n", + " 'зра': 4638,\n", + " 'ян': 9800,\n", + " 'нв': 6087,\n", + " 'ря': 7463,\n", + " 'нва': 6088,\n", + " 'вар': 3208,\n", + " 'аря': 2797,\n", + " '95': 410,\n", + " 'им': 4863,\n", + " 'име': 4867,\n", + " 'мел': 5854,\n", + " 'ело': 4103,\n", + " 'ьк': 9385,\n", + " 'тол': 7969,\n", + " 'льк': 5729,\n", + " 'ько': 9390,\n", + " 'аё': 2948,\n", + " 'ёт': 9939,\n", + " 'таё': 7840,\n", + " 'аёт': 2950,\n", + " 'ётс': 9945,\n", + " 'лы': 5705,\n", + " 'илы': 4858,\n", + " 'лым': 5714,\n", + " 'лу': 5663,\n", + " 'олу': 6619,\n", + " 'луч': 5686,\n", + " 'учи': 8420,\n", + " 'чил': 8891,\n", + " 'раз': 7101,\n", + " 'зви': 4522,\n", + " 'вит': 3282,\n", + " 'ити': 4994,\n", + " 'тие': 7903,\n", + " 'аг': 2528,\n", + " 'лаг': 5466,\n", + " 'аго': 2537,\n", + " 'дар': 3656,\n", + " 'св': 7527,\n", + " 'сво': 7531,\n", + " 'вое': 3325,\n", + " 'бли': 3053,\n", + " 'лиз': 5561,\n", + " 'зос': 4631,\n", + " 'вя': 3443,\n", + " 'ят': 9845,\n", + " 'свя': 7532,\n", + " 'вят': 3447,\n", + " 'яты': 9854,\n", + " 'тым': 8080,\n", + " 'ист': 4981,\n", + " 'точ': 7981,\n", + " 'чни': 8918,\n", + " 'ика': 4826,\n", + " 'лук': 5674,\n", + " 'уки': 8243,\n", + " 'кил': 5295,\n", + " 'ило': 4853,\n", + " 'оме': 6632,\n", + " 'мет': 5860,\n", + " 'етр': 4260,\n", + " 'каз': 5233,\n", + " 'аза': 2587,\n", + " 'зан': 4496,\n", + " 'анс': 2735,\n", + " 'ято': 9851,\n", + " 'тог': 7961,\n", + " 'аф': 2864,\n", + " 'сер': 7560,\n", + " 'ера': 4196,\n", + " 'раф': 7114,\n", + " 'афи': 2868,\n", + " 'фим': 8502,\n", + " 'има': 4864,\n", + " 'овс': 6466,\n", + " 'кит': 5302,\n", + " 'мо': 5924,\n", + " 'ыр': 9271,\n", + " 'мон': 5938,\n", + " 'она': 6650,\n", + " 'сты': 7706,\n", + " 'тыр': 8082,\n", + " 'ыря': 9283,\n", + " '01': 11,\n", + " '201': 132,\n", + " '012': 14,\n", + " 'ыл': 9238,\n", + " 'был': 3164,\n", + " 'щё': 9164,\n", + " 'осв': 6740,\n", + " 'вящ': 3448,\n", + " 'ящё': 9875,\n", + " 'щён': 9166,\n", + " 'дом': 3797,\n", + " 'омо': 6638,\n", + " 'мов': 5927,\n", + " 'хр': 8698,\n", + " 'хра': 8699,\n", + " 'рам': 7106,\n", + " 'че': 8860,\n", + " 'чес': 8872,\n", + " 'э': 9484,\n", + " 'эс': 9582,\n", + " 'ьд': 9352,\n", + " 'мер': 5858,\n", + " 'рал': 7105,\n", + " 'льд': 5724,\n", + " 'ьда': 9353,\n", + " 'ьм': 9393,\n", + " 'фил': 8501,\n", + " 'иль': 4859,\n", + " 'льм': 5730,\n", + " '90': 403,\n", + " '05': 24,\n", + " '190': 122,\n", + " 'нем': 6138,\n", + " 'емо': 4123,\n", + " 'мой': 5934,\n", + " 'аж': 2575,\n", + " 'жн': 4436,\n", + " 'кор': 5355,\n", + " 'отк': 6772,\n", + " 'раж': 7100,\n", + " 'ажн': 2581,\n", + " 'жны': 4442,\n", + " 'др': 3815,\n", + " 'дра': 3816,\n", + " 'мат': 5818,\n", + " 'тич': 7921,\n", + " 'иче': 5050,\n", + " 'еск': 4231,\n", + " 'ёр': 9922,\n", + " 'ежи': 4019,\n", + " 'жис': 4422,\n", + " 'ссё': 7689,\n", + " 'сёр': 7811,\n", + " 'ёро': 9930,\n", + " 'лис': 5571,\n", + " 'ги': 3511,\n", + " 'аш': 2911,\n", + " 'ше': 8999,\n", + " 'лаш': 5487,\n", + " 'аше': 2914,\n", + " '18': 113,\n", + " '87': 390,\n", + " '73': 325,\n", + " '96': 411,\n", + " '68': 314,\n", + " 'вик': 3275,\n", + " 'икт': 4837,\n", + " 'кто': 5398,\n", + " 'оре': 6712,\n", + " 'рен': 7178,\n", + " 'ена': 4134,\n", + " 'жа': 4356,\n", + " 'жас': 4370,\n", + " 'асс': 2812,\n", + " 'ссе': 7676,\n", + " '86': 384,\n", + " '62': 303,\n", + " '186': 120,\n", + " '862': 386,\n", + " '91': 406,\n", + " '13': 72,\n", + " '191': 123,\n", + " 'ня': 6390,\n", + " 'сня': 7631,\n", + " 'нят': 6400,\n", + " 'ну': 6296,\n", + " 'ома': 6630,\n", + " 'ман': 5814,\n", + " 'ану': 2737,\n", + " 'ора': 6707,\n", + " 'гю': 3636,\n", + " 'юго': 9621,\n", + " 'мь': 6028,\n", + " 'пре': 7024,\n", + " 'емь': 4130,\n", + " 'мье': 6029,\n", + " 'оял': 6880,\n", + " 'яла': 9789,\n", + " 'ась': 2821,\n", + " 'фр': 8539,\n", + " 'ии': 4814,\n", + " 'фра': 8540,\n", + " 'анц': 2740,\n", + " 'нци': 6336,\n", + " 'ции': 8789,\n", + " 'ае': 2569,\n", + " 'ита': 4991,\n", + " 'тае': 7818,\n", + " 'ает': 2573,\n", + " 'амы': 2714,\n", + " 'мым': 6021,\n", + " 'рв': 7132,\n", + " 'ерв': 4198,\n", + " 'ьмо': 9397,\n", + " 'мом': 5937,\n", + " 'жс': 4458,\n", + " 'пар': 6905,\n", + " 'ари': 2778,\n", + " 'риж': 7215,\n", + " 'ижс': 4792,\n", + " 'жск': 4459,\n", + " 'бог': 3075,\n", + " 'гом': 3570,\n", + " 'ате': 2826,\n", + " 'азы': 2606,\n", + " 'зыв': 4674,\n", + " 'вае': 3199,\n", + " 'жиз': 4414,\n", + " 'изн': 4806,\n", + " 'зни': 4612,\n", + " 'кра': 5369,\n", + " 'аса': 2800,\n", + " 'сав': 7497,\n", + " 'ави': 2511,\n", + " 'виц': 3284,\n", + " 'ицы': 5047,\n", + " 'ды': 3884,\n", + " 'ьды': 9360,\n", + " 'рб': 7122,\n", + " 'орб': 6708,\n", + " 'рбу': 7130,\n", + " 'бун': 3130,\n", + " 'уна': 8287,\n", + " 'кв': 5256,\n", + " 'зи': 4567,\n", + " 'ква': 5257,\n", + " 'ваз': 3201,\n", + " 'ази': 2594,\n", + " 'зим': 4578,\n", + " 'имо': 4873,\n", + " ...}" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# vec.get_feature_names()" + "vec.vocabulary_" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -1205,7 +2219,7 @@ "(368802, 10000)" ] }, - "execution_count": 66, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1216,7 +2230,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -1237,43 +2251,43 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 225 ms, sys: 23.7 ms, total: 249 ms\n", - "Wall time: 253 ms\n" + "CPU times: user 230 ms, sys: 77.8 ms, total: 308 ms\n", + "Wall time: 336 ms\n" ] }, { "data": { "text/plain": [ "[('монце', 0.24999999999999978),\n", - " ('донце', 0.24999999999999978),\n", " ('конце', 0.24999999999999978),\n", - " ('херсонцев', 0.2640199278060129),\n", - " ('саксонцев', 0.2640199278060129),\n", + " ('донце', 0.24999999999999978),\n", + " ('херсонцев', 0.2640199278060127),\n", + " ('саксонцев', 0.2640199278060127),\n", " ('сон', 0.2928932188134523),\n", + " ('ньонце', 0.29985995798599496),\n", " ('олонце', 0.2998599579859951),\n", - " ('ньонце', 0.2998599579859952),\n", " ('соне', 0.3264246859454365),\n", - " ('донцем', 0.3291796067500631),\n", " ('монцей', 0.3291796067500631),\n", " ('солнце', 0.3291796067500631),\n", - " ('концессионное', 0.33217692887937167),\n", + " ('донцем', 0.3291796067500631),\n", + " ('концессионное', 0.33217692887937156),\n", " ('концерн', 0.3545027756320972),\n", " ('монцезе', 0.3545027756320972),\n", " ('концессионном', 0.35948738477965136),\n", " ('концессионера', 0.36155760193093855),\n", " ('концессионером', 0.36555873142548445),\n", " ('лондонцев', 0.367544467966324),\n", - " ('эстонцев', 0.3700592116512881)]" + " ('эстонцев', 0.37005921165128786)]" ] }, - "execution_count": 68, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -1292,7 +2306,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -1307,24 +2321,24 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 74.2 ms, sys: 25.7 ms, total: 99.9 ms\n", - "Wall time: 106 ms\n" + "CPU times: user 225 ms, sys: 27.3 ms, total: 253 ms\n", + "Wall time: 259 ms\n" ] }, { "data": { "text/plain": [ - "[('солнце', 0.8333333333333334), ('монце', 0.8), ('донце', 0.8)]" + "[('солнце', 0.8333333333333334), ('монце', 0.8), ('конце', 0.8)]" ] }, - "execution_count": 70, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -1343,13 +2357,13 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6b7310769f05474692061c96191cbda6", + "model_id": "1214fb53a53648399f23b37a8bbd446e", "version_major": 2, "version_minor": 0 }, @@ -1401,10 +2415,8 @@ }, { "cell_type": "code", - "execution_count": 72, - "metadata": { - "scrolled": false - }, + "execution_count": 51, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1458,7 +2470,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 52, "metadata": { "scrolled": true }, @@ -1468,14 +2480,17 @@ "output_type": "stream", "text": [ "Collecting pyaspeller\n", - " Downloading pyaspeller-2.0.0-py3-none-any.whl (12 kB)\n", - "Requirement already satisfied: requests<3.0.0,>=2.27.1 in /Users/mnefedov/miniforge3/envs/tf1/lib/python3.8/site-packages (from pyaspeller) (2.28.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/mnefedov/miniforge3/envs/tf1/lib/python3.8/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (2022.9.24)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/mnefedov/miniforge3/envs/tf1/lib/python3.8/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (3.4)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/mnefedov/miniforge3/envs/tf1/lib/python3.8/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (1.26.12)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /Users/mnefedov/miniforge3/envs/tf1/lib/python3.8/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (2.1.1)\n", + " Using cached pyaspeller-2.0.0-py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: requests<3.0.0,>=2.27.1 in /Users/mnefedov/.pyenv/versions/3.10.9/lib/python3.10/site-packages (from pyaspeller) (2.32.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/mnefedov/.pyenv/versions/3.10.9/lib/python3.10/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (2024.2.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/mnefedov/.pyenv/versions/3.10.9/lib/python3.10/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (3.7)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/mnefedov/.pyenv/versions/3.10.9/lib/python3.10/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (3.3.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/mnefedov/.pyenv/versions/3.10.9/lib/python3.10/site-packages (from requests<3.0.0,>=2.27.1->pyaspeller) (2.2.1)\n", "Installing collected packages: pyaspeller\n", - "Successfully installed pyaspeller-2.0.0\n" + "Successfully installed pyaspeller-2.0.0\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], @@ -1485,7 +2500,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -1494,7 +2509,7 @@ "'Для Яндекс Спеллера есть питоновская библиотека, которая упрощает его использование. У него есть некоторые ограничения (10 к запросов в день), но для небольших проектов этого вполне достаточно.'" ] }, - "execution_count": 76, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -1503,20 +2518,20 @@ "# Так можно исправить предложение целиком\n", "from pyaspeller import YandexSpeller\n", "speller = YandexSpeller()\n", - "fixed = speller.spelled('Для Яндекс Спеллера есть питоновская библиотека, которая упрощает его использование. У него есть некоторые ограничения (10 к запросов в день), но для небольших проектов этого вполне достаточно.')\n", + "fixed = speller.spelled('Для Яндекс Спеллера есь питоновская библиатека, которай упрощает его использование. У него есть некоторые ограничения (10 к запросов в день), но для небольших проектов этого вполне достаточно.')\n", "fixed" ] }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/mnefedov/miniforge3/envs/tf1/lib/python3.8/site-packages/pyaspeller/word.py:17: UserWarning: Class Word is deprecated. Use YandexSpeller().spelled(text) instead\n", + "/Users/mnefedov/.pyenv/versions/3.10.9/lib/python3.10/site-packages/pyaspeller/word.py:17: UserWarning: Class Word is deprecated. Use YandexSpeller().spelled(text) instead\n", " warnings.warn(\"Class Word is deprecated. Use YandexSpeller().spelled(text) instead\")\n" ] }, @@ -1560,9 +2575,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.14" + "version": "3.10.9" } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 }