From 6415d294269865e7d6fe5d35eb495f4fd3b7db99 Mon Sep 17 00:00:00 2001 From: marshygeek Date: Thu, 15 Feb 2018 20:04:40 +0400 Subject: [PATCH] FilterUnnecessaryEvents (#102) * Added func exclude_unpopular: removes unnecessary events that was referred in issue #87 * Less patterns * Redundant parameter "r" --- surebet/handling/excluding.py | 25 + surebet/main.py | 4 +- surebet/tests/handling/test_exclude.py | 2 +- surebet/tests/handling/test_unpopular.py | 37 ++ surebet/tests/handling/unpopular/known.pkl | Bin 0 -> 4584 bytes .../tests/handling/unpopular/knownResult.json | 432 ++++++++++++++++++ surebet/tests/handling/unpopular/sample0.pkl | Bin 0 -> 4979 bytes surebet/tests/handling/unpopular/sample1.pkl | Bin 0 -> 6050 bytes surebet/tests/handling/unpopular/sample2.pkl | Bin 0 -> 6875 bytes 9 files changed, 498 insertions(+), 2 deletions(-) create mode 100644 surebet/tests/handling/test_unpopular.py create mode 100644 surebet/tests/handling/unpopular/known.pkl create mode 100644 surebet/tests/handling/unpopular/knownResult.json create mode 100644 surebet/tests/handling/unpopular/sample0.pkl create mode 100644 surebet/tests/handling/unpopular/sample1.pkl create mode 100644 surebet/tests/handling/unpopular/sample2.pkl diff --git a/surebet/handling/excluding.py b/surebet/handling/excluding.py index badcadc..e0b1a0d 100644 --- a/surebet/handling/excluding.py +++ b/surebet/handling/excluding.py @@ -1,6 +1,11 @@ +from re import search + from surebet import find_in_iter, reverse_enum from surebet.handling.surebets import Surebets, Surebet +women_patterns = ["\([Ww]\)"] +other_patterns = ["\([Rr]\)", "U\d+", "\(U\d+\)", "U-\d+", "Reserves"] + def _del_equal(found_obj, posit_obj) -> None: for attr, found_iter in found_obj.__dict__.items(): @@ -20,3 +25,23 @@ def _del_equal(found_obj, posit_obj) -> None: def exclude_posit(found_surebets: Surebets, posit_surebets: Surebets) -> None: _del_equal(found_surebets, posit_surebets) found_surebets.format() + + +def exclude_unpopular(surebets): + for book in surebets.books_surebets: + for sport_name, sport in book.attrs_dict().items(): + patterns = list(other_patterns) + if sport_name != "volley": # volleyball women teams are allowed + patterns += women_patterns + + for idx, event in reverse_enum(sport): + if is_unpopular(patterns, event): + del sport[idx] + + +def is_unpopular(patterns, event): + for pattern in patterns: + for teams in (event.teams1, event.teams2): + if search(pattern, teams[0]) or search(pattern, teams[1]): + return True + return False diff --git a/surebet/main.py b/surebet/main.py index 6226366..2dc3169 100644 --- a/surebet/main.py +++ b/surebet/main.py @@ -1,6 +1,6 @@ from surebet.bookmakers import Posit, Fonbet, Marat, Olimp from surebet.handling.detailed_surebets import convert_to_detailed -from surebet.handling.excluding import exclude_posit +from surebet.handling.excluding import exclude_posit, exclude_unpopular from surebet.handling.searching import find_surebets from surebet.handling.surebets import Surebets from surebet.loading.selenium import SeleniumService @@ -29,6 +29,8 @@ def start_scanning(iter_num): exclude_posit(surebets, posit_surebets) + exclude_unpopular(surebets) + surebets.set_timestamps(old_surebets) old_surebets = surebets diff --git a/surebet/tests/handling/test_exclude.py b/surebet/tests/handling/test_exclude.py index 3934574..6bbbb01 100644 --- a/surebet/tests/handling/test_exclude.py +++ b/surebet/tests/handling/test_exclude.py @@ -29,7 +29,7 @@ def test_known_result(): surebets_other, surebets_posit = read_sample("known.pkl") exclude_posit(surebets_other, surebets_posit) - with open(path.join(resource_dir, "knownResult.json"), "r") as file_result: + with open(path.join(resource_dir, "knownResult.json")) as file_result: surebets_result = json.load(file_result) assert obj_dumps(surebets_other) == json_dumps(surebets_result) diff --git a/surebet/tests/handling/test_unpopular.py b/surebet/tests/handling/test_unpopular.py new file mode 100644 index 0000000..4420eb0 --- /dev/null +++ b/surebet/tests/handling/test_unpopular.py @@ -0,0 +1,37 @@ +import logging +import pickle +import json +from os import path + +from surebet.tests.handling import package_dir +from surebet.json_funcs import json_dumps, obj_dumps +from surebet.handling.excluding import exclude_unpopular + +resource_dir = path.join(package_dir, "unpopular") + + +def read_sample(filename): + with open(path.join(resource_dir, filename), "rb") as found_file: + obj = pickle.load(found_file) + return obj + + +def test_sample(): + for sample_num in range(3): + filename = 'sample{}.pkl'.format(sample_num) + surebets = read_sample(filename) + exclude_unpopular(surebets) + + logging.info("PASS: samples") + + +def test_known_result(): + surebets = read_sample("known.pkl") + exclude_unpopular(surebets) + + with open(path.join(resource_dir, "knownResult.json")) as file_result: + surebets_result = json.load(file_result) + + assert obj_dumps(surebets) == json_dumps(surebets_result) + + logging.info("PASS: known") diff --git a/surebet/tests/handling/unpopular/known.pkl b/surebet/tests/handling/unpopular/known.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a1866982c123565e463b233e0336cba8683269fa GIT binary patch literal 4584 zcma)9d0-q>8Q-SqwN0BI=?SDmdQA#Bc6(UVy4xJ4Hl#pf?SNhD>}Jw8&1NUx+ufw8 zJ`F13fuJH{tKxz8QEyvNG=jG+-qu@CJP{S8tsDa1_szVS4XuIsBYE@Qcf9X6zwg+w zskvfVI$5XGEbKyG-Yx8F4mOJMUjL*Rhgjp)FhxO?H8&QEWj$}8xK~TT)C^RclUQj( z@yOCNv@~784AYv9rixxJ=SY~Dfm!@satZ~v2=NTep3G$Duu~{SHfzx490hYtYZ}@d zvSq0VwHcT<$)zqwp~%2|G_XLyLer`a3_x9X%$i9z3^0I28CX2oodRodSc1luDp+P( zRj5CbfaR0iqund??Y8$hG7jgV-uVhH@GnuY2OX&AfJ$D;&cg}@sNc?&JPAfu>M=Bq z58pV4R^Y;}^rkU?kD)F1_qzw6!J2}5yI^HG>)$5{s|2}PtzeBQ>ct9Sbfs%g#3JSGR!8%hcXPXx(xHz;)(c?olH;nnq z27%WQzW*i#&7tfRb|0T8i=G9#Yw+(@&N}6+9k9e6EGD2O1Ffc2ho-xwvp?q!c*Ax} zuUl|!T0z39#iJfaBB_cky*W`X%OWJ5$V3v>XJCVAEuCP(Xp4*(-LTPG7@09TwIQPe zHsOT?aqn3QHk$$&cZn&8O3zmC98(~J1d`^tCd0Hv!SggsDFx{;bYdJn6ng8JzpR95 ztAe&rmXDt*8^E+?^A-1BQ-3V@D(wn7w7X6P&qoth9M#=I-(KcZ0=nqKS9o9GTNjny zw<&l5c_-P_6A?)w*}(hiL-ZUDyl+>~t-W8Wpyw>_N!URaA0i?hKS8lKp2PcP3N9z_ zB%5B?&(B2JzdqVXGzQ+UP_R>b?^WL|<*Mf|rtgPQZ%q zuu|WsB3Wz&*-$p-M@vXVvH=on%Ls&dIzS?)pijGV6!eGTMDE7*vOz?l?^7T`*$Q`& zY!Gzl4uf_CCZua}i|7=EmL7XRcyhok*s$Ls!gLPg%Z0v{-jY+6#&)B}*bW1Zz!c1z z{<$a^J1f+TtuScK3l)tXtqKL5d7gqH&OGolPbw&enOE{LFDn?gr!CvbmM!9gvXt|5Y%rzgq=nAcG0jBg1rf0=@pYj;Y)D?%J+6F&|eCKDlrDlmVg zf-xw z@DlKP&P?{P6JuH>NahU+-WbXx;kt^9L>aRyb)TP7SfT?ayvelYAgJ4lWuv|6%9dVx z(3NmKf9P%QKD#K~l3j4I)Jg~2)D298H&^KR8b&X?#ac{uA{}E#pk=hfTZwpF(S?^A za7N+A47`osvvzU7DZxz{cssujd-*(`-<*MWaM3=b;4o9?ot&-jQt<8 Mskyhp)% z^{0HFg7?$sp=?b(8ZoCmt(ik?@GHly3O=A^)3s4+i)5({V$DcytjyLAD)^9g_hAJe z2~AXovV|8vZX&zsf~Jnrr_&eCm4TmoNURy{?T4!(%&uz7}}nKc1NnF;+~ z0q0#f79J$wHeL_f(k{X6h!0JvUsCX8Q{ZTN2lM4C3ckvG8E3wHO~IXErMZg%{knp0 zXh6TI;9CR|w=No=@IjDL_wXu)g(jlQw-tOx%cdR=+gycg0H}FKP-rm6Yy$MV{=|Eg z-DgYqo;3@NciTf=aoCnF9B1fGREpfqD2F1(PMnl-2VM9+;UsYC{(;G{{!qb>IMyF? ztUpok(=gVb`MGgY!OwNDzfkbYkP)Y%G8c@a6RPU36#QDtrm48&L+puU{i>RJfKubs zT>)8tqu{sN-R~6KqfMlq#@)RNey?SpnLCC8vHe3(i|o<7eX!+HdoY`K;67^+-nYXo z<^=vya%4-lQ!F|8tmh8G9~tjInT+u^R>PL%D`Knrv$br3Dvpwove5&7LB|aKUlsg~ z!M~ru|GR<*Lh%2=MEj?Le`zotRPgUGz>4HQ*U0@J1rJ5Ces&V}k!;Y>8prUeu^1O~ zCfX@eBt((}JpDBD@|piTFaK9Z_^^UUIKoFc!p9W+FO2YUj_?U3WB9uz2vSbLwUSl5 z6O`b|o41_G8r2mV={ZqZi`sn<@LWAW2AL(NvF7!H`;weaZ#;@{J2P}YGkWC=Jes6I z$(eYLaXaG1jGTpQaZ^Ud>Hp0cIh+0;6~}~Me!RRpI3#Ou)2~2s4lb3Pi+`yO$yyS} z-y3os@6YG`+KgO4`#5pRg}h(K`~I&FxrolN+RMd!wuI04$C_NqJIi<{m~tzo9G#jK zW1(Em8s}ALq#g;&)p(>43@Yo6FvUU^$@58*D=k+nc>%tkyCTVYJR-Wu73}S9p-YrB zaD%|Skp2@I4Y*KpCCecit7xClSk3!uct1d6Eu9e>CZ9F(*>W0i`7Ll?%^xR^&~$dl9wAulxjD!Y9p?RRh#JlQE|JtZNJYY IrSMqQR&4ne^gcEJG_L>P`HVM2Old#Pb(ruS9PEbN*x zOA>P@=1vqf2gcmwPRuFM+|eY)B<4;CxitwVfmuvXmz^uS5{y*hiT{Q}uTn`_g*h*Jdem~5yYH_3mF0N$#a9Uxmpp`CBFwYc?VwNyc$%*&V z^MIJ0x+m})QDP*W2S zunkt=LOx|H6sOwO*aQhgn5ksbvdxd&)Oa_(P!LH4t8@@;3fe+)Mmln~Ys}-z?Y@;wE+hDobenAAwU8^9ams_h~9pzk)Y2{0uyE*R`SWjux zm?!SNdDnbeeLkg4iN{v&P_ThkXWKD>L6vQ=`icf#K3ILDg6s6^n-pA6F1XNQ%V}?O z1eRF9@MZ;DLfcxb@BX7}F|;F*E!Ht{I{}GBb}Lyd16yF>rA?~=!|RaF-dw)h?X{cJ z`BL7dU$B*Ha-$*L2tlvO2}YbJ5;62IR)+$SLU5w#fFD-k{)(I{h~(cr&HMO7tv?#uiGy{RoE32a*n%pZ;4Eyj9yKheT9G zwj>>>(vSK2gfFB%H!FCXUheG*-l1J2PDT~jo;-KCcPe<7woML?Ef?DQ%O!{U9V9xC z?>715r1sgeGjAI^q?;?)`;2X^3*kN11YD_YRoZCF+l4-_>~{0`dwCDuXIfKnx+86z zgKcVA4!3X-p9Q9gTX}n^|I_uP1rGAobh#zG+Q4lr_%zW9xAX6!E$uSgf&9>g2qfG0^6ntFM`UwS})Kn!-lJakUly-6H z0ETpytbR(tVQrfn5p6+a8_4QJUjw5e{&a9i?Y@HD-@L&tW(y8{#+r;Ptj>Emf!8-j zHg`B)*(qe*d=Wm&q<+q1&et-`*cbqf_3(LXdKDBUE&>}H;R{;jzNp|!Ovaa)jJp+l zB_!jkeocQ(!Phk#-%#+)FvAyVarY=V650;&8woLYPK%qTk2I72Ed}@Lfu{E$Rbpi=&bi|l>{4=}Gk zHksI;DEMhe?1P->&lEhQ6aBe@Uudoprz5UrP>r3X6~9#Qu(nOsMpuW(HqeTP{5h^N z4&qk|eoaBtl4AXagZQn2-{~NJuiy`n+zb>S0Gs(Jy>m{&Tf1AFI;kztF)KD8_$fLjR`V?@Z`FIF)}Y_*cm1Bb>^k3jVEA z`HzDCQg~E2@pim$K=T>hPL3=1pSDfbg=rs&Y=d?@nQn$XcSfi7F<%VZg(JId&+E=R z#zwohr?06M9=As0N^KoDAoG z;}HW38PO6$Bu+(~MTj7PI^F1b3tAbxh{1jUi+N;8;Evl|txp;Ck@j#!6 zbz7FBYZ#wkyx)ps5?`30RXo?mbAEj7JhGZcf~HJ_g355T2>W2=iK|25y<=a!n^0*|J-x_d(OG%O@DbR zUli$Nx-ic&bIt9UT+u8gsXBCtuV6ZWOmO5PS zX&Qo>hDsP_#D}7)yq!v=1ym$pIFAeITrQJ`$^?uUNagCTbgtm(9EmzdNf>R!hoH_* zQxx(rCIMpyFlrMn@)V3i1>+@5Fye!?0;p;X#4E@`%>hhIz@&j}(%LFvGAf%Q;W8sW z2=Uz^s2;$MdOO#=-E2*ZO1KZ^9%W5 zi&LltW;R3@_v4O%fTFh8(vs;OFn zN#~+|R1gSQvn({jQv26PnCGok&Xlgjoh{sMX5aCK!&STWcJn1HaJCCc2&0O4CF;)P zns+f#Ay`Nr1&k9Z_6nH3hZrBLKk{FKrz&X zup1+&M?x>U$R*1TLm#Eqa1|*`1FhLD;dWY6jvm>e9+0>wa!S#`i4%E-idyHRp8t2K z;@Tw8-8;Fst`*s00s7hXJ-*qye6x4EUU%8;?Rds}6R?lHzDL5n?Dc)@_5BhaaJ_zz zy?#i-o18O#vxJA~+^FP6&UkTOA4Ly1?y2agGk%MNw>oS6<=I2b;nR;O+5oxKCYeELfK@+k5i`JInHSu>0>c;*)W@F&4Z4 zTN7Fe@8Z;X1DGb>&GgXKOPxUo_H%t{v2)l}zyaowCc_C*{_#`T|+aM;oRDG8r;_4mMMNPh)a8Z0$h|7Rt9 z&eb0Z!{=GQYXm0ZP<0z={a=vqMMwXaBs_swi-Y$Zm3h8dmEm8O@D-Zm@E@!0NM-QK zsAD}D{8b5GbAs&a623tgzSrq)!Uv0k<8R3w{7nf*ytT@!(zW^)T&DU1ZU46%cyq#f+O;I3BPn~`jv!VyKarT7>k|} zzmf1;ckML2GX}#2MkHr&-Q+5grrYy(1~(5)*p7GFq7bYzTg+T5{60PwwX70mtDRew z4(>4RVy-z2f8YXg?BbZ?TsRaa*zACgJ`R7x!of-ZCkcP%r2h*O@K*_cb94UhD(C+p z;h)4nH=fW75?&;)I4!2Qd8TsaUlRUJlbld5yMct2QeCQRbP)eX!b=XN|CR7R!thyD zD20>8Z5yaOp~4aS(&+LYOx?&yV+T(axT zj-*{I*m+`UJ-#-ZaEh5d?LZx5KIlNtToQT~H%?plE%geM{Hlc4nB;Rz@_8u&_-Wwq zEXsI*2_OdHn-t|d=DuDGX2}qi=*lv1cQ1xAVwfK>$jesmdqC&F9H&vCXcrX>9?oE8 zKqX5?Xm#8+`x<<&M{@f{F_ICZN)YbBdW|-BRal^hQNPu2n3IqWON?e@tyM?-i!n4B zuuB$Waml3`WibvXISh6r#CUwC0}&JOMT#o?OUEK6@(Z@lViHeG=CNv`#S|PcH<$5T zHP0z-E@#P9mgwH%oJV!FPI^^tIE2MCMojl1qGfI|i+Tv%VLay~5q8!TI${RngN#>Y zO3cIo+c%5nX7ik~?+TV&$&w3#$9FAJC@A~pFybm7B67|vpr>7rw0-(Z*sWc{V5DVn zLo4Re$lvygt5M=_YsEDv;N$5?h->kkkEaG-q?m_)`FQ4&oV;Ja@(|1Q>4a&HB6lIr zE#f)l{bH6}#}f6y=l|ka(9h2Jzj(y;j9B7BM9bYOf5b!R@1BWHnbF@pVksdx66>wB z$eQ_lHiPBF?CR{T35#Vo8y|tY)-|F`WQv`3ySM>`9Oug!OP@#{)`hMhC^e9^eRfv| zen5E&R+0iLb9)mag74OT>wu-&J-a>A(J7)R#Ph|DPt@Y86m|HQ`aV%l%PAdJv3xbl zF&%CsIq}`V@--~ie6OWB;`=6^Yveh;8^k)6tY?Y7A*xGn2(f{eY}89GyeD4eCJZlR zhtW;6lo`E9?G~GHHa?sfb;`Hcf+FTK#+#h6s1%!6ip5qGSXcy_=zsEh8@@=fo!6n) z^lJ?_zG4SY+{|O`_ANXhzjyLvg2yV(Ze__Xmgp+*imGYU@=QVV@X<|&%Cg%SVfql! zA#R-c5c)csNS~4?j;h;JBpIAyuri>TC27C9fSa_wqT;1_3nNK5ZZup^-GaHf&zn!3|0nov7}q8<6E*!k%kN$p`K;*pdn&#rr&-guz z*H_V9IN;)NH_z?mIc48ImfXVA0kS>HsV7)pNCK<6nWM; vAuJu#MvMCynYG%v96UgyHoAV={8!F{D4@#u5dFX3dc-=|rs`!npPctPM>GIa literal 0 HcmV?d00001 diff --git a/surebet/tests/handling/unpopular/sample2.pkl b/surebet/tests/handling/unpopular/sample2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..99ab1c7c5d79f3ce43ac4063c0c71f1196f67cf7 GIT binary patch literal 6875 zcmaJ`33yc16;6mCMiGRd5j8w<;YDOg+)%=`H(BQ8+?fd^dKDMk z_kG`RUr^MxwzVzQw$@hLTH9*vrrle+*jf}7q34`?-@A7NGM`^~@7(jBd(OG%{`cJH znyO5pB(iPU;91&mC?Zv0-%=JUA%RHtG0p-j&0&*qE9<{oHsl!QIq z)Nr)f=Zj(iMyFxS5Kb*ZC1YSOG%!}eI5)M6HUKrP6{(Trq0Rw}Ps83r`J}B?!#-%N zR>Hn+Y8c{IHNb=+{AdpI?aTbmtf+?l5VyaC1C$eazc&kY98fVV`aLj_6V%~nilG3f zC9wsK_&^quHrs zXz##GoHw!6%o~|inQVmPJN*Wony!-X=bI!(gjNY`_2r{5ivDx<5CDm?U zN0=e}FTl(s-P9I=)14iFt&^3|5$SrvSGW{;S^04OaO;fxVoo~U)UFsybF$T0x-9|tDRnZD3 zwymvu;k5VH(hAR9gI;htOf_6B;hIR-A`{Q((r&_P7)Dm=Uf^`F6W2<(j-05ZHM!np zFK&=P zI-{#sIL-YirVMqv!XLw9O!lyg@buNPnGSstLouNF5G0%7Xwbx&kV)`PjT6= zcT?lhpY7pP+fOqsE(-gE-|pBZXz&@PMQX>WTWo+0OcEVo9y?8VmfIQ{XoTnZxz`td z5uV3YqW=!p|^X?gYaI_&L>Yjp|r1_Xh{0^9u=Ykxmtc7p`uEw`oIcR>?ypB$DZG z5RWX@d+L`G-l5zR_E!>q9ot_Gzajf0BNo>7e=Fg4vHgZdc$e+RN@%~D7`^X7ZT~$9 zzmM$yLBbymb~XHou$r0T0RAlDFR>Gi@KX+9o%bdD1EH!Z zrp69`R69ESvKI2668>d$5masWC@ey-o4UuLPs6E{$5m+1@|@Z6cj|<_cnAQB6oD zhb4Hs?F&WGY4SV#e5cqQM^atS7Vl=_A;;=avWAIsuy~qx>=wd8BaN5SPyoTkfR~O^?h4QvH5AMV7ythRyOZH_+ zUBJ7lHr};N9by8j?59=Y znnq)^T+?}M29K#+Hn60TCA!SbrrObsTpYoOBg+tmB+MONZ~Q5IOF1V#62(!BoE`WOwL6LNEsWO` zH4k6dzms|F6dqIlwX)pG_2mZ}x!~zRpw=)Kz{o7{w#}u`YkwL(BK(UD4 z?36({m-ryXdH9!W?R*J-N7URIxjJU=|3_(WTxH1Q>W$<+jRt{Xxk{fJuRs2x}@C^0# z-$4d$WW-GtB4O=NgV3jL4fku<5OD$C%-~xXtQ@$NCAV4T?1Y*xeI^69GvW>lk@z(3 zz^(6y+JP$jBd)lU!FMrOIdC^i?$PGdS6P)b%)mza-vIb4W1M>#ai4{-_|`KB%{W!+ z8?Vm5{S1D9!ODSkEP2o}xAX6M{Jljy#E6G2gq?x%uZH+3Y@7N-VtVFB82>2a^?~#l zzVHe?&SOvTnDXyQmORB0y-7ZfkH5qRae~YWt!Kp37Q&;?^!TI3%pmlE^hUHoNFaP5 qJ;TWEpod9e1HE<8A=G6bJkO$l`13jXcSG=e@LZQ-&1|7<+W!G4)$0lX literal 0 HcmV?d00001