From 717b163163b60e931900f93ab15ac115095c43da Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 19 Sep 2024 21:30:08 +0000 Subject: [PATCH 01/11] p Signed-off-by: kevin --- scripts/__init__.py | 0 scripts/__pycache__/__init__.cpython-311.pyc | Bin 0 -> 167 bytes scripts/pipeline_generator/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 186 bytes .../__pycache__/utils.cpython-311.pyc | Bin 0 -> 3138 bytes scripts/pipeline_generator/utils.py | 53 ++++++++++++++++ scripts/tests/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 173 bytes scripts/tests/pipeline_generator/__init__.py | 0 .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 192 bytes .../test_utils.cpython-311-pytest-8.1.1.pyc | Bin 0 -> 7918 bytes .../tests/pipeline_generator/test_utils.py | 57 ++++++++++++++++++ 12 files changed, 110 insertions(+) create mode 100644 scripts/__init__.py create mode 100644 scripts/__pycache__/__init__.cpython-311.pyc create mode 100644 scripts/pipeline_generator/__init__.py create mode 100644 scripts/pipeline_generator/__pycache__/__init__.cpython-311.pyc create mode 100644 scripts/pipeline_generator/__pycache__/utils.cpython-311.pyc create mode 100644 scripts/pipeline_generator/utils.py create mode 100644 scripts/tests/__init__.py create mode 100644 scripts/tests/__pycache__/__init__.cpython-311.pyc create mode 100644 scripts/tests/pipeline_generator/__init__.py create mode 100644 scripts/tests/pipeline_generator/__pycache__/__init__.cpython-311.pyc create mode 100644 scripts/tests/pipeline_generator/__pycache__/test_utils.cpython-311-pytest-8.1.1.pyc create mode 100644 scripts/tests/pipeline_generator/test_utils.py diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/__pycache__/__init__.cpython-311.pyc b/scripts/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5c7f8f2a99b65ad67d4aba6645245fba7f99d52 GIT binary patch literal 167 zcmZ3^%ge<81R_)4q=D$iAOZ#$p^VRLK*n^26oz01O-8?!3`I;p{%4TnFDw0w{M=Oi z(xlS7l2ZM$oSa@J%z~0){rLFIyv&mLc)fzk lUmP~M`6;D2sdh!IK%+pG74rj$56p~=j2{?aL=iJk3;;|xC`AAO literal 0 HcmV?d00001 diff --git a/scripts/pipeline_generator/__init__.py b/scripts/pipeline_generator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/pipeline_generator/__pycache__/__init__.cpython-311.pyc b/scripts/pipeline_generator/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2750a53e48795264d1de74b946d95ba1eaccef7a GIT binary patch literal 186 zcmZ3^%ge<81Oijvq=D$iAOZ#$p^VRLK*n^26oz01O-8?!3`I;p{%4TnFJJwP{M=Oi z(xlS7l2ZM$oSa@J%z~0){esMb)SS$`)cEw& zywsw^lKdk5`1s7c%#!$cy@JYL95%W6DWy57c15f}(?RwZ^8<+w%#4hT9~fXn5i?K> E096bx{{R30 literal 0 HcmV?d00001 diff --git a/scripts/pipeline_generator/__pycache__/utils.cpython-311.pyc b/scripts/pipeline_generator/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0b951e103c95162898ca38bcdb1bed0ea696e30 GIT binary patch literal 3138 zcmaJCO>EoN`B9=J>fe_98>gO!J!}+l9m}$qVh;J zu^_iR z=+=O@@k1J+NAS6WO^ON0FQvvj1`_*$?(d=pfFA6klTvbv5mWbQc1RqQ=pNl6sb5S= z{aXQX805n*$wxpwDh-MQ@Qt17M#VFrJ0emWXlnd>a*pzp&t_KZx~$fC#imQLrZ-1( zSXK30R^WwgDYxCIRODJ^ix;HkQ0@aosb=n|ct@)PBd1H6t~E#Mjhl)rWF-M-`8!(f z_GSGoZI*w#nRN99IYSgPSgNbdC{~54$G2&IW_r5$TJ~l`R>U1ycbCl6<+`NEH7UEx zSC!^Omsq;1OBl}Ttf;y2(Jpzf<+&~iZqzc>hN8=vnkq_;eOB8RI?(_G^1~U=FT!+y zBt)`<^@$Yg7kw<+%3)74mL+HIuD!K!Tibsw+I#v0;YdL&&mBy-py^RRVmlF*{JCa zH&%75-jW18*9lZckaL=V<+`rrJg0Nu1PSxHigOKJRXbuj};#? z7OlYx#)_3VYb?1SGGZ)P@e~N+BgP_m@Tj+j9jK`sxX@;ZANAe3WHYr!m9R8BfF-?w zYl0uNz2O6d(9b^xaDZ9}M|DpI^DQspXaltKy3-&=xpR#)9K2*o_Qw zeaGQtK)~TtJ<#ojJ#gD$$LYkt8!hw@_>VfWJt(=B585x)@_&vV(BL+Ivsh4TA4phF zlOv_QBcO=sYs=+DZee-drf;coZHmU{;c)ne0PINDeeHoBhj5yXHF9l;$!SoANKm#K zQ2E`OcnXv*z^DBLfPubCo;Q=1zj^(#am^Y%`$gyvp~K9x(dpySX_FqZBD4GBW@MJU z>6uf6o^?xM671~|Nqo}EGcSz3g($GUMSltEK3Ot1U zA9c>qyDrIwvd~EuSb`jo)XQ^HEeEIqSA3t@gZ|y3Ac@ce|L=VFLtsXoP?LBBbRhV? zEHptci^xu=0XzxMEAS@-N&QMQM}q16I2>>Hgfu&^r~9A;IwL4p!ys zQ$aT^Ne*E0pK_W4{t=NUZZ}k25?|D6vg_R5JxN0*sS3kwA>;q&g7^|>leM)90B9%9 zjh?&@mv}Z@I36w-tN(-s!lX>*;x{7~zhxpHFMkrhA1BDtC$al6iy3Q2f@cElC<>3Y zV}P0TaJwHx6TiLm>r0;&9{A1Ia?b{*j|ZopMP`m8GbTObIA&_prX@&m+owWnhgJ{W z29pfJq}J@<(p7G0b*1RKDEiA%X@x5m*17BJB|Fxk){ASa&{^R(M=+cwmLbQ%Yx&J3 zd%XDGW^uioFL6ux^@SVx^;7RV!sOcWTCucTE^=3wOGS?G@GMy%zOh+c+u$}=xpzvd zg==_@FzF(k)oJH8eN$Bx+Ye32X~s_3JAQQ1&aGh5yHA*T_g8)eF9J!DS*rtV`zeaD zP{eQ^3xy2lvCuj5Rc||hA{UIHHJmX~%1XRpM6J{h%>G&THDXp|%wVj@fWd%f>`fzN zO8{aSFQr|#t7T_75!ete`q pDLme7BS62_oXNbwy7QLo%x#1|qTca&_@eJ!4+oo5ZV%=} AgentQueue: + if no_gpu: + return AgentQueue.AWS_SMALL_CPU + if gpu_type == A100_GPU: + return AgentQueue.A100 + return AgentQueue.AWS_1xL4 if num_gpus == 1 else AgentQueue.AWS_4xL4 + +def get_full_test_command(test_commands: List[str], step_working_dir: str) -> str: + """Convert test commands into one-line command with the right directory.""" + working_dir = step_working_dir or DEFAULT_WORKING_DIR + test_commands_str = "; ".join(test_commands) + return f"cd {working_dir}; {test_commands_str}" + +def get_multi_node_test_command(test_commands: List[str], working_dir: str, num_nodes: int, num_gpus: int, docker_image_path: str) -> str: + quoted_commands = [f"'{command}'" for command in test_commands] + multi_node_command = [ + MULTI_NODE_TEST_SCRIPT, + working_dir or DEFAULT_WORKING_DIR, + str(num_nodes), + str(num_gpus), + docker_image_path, + *quoted_commands + ] + return " ".join(map(str, multi_node_command)) diff --git a/scripts/tests/__init__.py b/scripts/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/tests/__pycache__/__init__.cpython-311.pyc b/scripts/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3893cd9d09595b59d6028908248103d931bd907b GIT binary patch literal 173 zcmZ3^%ge<81UII^i`MIh3 zrAeiEC8hdhIXSty1x5K;smUe!Nu`-NDcPANsk+IT`o+mbnFS@q`X#BwKr%i)GcU6w qK3=b&@)w6qZhlH>PO4oIE6_lYmBsu(;sY}yBjX1K7*WIw6axVMrz>y( literal 0 HcmV?d00001 diff --git a/scripts/tests/pipeline_generator/__init__.py b/scripts/tests/pipeline_generator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/tests/pipeline_generator/__pycache__/__init__.cpython-311.pyc b/scripts/tests/pipeline_generator/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ae663ecc8366f1723f3e198bc9665bdb85ed5d7 GIT binary patch literal 192 zcmZ3^%ge<81gECFNdwW3K?DpiLK&agfQ;!3DGb33nv8xc8H$*I{LdiCU%~nr`MIh3 zrAeiEC8hdhIXSty1x5K;smUe!Nu`-NDcPANsk+IT`o+mbnFS@q`X#BwK(ZjSAT=j5 zFEu_rH7~U&u_V7pKR!M)FS8^*Uaz3?7l%!5eoARhs$CH)&<>C*iur-W2WCb_#t#fI JqKFwN1^_(eGavu} literal 0 HcmV?d00001 diff --git a/scripts/tests/pipeline_generator/__pycache__/test_utils.cpython-311-pytest-8.1.1.pyc b/scripts/tests/pipeline_generator/__pycache__/test_utils.cpython-311-pytest-8.1.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8abe5bd34a16fc6a85db361a26c7dc818ec882b0 GIT binary patch literal 7918 zcmeGhTWs6bl@uw8k}bbqveQ~Y*E&jSJAUNhq*<3G8R9HSyV>plWdp0|QnGBlc1fj) zCJjarU>UHFU>Fb&OM(2%YvFzLcYpibC!IN%Kx{yPVIKkZp&;8Z%O=Ji`z{SE&xL`L>m5FwHKTzpg=BctnkTG>U?c0JUw+$yd+%tGa z{E~IXjyy+#n=#%LuV*wp<~O_3s;(r`YF<}If#fo03UI>y&D`?;VOK^w*~lZq_B>vSP?4%+v}>o;(y3%zeLXFv9|XH@fClHCqAT4 z`L;mkwyl>4dZ5X{acef-{eIoyw1`W#zsaLF2XJ*iHR|S&nuM)6@;niUcu0E&4z{P_ zwZctjTj8Yx-S8#1shjugKr7uII?(MWQk%LxUl47j+d~JsCD%^w?U3lc8xm;KLcuMs zrG*|kurNU6$5I(At4btDLKP{)TeE;}duNVw-33Glc2ojxt?t>!5F_sLRk7+P+!7b- zxcM3X%6H*Hj4yxl&11!Tr(#;@M~y-BLKH_QW7;vi&cw90EyQedFc;Hep=$I3qdODR zD1J-_ef2TP6hIP!83eNk<`A4haHiaq&@`3mmi&d7hRB6j(2{3{zEQkqIkp5fdzEAY z{2Wz_6x>`fK~=L8mX+J34EVXcW=h!tP-y0eqOBH6S#(ZCs-&}p+lj1_PvlfHsA!3l zifFUzqNO!c$X}tfK+S$7RiL?qu7GRHCh`d#R$}r&H)M98R%Z0Pb8?zZn4GiBI5wF% zso^t36|25ta7ujz| zT-K97NQfYYBpP# zD8H2?QTF~Jp;k;JRra!-k2>jI4IW5T4m*fuCS23miSp?~37?s8>6)FOF}NRk96-_$ z;5qz&aQ^aN5bF0Q4DMvEDpZ7uuaPc5>Hz7&Etku5rVFB#E&%=i(*@XHIR4wx1=o|? z3)!B~`Bf$rB=Fd81!+H#NwDH?gly>$AzN|>Y4@xVvW2Q>h3wExigg!6>;(+V^Ua72 zgD;8LjzE1H0ySo{d*7Jl6=HshbsELOj1B_8AUu!Yv@1NHW#RcOgl9|2DAJE1ID!D{ zABv?89Y+vFa1=l+;L+!dnsd&{DQ5^hyTnqszW2o6;W8WAJp6aM}mfiI#&u0`C^|HK2w|`>Grfs!**|eZ*fC zh*TAw6&2Qr;OIpXsBlEytKEOak-#T{g$;VK=>X)vp(l=d)3$)q?Me?aY}?U*L-f1D zi~A$3N;+1vje}LMXw?=-+qNZE1C>BUYJ?=YB0uzLgF4=G<5;uxk&cQCA*mDT_l2ae z)1|E0ng13@+3S#46|7UXZJ%y)98keJIhtK2t~CeLDq+%fNP7lc_iEeY4K+|F9cYD@ z4s>^s?lyIMD$|y_J#?TOx=U^9_Ee@Vb$jSQcMs`pQ@1B@w$$yR1Kqu(uT9;aI=iKA z4;|?4Yw>1yYF^UcriGq>+|oi19asq6#mBOh1_a3<8L9*uX+TIHZhqz9SDf!R)@9aF z3AufWd)7z;0#(^c1BP#we@dVsNHe!f(1c@Epauo5)$D^x@O+e&&C$s_(Mc`3kJix{ zggQ2p&gY;ZcnChb0)^@uYH>9SJmuH+iFcYpPr&^>$8?)w`=WeQP14C?LSJPSMKUvq zQG60=^1``n0>`iT32M=I0sSjJ+Dv(tG39|y18Y*i;7d{-)~$dh1sgzq#0JpgaAZT! zZJ_9Nt?b0?aMVg>5PO~2I@PTHrP)gg$#s<~nH+Rq6||ovPO>L)qC+Te5&=GQia7|y z5LE7Vv^i#=p<;^A5SfXEDSBj6Tq|VqrjSb%O#vP(GwZPFRR>G27EsqY1o(>4^C)-B z?$kP-d812g-2r_A$s8?>v-C~GSgn;t_hMEN3}VeB;}-p5BecT6fRJ)!N8;Lmd2@{CZ99ugSeNxu+&a zYV!Dl8=FUWJC7LhIQt!ds&~t8NQO9G@8gC?AFcj;^%L#abH=etJL8vk$1m>;UD+MF zvhm(u#H!)`?TFP~2j*#W4ESg6B`2fJ#c1p)&@c=@8@V6DM783DRS&Nd^FPSTHp;mP z?Q9*B!?w|eu;DNUu^wIagRm*>ftA6UvZCY?nY^NueUo=$QB!WVBdwCij3~?#PqD8b znu=3Y{fI(;&kWiqGtj_M{NhcCv6vG4*9Cev6q8~z+UNyN**@6r0!~>CK-Pf8bdxy` zESDO-C6}PmIl?^=?KtMcfcvN@cRJ^ciREl;tyC$?xO} zq?A>^PUFCjNx1fJfOUcA`5O9&7JuOl>#xQQ8}46Slz3mAYZvrzKKcIGpZJld{76k) z_#zm7pg&yRJhwYGXUOxlVAn%wQ`}s8TrlJ*oQV$;o1?ptw+(sHGpz^xe(8l@_WAz7 z0jT%!{5Af`K%K+u=l$&Zdx2ez4?biACw$TGX0A&v&2%h+K*_c~5(#w&7jT|{tt8PY9t67 zIN^(4H)CDu=YP!CIR`w>iJcG1PlbpfL~3H-Cs#kd`ry`s4}NxQ^J`CJBNE#QAKwih W-w{vjiYJWbuP$)nC@@%W#(w};B^@~c literal 0 HcmV?d00001 diff --git a/scripts/tests/pipeline_generator/test_utils.py b/scripts/tests/pipeline_generator/test_utils.py new file mode 100644 index 0000000..5d08973 --- /dev/null +++ b/scripts/tests/pipeline_generator/test_utils.py @@ -0,0 +1,57 @@ +import pytest +from typing import List + +from scripts.pipeline_generator.utils import ( + get_agent_queue, + get_full_test_command, + get_multi_node_test_command, + AgentQueue, + MULTI_NODE_TEST_SCRIPT, +) + +@pytest.mark.parametrize( + ("no_gpu", "gpu_type", "num_gpus", "expected_result"), + [ + (True, None, None, AgentQueue.AWS_SMALL_CPU), + (False, "a100", None, AgentQueue.A100), + (False, None, 1, AgentQueue.AWS_1xL4), + (False, None, 4, AgentQueue.AWS_4xL4), + ], +) +def test_get_agent_queue(no_gpu: bool, gpu_type: str, num_gpus: int, expected_result: AgentQueue): + assert get_agent_queue(no_gpu, gpu_type, num_gpus) == expected_result + +@pytest.mark.parametrize( + ("test_commands", "step_working_dir", "expected_result"), + [ + (["echo 'hello'"], None, "cd /vllm-workspace/tests; echo 'hello'"), + (["echo 'hello'"], "/vllm-workspace/tests", "cd /vllm-workspace/tests; echo 'hello'"), + (["echo 'hello1'", "echo 'hello2'"], None, "cd /vllm-workspace/tests; echo 'hello1'; echo 'hello2'"), + ], +) +def test_get_full_test_command(test_commands: List[str], step_working_dir: str, expected_result: str): + assert get_full_test_command(test_commands, step_working_dir) == expected_result + +def test_get_multi_node_test_command(): + test_commands = [ + "distributed/test_same_node.py; pytest -v -s distributed/test_multi_node_assignment.py; pytest -v -s distributed/test_pipeline_parallel.py", + "distributed/test_same_node.py", + ] + working_dir = "/vllm-workspace/tests" + num_nodes = 2 + num_gpus = 4 + docker_image_path = "ecr-path/vllm-ci-test-repo:latest" + expected_multi_node_command = [ + MULTI_NODE_TEST_SCRIPT, + working_dir, + num_nodes, + num_gpus, + docker_image_path, + f"'{test_commands[0]}'", + f"'{test_commands[1]}'", + ] + expected_result = " ".join(map(str, expected_multi_node_command)) + assert get_multi_node_test_command(test_commands, working_dir, num_nodes, num_gpus, docker_image_path) == expected_result + +if __name__ == "__main__": + sys.exit(pytest.main(["-v", __file__])) \ No newline at end of file From e3aac9c054f3cc4f4d943f4144923b63615cfe32 Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 19 Sep 2024 21:31:32 +0000 Subject: [PATCH 02/11] p Signed-off-by: kevin --- .gitignore | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3da3291..9d57626 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,9 @@ terraform.rc .env -.vscode/ \ No newline at end of file +.vscode/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class From 49043e5bc55327707e44f68b18a81709ca852558 Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 19 Sep 2024 21:33:15 +0000 Subject: [PATCH 03/11] p Signed-off-by: kevin --- scripts/__pycache__/__init__.cpython-311.pyc | Bin 167 -> 0 bytes .../__pycache__/__init__.cpython-311.pyc | Bin 186 -> 0 bytes .../__pycache__/utils.cpython-311.pyc | Bin 3138 -> 0 bytes .../tests/__pycache__/__init__.cpython-311.pyc | Bin 173 -> 0 bytes 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 scripts/__pycache__/__init__.cpython-311.pyc delete mode 100644 scripts/pipeline_generator/__pycache__/__init__.cpython-311.pyc delete mode 100644 scripts/pipeline_generator/__pycache__/utils.cpython-311.pyc delete mode 100644 scripts/tests/__pycache__/__init__.cpython-311.pyc diff --git a/scripts/__pycache__/__init__.cpython-311.pyc b/scripts/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index e5c7f8f2a99b65ad67d4aba6645245fba7f99d52..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 167 zcmZ3^%ge<81R_)4q=D$iAOZ#$p^VRLK*n^26oz01O-8?!3`I;p{%4TnFDw0w{M=Oi z(xlS7l2ZM$oSa@J%z~0){rLFIyv&mLc)fzk lUmP~M`6;D2sdh!IK%+pG74rj$56p~=j2{?aL=iJk3;;|xC`AAO diff --git a/scripts/pipeline_generator/__pycache__/__init__.cpython-311.pyc b/scripts/pipeline_generator/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 2750a53e48795264d1de74b946d95ba1eaccef7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 186 zcmZ3^%ge<81Oijvq=D$iAOZ#$p^VRLK*n^26oz01O-8?!3`I;p{%4TnFJJwP{M=Oi z(xlS7l2ZM$oSa@J%z~0){esMb)SS$`)cEw& zywsw^lKdk5`1s7c%#!$cy@JYL95%W6DWy57c15f}(?RwZ^8<+w%#4hT9~fXn5i?K> E096bx{{R30 diff --git a/scripts/pipeline_generator/__pycache__/utils.cpython-311.pyc b/scripts/pipeline_generator/__pycache__/utils.cpython-311.pyc deleted file mode 100644 index e0b951e103c95162898ca38bcdb1bed0ea696e30..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3138 zcmaJCO>EoN`B9=J>fe_98>gO!J!}+l9m}$qVh;J zu^_iR z=+=O@@k1J+NAS6WO^ON0FQvvj1`_*$?(d=pfFA6klTvbv5mWbQc1RqQ=pNl6sb5S= z{aXQX805n*$wxpwDh-MQ@Qt17M#VFrJ0emWXlnd>a*pzp&t_KZx~$fC#imQLrZ-1( zSXK30R^WwgDYxCIRODJ^ix;HkQ0@aosb=n|ct@)PBd1H6t~E#Mjhl)rWF-M-`8!(f z_GSGoZI*w#nRN99IYSgPSgNbdC{~54$G2&IW_r5$TJ~l`R>U1ycbCl6<+`NEH7UEx zSC!^Omsq;1OBl}Ttf;y2(Jpzf<+&~iZqzc>hN8=vnkq_;eOB8RI?(_G^1~U=FT!+y zBt)`<^@$Yg7kw<+%3)74mL+HIuD!K!Tibsw+I#v0;YdL&&mBy-py^RRVmlF*{JCa zH&%75-jW18*9lZckaL=V<+`rrJg0Nu1PSxHigOKJRXbuj};#? z7OlYx#)_3VYb?1SGGZ)P@e~N+BgP_m@Tj+j9jK`sxX@;ZANAe3WHYr!m9R8BfF-?w zYl0uNz2O6d(9b^xaDZ9}M|DpI^DQspXaltKy3-&=xpR#)9K2*o_Qw zeaGQtK)~TtJ<#ojJ#gD$$LYkt8!hw@_>VfWJt(=B585x)@_&vV(BL+Ivsh4TA4phF zlOv_QBcO=sYs=+DZee-drf;coZHmU{;c)ne0PINDeeHoBhj5yXHF9l;$!SoANKm#K zQ2E`OcnXv*z^DBLfPubCo;Q=1zj^(#am^Y%`$gyvp~K9x(dpySX_FqZBD4GBW@MJU z>6uf6o^?xM671~|Nqo}EGcSz3g($GUMSltEK3Ot1U zA9c>qyDrIwvd~EuSb`jo)XQ^HEeEIqSA3t@gZ|y3Ac@ce|L=VFLtsXoP?LBBbRhV? zEHptci^xu=0XzxMEAS@-N&QMQM}q16I2>>Hgfu&^r~9A;IwL4p!ys zQ$aT^Ne*E0pK_W4{t=NUZZ}k25?|D6vg_R5JxN0*sS3kwA>;q&g7^|>leM)90B9%9 zjh?&@mv}Z@I36w-tN(-s!lX>*;x{7~zhxpHFMkrhA1BDtC$al6iy3Q2f@cElC<>3Y zV}P0TaJwHx6TiLm>r0;&9{A1Ia?b{*j|ZopMP`m8GbTObIA&_prX@&m+owWnhgJ{W z29pfJq}J@<(p7G0b*1RKDEiA%X@x5m*17BJB|Fxk){ASa&{^R(M=+cwmLbQ%Yx&J3 zd%XDGW^uioFL6ux^@SVx^;7RV!sOcWTCucTE^=3wOGS?G@GMy%zOh+c+u$}=xpzvd zg==_@FzF(k)oJH8eN$Bx+Ye32X~s_3JAQQ1&aGh5yHA*T_g8)eF9J!DS*rtV`zeaD zP{eQ^3xy2lvCuj5Rc||hA{UIHHJmX~%1XRpM6J{h%>G&THDXp|%wVj@fWd%f>`fzN zO8{aSFQr|#t7T_75!ete`q pDLme7BS62_oXNbwy7QLo%x#1|qTca&_@eJ!4+oo5ZV%=}^i`MIh3 zrAeiEC8hdhIXSty1x5K;smUe!Nu`-NDcPANsk+IT`o+mbnFS@q`X#BwKr%i)GcU6w qK3=b&@)w6qZhlH>PO4oIE6_lYmBsu(;sY}yBjX1K7*WIw6axVMrz>y( From 72f1fd2e8280005496af1a2e0cb5cb267a4dcf4d Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 19 Sep 2024 21:34:03 +0000 Subject: [PATCH 04/11] p Signed-off-by: kevin --- .gitignore | 3 +++ .../__pycache__/__init__.cpython-311.pyc | Bin 192 -> 0 bytes .../test_utils.cpython-311-pytest-8.1.1.pyc | Bin 7918 -> 0 bytes 3 files changed, 3 insertions(+) delete mode 100644 scripts/tests/pipeline_generator/__pycache__/__init__.cpython-311.pyc delete mode 100644 scripts/tests/pipeline_generator/__pycache__/test_utils.cpython-311-pytest-8.1.1.pyc diff --git a/.gitignore b/.gitignore index 9d57626..797af49 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,6 @@ terraform.rc __pycache__/ *.py[cod] *$py.class + +.cache +*.log diff --git a/scripts/tests/pipeline_generator/__pycache__/__init__.cpython-311.pyc b/scripts/tests/pipeline_generator/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 7ae663ecc8366f1723f3e198bc9665bdb85ed5d7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 192 zcmZ3^%ge<81gECFNdwW3K?DpiLK&agfQ;!3DGb33nv8xc8H$*I{LdiCU%~nr`MIh3 zrAeiEC8hdhIXSty1x5K;smUe!Nu`-NDcPANsk+IT`o+mbnFS@q`X#BwK(ZjSAT=j5 zFEu_rH7~U&u_V7pKR!M)FS8^*Uaz3?7l%!5eoARhs$CH)&<>C*iur-W2WCb_#t#fI JqKFwN1^_(eGavu} diff --git a/scripts/tests/pipeline_generator/__pycache__/test_utils.cpython-311-pytest-8.1.1.pyc b/scripts/tests/pipeline_generator/__pycache__/test_utils.cpython-311-pytest-8.1.1.pyc deleted file mode 100644 index 8abe5bd34a16fc6a85db361a26c7dc818ec882b0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7918 zcmeGhTWs6bl@uw8k}bbqveQ~Y*E&jSJAUNhq*<3G8R9HSyV>plWdp0|QnGBlc1fj) zCJjarU>UHFU>Fb&OM(2%YvFzLcYpibC!IN%Kx{yPVIKkZp&;8Z%O=Ji`z{SE&xL`L>m5FwHKTzpg=BctnkTG>U?c0JUw+$yd+%tGa z{E~IXjyy+#n=#%LuV*wp<~O_3s;(r`YF<}If#fo03UI>y&D`?;VOK^w*~lZq_B>vSP?4%+v}>o;(y3%zeLXFv9|XH@fClHCqAT4 z`L;mkwyl>4dZ5X{acef-{eIoyw1`W#zsaLF2XJ*iHR|S&nuM)6@;niUcu0E&4z{P_ zwZctjTj8Yx-S8#1shjugKr7uII?(MWQk%LxUl47j+d~JsCD%^w?U3lc8xm;KLcuMs zrG*|kurNU6$5I(At4btDLKP{)TeE;}duNVw-33Glc2ojxt?t>!5F_sLRk7+P+!7b- zxcM3X%6H*Hj4yxl&11!Tr(#;@M~y-BLKH_QW7;vi&cw90EyQedFc;Hep=$I3qdODR zD1J-_ef2TP6hIP!83eNk<`A4haHiaq&@`3mmi&d7hRB6j(2{3{zEQkqIkp5fdzEAY z{2Wz_6x>`fK~=L8mX+J34EVXcW=h!tP-y0eqOBH6S#(ZCs-&}p+lj1_PvlfHsA!3l zifFUzqNO!c$X}tfK+S$7RiL?qu7GRHCh`d#R$}r&H)M98R%Z0Pb8?zZn4GiBI5wF% zso^t36|25ta7ujz| zT-K97NQfYYBpP# zD8H2?QTF~Jp;k;JRra!-k2>jI4IW5T4m*fuCS23miSp?~37?s8>6)FOF}NRk96-_$ z;5qz&aQ^aN5bF0Q4DMvEDpZ7uuaPc5>Hz7&Etku5rVFB#E&%=i(*@XHIR4wx1=o|? z3)!B~`Bf$rB=Fd81!+H#NwDH?gly>$AzN|>Y4@xVvW2Q>h3wExigg!6>;(+V^Ua72 zgD;8LjzE1H0ySo{d*7Jl6=HshbsELOj1B_8AUu!Yv@1NHW#RcOgl9|2DAJE1ID!D{ zABv?89Y+vFa1=l+;L+!dnsd&{DQ5^hyTnqszW2o6;W8WAJp6aM}mfiI#&u0`C^|HK2w|`>Grfs!**|eZ*fC zh*TAw6&2Qr;OIpXsBlEytKEOak-#T{g$;VK=>X)vp(l=d)3$)q?Me?aY}?U*L-f1D zi~A$3N;+1vje}LMXw?=-+qNZE1C>BUYJ?=YB0uzLgF4=G<5;uxk&cQCA*mDT_l2ae z)1|E0ng13@+3S#46|7UXZJ%y)98keJIhtK2t~CeLDq+%fNP7lc_iEeY4K+|F9cYD@ z4s>^s?lyIMD$|y_J#?TOx=U^9_Ee@Vb$jSQcMs`pQ@1B@w$$yR1Kqu(uT9;aI=iKA z4;|?4Yw>1yYF^UcriGq>+|oi19asq6#mBOh1_a3<8L9*uX+TIHZhqz9SDf!R)@9aF z3AufWd)7z;0#(^c1BP#we@dVsNHe!f(1c@Epauo5)$D^x@O+e&&C$s_(Mc`3kJix{ zggQ2p&gY;ZcnChb0)^@uYH>9SJmuH+iFcYpPr&^>$8?)w`=WeQP14C?LSJPSMKUvq zQG60=^1``n0>`iT32M=I0sSjJ+Dv(tG39|y18Y*i;7d{-)~$dh1sgzq#0JpgaAZT! zZJ_9Nt?b0?aMVg>5PO~2I@PTHrP)gg$#s<~nH+Rq6||ovPO>L)qC+Te5&=GQia7|y z5LE7Vv^i#=p<;^A5SfXEDSBj6Tq|VqrjSb%O#vP(GwZPFRR>G27EsqY1o(>4^C)-B z?$kP-d812g-2r_A$s8?>v-C~GSgn;t_hMEN3}VeB;}-p5BecT6fRJ)!N8;Lmd2@{CZ99ugSeNxu+&a zYV!Dl8=FUWJC7LhIQt!ds&~t8NQO9G@8gC?AFcj;^%L#abH=etJL8vk$1m>;UD+MF zvhm(u#H!)`?TFP~2j*#W4ESg6B`2fJ#c1p)&@c=@8@V6DM783DRS&Nd^FPSTHp;mP z?Q9*B!?w|eu;DNUu^wIagRm*>ftA6UvZCY?nY^NueUo=$QB!WVBdwCij3~?#PqD8b znu=3Y{fI(;&kWiqGtj_M{NhcCv6vG4*9Cev6q8~z+UNyN**@6r0!~>CK-Pf8bdxy` zESDO-C6}PmIl?^=?KtMcfcvN@cRJ^ciREl;tyC$?xO} zq?A>^PUFCjNx1fJfOUcA`5O9&7JuOl>#xQQ8}46Slz3mAYZvrzKKcIGpZJld{76k) z_#zm7pg&yRJhwYGXUOxlVAn%wQ`}s8TrlJ*oQV$;o1?ptw+(sHGpz^xe(8l@_WAz7 z0jT%!{5Af`K%K+u=l$&Zdx2ez4?biACw$TGX0A&v&2%h+K*_c~5(#w&7jT|{tt8PY9t67 zIN^(4H)CDu=YP!CIR`w>iJcG1PlbpfL~3H-Cs#kd`ry`s4}NxQ^J`CJBNE#QAKwih W-w{vjiYJWbuP$)nC@@%W#(w};B^@~c From 3cdd2b3868566412cb146eb232569059bf263205 Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 19 Sep 2024 21:37:29 +0000 Subject: [PATCH 05/11] p Signed-off-by: kevin --- scripts/tests/pipeline_generator/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tests/pipeline_generator/test_utils.py b/scripts/tests/pipeline_generator/test_utils.py index 5d08973..e3365db 100644 --- a/scripts/tests/pipeline_generator/test_utils.py +++ b/scripts/tests/pipeline_generator/test_utils.py @@ -54,4 +54,4 @@ def test_get_multi_node_test_command(): assert get_multi_node_test_command(test_commands, working_dir, num_nodes, num_gpus, docker_image_path) == expected_result if __name__ == "__main__": - sys.exit(pytest.main(["-v", __file__])) \ No newline at end of file + sys.exit(pytest.main(["-v", __file__])) From ab323d9fc24d1f1abd8c72ac2870de68dd7067eb Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 19 Sep 2024 21:41:23 +0000 Subject: [PATCH 06/11] p Signed-off-by: kevin --- scripts/pipeline_generator/utils.py | 13 +++++++++++-- scripts/tests/pipeline_generator/test_utils.py | 11 ++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/scripts/pipeline_generator/utils.py b/scripts/pipeline_generator/utils.py index 102fa37..12911a7 100644 --- a/scripts/pipeline_generator/utils.py +++ b/scripts/pipeline_generator/utils.py @@ -1,5 +1,4 @@ import enum -import os from typing import Optional, List # Constants @@ -18,6 +17,7 @@ STEPS_TO_BLOCK = [] + class AgentQueue(str, enum.Enum): AWS_CPU = "cpu_queue" AWS_SMALL_CPU = "small_cpu_queue" @@ -27,6 +27,7 @@ class AgentQueue(str, enum.Enum): AMD_GPU = "amd" AMD_CPU = "amd-cpu" + def get_agent_queue(no_gpu: Optional[bool], gpu_type: Optional[str], num_gpus: Optional[int]) -> AgentQueue: if no_gpu: return AgentQueue.AWS_SMALL_CPU @@ -34,13 +35,21 @@ def get_agent_queue(no_gpu: Optional[bool], gpu_type: Optional[str], num_gpus: O return AgentQueue.A100 return AgentQueue.AWS_1xL4 if num_gpus == 1 else AgentQueue.AWS_4xL4 + def get_full_test_command(test_commands: List[str], step_working_dir: str) -> str: """Convert test commands into one-line command with the right directory.""" working_dir = step_working_dir or DEFAULT_WORKING_DIR test_commands_str = "; ".join(test_commands) return f"cd {working_dir}; {test_commands_str}" -def get_multi_node_test_command(test_commands: List[str], working_dir: str, num_nodes: int, num_gpus: int, docker_image_path: str) -> str: + +def get_multi_node_test_command( + test_commands: List[str], + working_dir: str, + num_nodes: int, + num_gpus: int, + docker_image_path: str + ) -> str: quoted_commands = [f"'{command}'" for command in test_commands] multi_node_command = [ MULTI_NODE_TEST_SCRIPT, diff --git a/scripts/tests/pipeline_generator/test_utils.py b/scripts/tests/pipeline_generator/test_utils.py index e3365db..74ab72f 100644 --- a/scripts/tests/pipeline_generator/test_utils.py +++ b/scripts/tests/pipeline_generator/test_utils.py @@ -1,4 +1,5 @@ import pytest +import sys from typing import List from scripts.pipeline_generator.utils import ( @@ -9,6 +10,7 @@ MULTI_NODE_TEST_SCRIPT, ) + @pytest.mark.parametrize( ("no_gpu", "gpu_type", "num_gpus", "expected_result"), [ @@ -21,6 +23,7 @@ def test_get_agent_queue(no_gpu: bool, gpu_type: str, num_gpus: int, expected_result: AgentQueue): assert get_agent_queue(no_gpu, gpu_type, num_gpus) == expected_result + @pytest.mark.parametrize( ("test_commands", "step_working_dir", "expected_result"), [ @@ -32,9 +35,14 @@ def test_get_agent_queue(no_gpu: bool, gpu_type: str, num_gpus: int, expected_re def test_get_full_test_command(test_commands: List[str], step_working_dir: str, expected_result: str): assert get_full_test_command(test_commands, step_working_dir) == expected_result + def test_get_multi_node_test_command(): test_commands = [ - "distributed/test_same_node.py; pytest -v -s distributed/test_multi_node_assignment.py; pytest -v -s distributed/test_pipeline_parallel.py", + ( + "distributed/test_same_node.py;" + "pytest -v -s distributed/test_multi_node_assignment.py;" + "pytest -v -s distributed/test_pipeline_parallel.py" + ), "distributed/test_same_node.py", ] working_dir = "/vllm-workspace/tests" @@ -53,5 +61,6 @@ def test_get_multi_node_test_command(): expected_result = " ".join(map(str, expected_multi_node_command)) assert get_multi_node_test_command(test_commands, working_dir, num_nodes, num_gpus, docker_image_path) == expected_result + if __name__ == "__main__": sys.exit(pytest.main(["-v", __file__])) From ec9e1843106458e9d1493f73dd8fa74a5263a296 Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 19 Sep 2024 23:05:15 +0000 Subject: [PATCH 07/11] p Signed-off-by: kevin --- scripts/pipeline_generator/plugin.py | 90 ++++++++++++++ .../tests/pipeline_generator/test_plugin.py | 116 ++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 scripts/pipeline_generator/plugin.py create mode 100644 scripts/tests/pipeline_generator/test_plugin.py diff --git a/scripts/pipeline_generator/plugin.py b/scripts/pipeline_generator/plugin.py new file mode 100644 index 0000000..9ac5702 --- /dev/null +++ b/scripts/pipeline_generator/plugin.py @@ -0,0 +1,90 @@ +from pydantic import BaseModel, Field +from typing import List, Dict, Any, Optional + +from .utils import HF_HOME + +DOCKER_PLUGIN_NAME = "docker#v5.2.0" +KUBERNETES_PLUGIN_NAME = "kubernetes" + +class DockerPluginConfig(BaseModel): + image: str = "" + always_pull: bool = Field(default=True, alias="always-pull") + propagate_environment: bool = Field(default=True, alias="propagate-environment") + gpus: Optional[str] = "all" + mount_buildkite_agent: Optional[bool] = Field(default=False, alias="mount-buildkite-agent") + command: List[str] = Field(default_factory=list) + environment: List[str] = [ + f"HF_HOME={HF_HOME}", + "VLLM_USAGE_SOURCE=ci-test", + "HF_TOKEN", + "BUILDKITE_ANALYTICS_TOKEN" + ] + volumes: List[str] = [ + "/dev/shm:/dev/shm", + f"{HF_HOME}:{HF_HOME}" + ] + +class KubernetesPodContainerConfig(BaseModel): + image: str + command: List[str] + resources: Dict[str, Dict[str, int]] + volume_mounts: List[Dict[str, str]] = Field( + alias="volumeMounts", + default=[ + {"name": "devshm", "mountPath": "/dev/shm"}, + {"name": "hf-cache", "mountPath": HF_HOME} + ] + ) + env: List[Dict[str, str]] = Field( + default=[ + {"name": "HF_HOME", "value": HF_HOME}, + {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"}, + { + "name": "HF_TOKEN", + "valueFrom": { + "secretKeyRef": { + "name": "hf-token-secret", + "key": "token" + } + } + }, + ], + ) + +class KubernetesPodSpec(BaseModel): + containers: List[KubernetesPodContainerConfig] + priority_class_name: str = Field(default="ci", alias="priorityClassName") + node_selector: Dict[str, Any] = Field( + default={"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"}, + alias="nodeSelector" + ) + volumes: List[Dict[str, Any]] = Field( + default=[ + {"name": "devshm", "emptyDir": {"medium": "Memory"}}, + {"name": "hf-cache", "hostPath": {"path": HF_HOME, "type": "Directory"}} + ] + ) + +class KubernetesPluginConfig(BaseModel): + pod_spec: KubernetesPodSpec = Field(alias="podSpec") + +def get_kubernetes_plugin_config(docker_image_path: str, test_bash_command: List[str], num_gpus: int) -> Dict: + pod_spec = KubernetesPodSpec( + containers=[ + KubernetesPodContainerConfig( + image=docker_image_path, + command=[" ".join(test_bash_command)], + resources={"limits": {"nvidia.com/gpu": num_gpus}} + ) + ] + ) + return {KUBERNETES_PLUGIN_NAME: KubernetesPluginConfig(podSpec=pod_spec).dict(by_alias=True)} + +def get_docker_plugin_config(docker_image_path: str, test_bash_command: List[str], no_gpu: bool) -> Dict: + docker_plugin_config = DockerPluginConfig( + image=docker_image_path, + command=test_bash_command + ) + if no_gpu: + docker_plugin_config.gpus = None + return {DOCKER_PLUGIN_NAME: docker_plugin_config.dict(exclude_none=True, by_alias=True)} \ No newline at end of file diff --git a/scripts/tests/pipeline_generator/test_plugin.py b/scripts/tests/pipeline_generator/test_plugin.py new file mode 100644 index 0000000..9d8973d --- /dev/null +++ b/scripts/tests/pipeline_generator/test_plugin.py @@ -0,0 +1,116 @@ +import pytest +import sys + +from unittest.mock import patch +from scripts.pipeline_generator.plugin import ( + get_kubernetes_plugin_config, + get_docker_plugin_config, + DOCKER_PLUGIN_NAME, + KUBERNETES_PLUGIN_NAME, +) + +def test_get_kubernetes_plugin_config(): + docker_image_path = "test_image:latest" + test_bash_command = ["echo", "Hello, Kubernetes!"] + num_gpus = 1 + + expected_config = { + KUBERNETES_PLUGIN_NAME: { + "podSpec": { + "containers": [ + { + "image": docker_image_path, + "command": [" ".join(test_bash_command)], + "resources": {"limits": {"nvidia.com/gpu": num_gpus}}, + "volumeMounts": [ + {"name": "devshm", "mountPath": "/dev/shm"}, + {"name": "hf-cache", "mountPath": "/root/.cache/huggingface"} + ], + "env": [ + {"name": "HF_HOME", "value": "/root/.cache/huggingface"}, + {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"}, + { + "name": "HF_TOKEN", + "valueFrom": { + "secretKeyRef": { + "name": "hf-token-secret", + "key": "token" + } + } + }, + ], + } + ], + "priorityClassName": "ci", + "nodeSelector": {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"}, + "volumes": [ + {"name": "devshm", "emptyDir": {"medium": "Memory"}}, + {"name": "hf-cache", "hostPath": {"path": "/root/.cache/huggingface", "type": "Directory"}} + ] + } + } + } + + assert get_kubernetes_plugin_config(docker_image_path, test_bash_command, num_gpus) == expected_config + + +@pytest.mark.parametrize( + "docker_image_path, test_bash_command, no_gpu, expected_config", + [ + ( + "test_image:latest", + ["bash", "-c", "echo A", "pytest -v -s a.py"], + False, + { + DOCKER_PLUGIN_NAME: { + "image": "test_image:latest", + "always-pull": True, + "propagate-environment": True, + "gpus": "all", + "command": ["bash", "-c", "echo A", "pytest -v -s a.py"], + "environment": [ + "HF_HOME=/root/.cache/huggingface", + "VLLM_USAGE_SOURCE=ci-test", + "HF_TOKEN", + "BUILDKITE_ANALYTICS_TOKEN" + ], + "mount-buildkite-agent": False, + "volumes": [ + "/dev/shm:/dev/shm", + "/root/.cache/huggingface:/root/.cache/huggingface" + ] + } + } + ), + ( + "cpu_image:latest", + ["bash", "-c", "echo B", "pytest -v -s b.py"], + True, + { + DOCKER_PLUGIN_NAME: { + "image": "cpu_image:latest", + "always-pull": True, + "propagate-environment": True, + "command": ["bash", "-c", "echo B", "pytest -v -s b.py"], + "environment": [ + "HF_HOME=/root/.cache/huggingface", + "VLLM_USAGE_SOURCE=ci-test", + "HF_TOKEN", + "BUILDKITE_ANALYTICS_TOKEN" + ], + "mount-buildkite-agent": False, + "volumes": [ + "/dev/shm:/dev/shm", + "/root/.cache/huggingface:/root/.cache/huggingface" + ] + } + } + ), + ] +) +def test_get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu, expected_config): + assert get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu) == expected_config + + +if __name__ == "__main__": + sys.exit(pytest.main(["-v", __file__])) From 3615f75f27ecff310d4261d86a41cc85f275ddce Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 20 Sep 2024 00:18:28 +0000 Subject: [PATCH 08/11] add req Signed-off-by: kevin --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..946ac76 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +click==8.1.7 +pydantic==2.9.2 From 9e529540a919f1f24bbbca236d0573787d5aae8e Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 24 Sep 2024 00:32:48 +0000 Subject: [PATCH 09/11] p Signed-off-by: kevin --- scripts/pipeline_generator/plugin.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/scripts/pipeline_generator/plugin.py b/scripts/pipeline_generator/plugin.py index 9ac5702..31b2afa 100644 --- a/scripts/pipeline_generator/plugin.py +++ b/scripts/pipeline_generator/plugin.py @@ -7,6 +7,11 @@ KUBERNETES_PLUGIN_NAME = "kubernetes" class DockerPluginConfig(BaseModel): + """ + Configuration for Docker plugin running in a Buildkite step. + The specification is based on: + https://github.com/buildkite-plugins/docker-buildkite-plugin?tab=readme-ov-file#configuration + """ image: str = "" always_pull: bool = Field(default=True, alias="always-pull") propagate_environment: bool = Field(default=True, alias="propagate-environment") @@ -25,6 +30,9 @@ class DockerPluginConfig(BaseModel): ] class KubernetesPodContainerConfig(BaseModel): + """ + Configuration for a container running in a Kubernetes pod. + """ image: str command: List[str] resources: Dict[str, Dict[str, int]] @@ -52,6 +60,9 @@ class KubernetesPodContainerConfig(BaseModel): ) class KubernetesPodSpec(BaseModel): + """ + Configuration for a Kubernetes pod running in a Buildkite step. + """ containers: List[KubernetesPodContainerConfig] priority_class_name: str = Field(default="ci", alias="priorityClassName") node_selector: Dict[str, Any] = Field( @@ -66,13 +77,16 @@ class KubernetesPodSpec(BaseModel): ) class KubernetesPluginConfig(BaseModel): + """ + Configuration for Kubernetes plugin running in a Buildkite step. + """ pod_spec: KubernetesPodSpec = Field(alias="podSpec") -def get_kubernetes_plugin_config(docker_image_path: str, test_bash_command: List[str], num_gpus: int) -> Dict: +def get_kubernetes_plugin_config(container_image: str, test_bash_command: List[str], num_gpus: int) -> Dict: pod_spec = KubernetesPodSpec( containers=[ KubernetesPodContainerConfig( - image=docker_image_path, + image=container_image, command=[" ".join(test_bash_command)], resources={"limits": {"nvidia.com/gpu": num_gpus}} ) @@ -87,4 +101,4 @@ def get_docker_plugin_config(docker_image_path: str, test_bash_command: List[str ) if no_gpu: docker_plugin_config.gpus = None - return {DOCKER_PLUGIN_NAME: docker_plugin_config.dict(exclude_none=True, by_alias=True)} \ No newline at end of file + return {DOCKER_PLUGIN_NAME: docker_plugin_config.dict(exclude_none=True, by_alias=True)} From 9d973dc7d3f99193e85580461f976eabf004d159 Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 24 Sep 2024 00:43:19 +0000 Subject: [PATCH 10/11] p Signed-off-by: kevin --- scripts/pipeline_generator/plugin.py | 6 ++++++ scripts/tests/pipeline_generator/test_plugin.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/pipeline_generator/plugin.py b/scripts/pipeline_generator/plugin.py index 31b2afa..db093bc 100644 --- a/scripts/pipeline_generator/plugin.py +++ b/scripts/pipeline_generator/plugin.py @@ -6,6 +6,7 @@ DOCKER_PLUGIN_NAME = "docker#v5.2.0" KUBERNETES_PLUGIN_NAME = "kubernetes" + class DockerPluginConfig(BaseModel): """ Configuration for Docker plugin running in a Buildkite step. @@ -29,6 +30,7 @@ class DockerPluginConfig(BaseModel): f"{HF_HOME}:{HF_HOME}" ] + class KubernetesPodContainerConfig(BaseModel): """ Configuration for a container running in a Kubernetes pod. @@ -59,6 +61,7 @@ class KubernetesPodContainerConfig(BaseModel): ], ) + class KubernetesPodSpec(BaseModel): """ Configuration for a Kubernetes pod running in a Buildkite step. @@ -76,12 +79,14 @@ class KubernetesPodSpec(BaseModel): ] ) + class KubernetesPluginConfig(BaseModel): """ Configuration for Kubernetes plugin running in a Buildkite step. """ pod_spec: KubernetesPodSpec = Field(alias="podSpec") + def get_kubernetes_plugin_config(container_image: str, test_bash_command: List[str], num_gpus: int) -> Dict: pod_spec = KubernetesPodSpec( containers=[ @@ -94,6 +99,7 @@ def get_kubernetes_plugin_config(container_image: str, test_bash_command: List[s ) return {KUBERNETES_PLUGIN_NAME: KubernetesPluginConfig(podSpec=pod_spec).dict(by_alias=True)} + def get_docker_plugin_config(docker_image_path: str, test_bash_command: List[str], no_gpu: bool) -> Dict: docker_plugin_config = DockerPluginConfig( image=docker_image_path, diff --git a/scripts/tests/pipeline_generator/test_plugin.py b/scripts/tests/pipeline_generator/test_plugin.py index 9d8973d..b178e22 100644 --- a/scripts/tests/pipeline_generator/test_plugin.py +++ b/scripts/tests/pipeline_generator/test_plugin.py @@ -1,7 +1,6 @@ import pytest import sys -from unittest.mock import patch from scripts.pipeline_generator.plugin import ( get_kubernetes_plugin_config, get_docker_plugin_config, @@ -9,6 +8,7 @@ KUBERNETES_PLUGIN_NAME, ) + def test_get_kubernetes_plugin_config(): docker_image_path = "test_image:latest" test_bash_command = ["echo", "Hello, Kubernetes!"] @@ -50,7 +50,7 @@ def test_get_kubernetes_plugin_config(): } } } - + assert get_kubernetes_plugin_config(docker_image_path, test_bash_command, num_gpus) == expected_config From a040279430bca018b614b6eb7346e1f23313729a Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 24 Sep 2024 18:42:34 +0000 Subject: [PATCH 11/11] p Signed-off-by: kevin --- scripts/tests/pipeline_generator/test_plugin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/tests/pipeline_generator/test_plugin.py b/scripts/tests/pipeline_generator/test_plugin.py index b178e22..7e85ac0 100644 --- a/scripts/tests/pipeline_generator/test_plugin.py +++ b/scripts/tests/pipeline_generator/test_plugin.py @@ -59,7 +59,7 @@ def test_get_kubernetes_plugin_config(): [ ( "test_image:latest", - ["bash", "-c", "echo A", "pytest -v -s a.py"], + ["bash", "-c", "echo A;\npytest -v -s a.py"], False, { DOCKER_PLUGIN_NAME: { @@ -67,7 +67,7 @@ def test_get_kubernetes_plugin_config(): "always-pull": True, "propagate-environment": True, "gpus": "all", - "command": ["bash", "-c", "echo A", "pytest -v -s a.py"], + "command": ["bash", "-c", "echo A;\npytest -v -s a.py"], "environment": [ "HF_HOME=/root/.cache/huggingface", "VLLM_USAGE_SOURCE=ci-test", @@ -84,14 +84,14 @@ def test_get_kubernetes_plugin_config(): ), ( "cpu_image:latest", - ["bash", "-c", "echo B", "pytest -v -s b.py"], + ["bash", "-c", "echo B;\npytest -v -s b.py"], True, { DOCKER_PLUGIN_NAME: { "image": "cpu_image:latest", "always-pull": True, "propagate-environment": True, - "command": ["bash", "-c", "echo B", "pytest -v -s b.py"], + "command": ["bash", "-c", "echo B;\npytest -v -s b.py"], "environment": [ "HF_HOME=/root/.cache/huggingface", "VLLM_USAGE_SOURCE=ci-test",