From 2b91285d352d29aae3e09463465978f978b715df Mon Sep 17 00:00:00 2001 From: Justin Bandoro <79104794+jbandoro@users.noreply.github.com> Date: Tue, 14 Nov 2023 07:00:01 -0800 Subject: [PATCH] Refactor LoadMethod.LOCAL to use symlinks instead of copying directory (#660) This PR refactors the `create_symlinks` function that was previously used in load via dbt ls so that it can be used in `DbtLocalBaseOperator.run_command` instead of copying the entire directory. Closes: #614 (cherry picked from commit 5d23758da43ad93e4f6f80a063ff1c76cefe1557) --- cosmos/dbt/graph.py | 9 +-------- cosmos/dbt/project.py | 14 ++++++++++++++ cosmos/operators/local.py | 13 ++++--------- dev/dags/dbt/{simple => }/data/imdb.db | Bin 73728 -> 53248 bytes .../models/movies_ratings_simplified.sql | 2 +- dev/dags/dbt/simple/models/source.yml | 2 +- dev/dags/dbt/simple/profiles.yml | 4 ++-- dev/dags/example_cosmos_sources.py | 2 +- tests/dbt/test_graph.py | 14 +------------- tests/dbt/test_project.py | 15 +++++++++++++++ tests/sample/manifest_source.json | 12 ++++++------ 11 files changed, 46 insertions(+), 41 deletions(-) create mode 100644 cosmos/dbt/project.py rename dev/dags/dbt/{simple => }/data/imdb.db (71%) create mode 100644 tests/dbt/test_project.py diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 3b61ef57d..242bca6f9 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -21,6 +21,7 @@ LoadMode, ) from cosmos.dbt.parser.project import LegacyDbtProject +from cosmos.dbt.project import create_symlinks from cosmos.dbt.selector import select_nodes from cosmos.log import get_logger @@ -51,14 +52,6 @@ class DbtNode: has_test: bool = False -def create_symlinks(project_path: Path, tmp_dir: Path) -> None: - """Helper function to create symlinks to the dbt project files.""" - ignore_paths = (DBT_LOG_DIR_NAME, DBT_TARGET_DIR_NAME, "dbt_packages", "profiles.yml") - for child_name in os.listdir(project_path): - if child_name not in ignore_paths: - os.symlink(project_path / child_name, tmp_dir / child_name) - - def run_command(command: list[str], tmp_dir: Path, env_vars: dict[str, str]) -> str: """Run a command in a subprocess, returning the stdout.""" logger.info("Running command: `%s`", " ".join(command)) diff --git a/cosmos/dbt/project.py b/cosmos/dbt/project.py new file mode 100644 index 000000000..63f4fc007 --- /dev/null +++ b/cosmos/dbt/project.py @@ -0,0 +1,14 @@ +from pathlib import Path +import os +from cosmos.constants import ( + DBT_LOG_DIR_NAME, + DBT_TARGET_DIR_NAME, +) + + +def create_symlinks(project_path: Path, tmp_dir: Path) -> None: + """Helper function to create symlinks to the dbt project files.""" + ignore_paths = (DBT_LOG_DIR_NAME, DBT_TARGET_DIR_NAME, "dbt_packages", "profiles.yml") + for child_name in os.listdir(project_path): + if child_name not in ignore_paths: + os.symlink(project_path / child_name, tmp_dir / child_name) diff --git a/cosmos/operators/local.py b/cosmos/operators/local.py index 6af849b65..323aeacf7 100644 --- a/cosmos/operators/local.py +++ b/cosmos/operators/local.py @@ -1,7 +1,6 @@ from __future__ import annotations import os -import shutil import signal import tempfile from attr import define @@ -43,6 +42,7 @@ FullOutputSubprocessResult, ) from cosmos.dbt.parser.output import extract_log_issues, parse_output +from cosmos.dbt.project import create_symlinks DBT_NO_TESTS_MSG = "Nothing to do" DBT_WARN_MSG = "WARN" @@ -190,19 +190,14 @@ def run_command( """ Copies the dbt project to a temporary directory and runs the command. """ - with tempfile.TemporaryDirectory() as tmp_dir: + with tempfile.TemporaryDirectory() as tmp_project_dir: logger.info( "Cloning project to writable temp directory %s from %s", - tmp_dir, + tmp_project_dir, self.project_dir, ) - # need a subfolder because shutil.copytree will fail if the destination dir already exists - tmp_project_dir = os.path.join(tmp_dir, "dbt_project") - shutil.copytree( - self.project_dir, - tmp_project_dir, - ) + create_symlinks(Path(self.project_dir), Path(tmp_project_dir)) with self.profile_config.ensure_profile() as profile_values: (profile_path, env_vars) = profile_values diff --git a/dev/dags/dbt/simple/data/imdb.db b/dev/dags/dbt/data/imdb.db similarity index 71% rename from dev/dags/dbt/simple/data/imdb.db rename to dev/dags/dbt/data/imdb.db index 605f6526e931ad2ae4e0a5d89faf7ae302840bc7..be0c1fe7739b7877496e2b6bdd6ec60950552897 100644 GIT binary patch delta 55 zcmZoTz|ydQd4jYc69WSSFA&23<3t@}ekKOJcv+ArBiDBZuJ4-_1v}HefB-xq$ncc*YH333GFh~NJgaq?X0?87R zs0heL%b)lIJO!bGC{!y}D@H`9Xj=S3jY?Zidyb`=YNcv>#A6Fb?Ww(QURIoQCMPp@ z-+T9d_q*S{6F*X;=`g7J@@ksNuI(U_yN0;YvCphb7>x{JL z(iY)Mcn4Nf-%MR0{8!i_%t$$tQe!!1S(^NL@{VM?`M7ym(xs#ak_t>GP0i>FvBkSLmaNk-%OKj*N7v2sd%lWY=`TIK( zUOY(8ve`J^o#Q4|@49%k=@2QM5OMn&VIdrq$?;N_MD|Z;h1d0QF25WIMPyYCh^iI| z_)M~02JN6F;-c3e3iC}yLLmZM9GcLD;g8C&xqfj`@(W@_d{; z{(=+tu7cfv1dKl(WTB_O=*F==x}QDM23UH?@QY>0^q)zNY(633<-IKQ9OQ-#FP@N* zF+if^;!6%3TPZAHXHiqt$#TSfIyp{SPC0N#56ytx#vwOsc>0uzjO|Sv8T*YBujm&1 z1~BF5XI=AhAxaj$8Yh{jCEV2|G#YqY7OVo8*Pjk4+#4-Xm!ip9K$Rnbu&4w>0Y%Xw zF27&)hXb;rYU1)vL7^K`B6!H;D+MSi_6>aSgN$FzW~Q0XIPq{3^}t4R52p3c zR3KAJO`L3bR>EzKH20YR1WnaHo>kbYkACgI-8TzWu!=didshH)C$cyq?3QrvGGQ(( z%Asz1N&oI3O8e~jIN40xc-c~lo^3kI)aOJdA3P}HjwM16f}y^_Da8-@k>Tlbj+E_j z;MNAguQOKP>%g7@gtNcm$U}SGxV2s=1{1hVSADy;h{65Y!$70Ov{qP+d&sakS6a6x!L$V)kv;bZ29-j~A)_(GLuyE7lOt_3$t?^~q*`dgckTgRgv*#3h zT#Bp(gR-iqOv$c3w#=gWPk^&&ZjnM|gn|>GRUd?^R|0A%5DdBeAvp}f8c+gaOAaLz zfWuQ6{o6br-}nAF`J&2>d$Wa7ntrAS^XF?Q<&pF_nO`m8MOlKIE@HGjTJ2y|d!$Ch zD`(Q|CCc~vnh;ZeL#>2w$)p=jRS4;*En?uTSO|^bI^B0D5gO-j7v_7phxf!G)&RH~ z@%a+5CLW_U{r5mQs_?kZ;c#iH912I+52ya`y;e`*R0I}9Ln!;hC=pZ%H_Z^dSNm7L znuquiW1RH(C4B2O!ZI2G6Y;bDIV`;w0upYVE|lwvruVP%pmz!M3zw?NY6R#P4$?LS zfy?^|#fpiLD(rUYj;f?$w;Q zZyH4_q3{3FKv=6v$H~R8gqNmMInIPOO3|v06ctc6f^t}k$eKUIFu3}-!AfN_k;t7T zH7uZ0r4k-V6Y3HsFUh|C2F8C+nHzUuAwsjo8rF`oAP=pF?3?YxwW&JEp}o?2`fm0t zzRbdrkIO~eAPB`!pSsd{$dJQ{XyYLbP6=;F5!UHG>+1BPvlbaLeiS@HRxq$1){o^A#!^wM$4`TQLj9B~xdahp+}9`rWhCYuUX7VMTH zF=eHJr8NtY$)XZuPzpKn*W%p89_}1x-Db^5JDygXhVUUAPW^rA`cx$B5)>Hi`cur7 z{g$%i^U14|E#{}p6-l2Y-JO(Y`h#i2#G(DDoPUqs%Daqj8=H+uhC_x*?%b}y^>|I) zPo-+~z-%Pcd*fv9K|txogdWi>etS@6zI^{ytP%rbLR^|NIme6(!XF^ z4iDoU`F`6>9IFy;f{cKi)BPH@N7*pad5?%2=Osu%O=s=_QqY0!nH>=BT#5v%4=+i* zcPd(|5aRyr;>fAnMBG}&&SH~9=Iuqu_}wL#v(`Cq z+x5D0Q{uQ`T``;c+wPEXPpPgiS!|LxbBBfue0P+*wjOG=gaJYeQX;Mmr8?*3jXAhE zLZ=+aijMuZjWY8+ey506g@5`T@zI?TcGQJU67JNfMC)P;wR%mPM#Od}ZV1t2>0>Xv z9c_29U>iFecy&-e27IG+f3l;5wb6Gvp~D7P1vB9;>;gtpHu<_GJm}Y_JqVU&e|L8U zBgW5qL_DIhDTlSWPkO3|sTW$ZLg9w*5q&{VZ!X)bzYpl*qe)84$?x{5Os*zZNw~XM zSF$NgD^~|l%0TrvS<)}z_M)pK^NI1a#7QKe?#9AlH2eWMB0`iw}hpvY&Wt_A(G5Wf`#y419ZQ9+7S|Be7Ru`NCh4779YBk9w zzRpZ8H{?5JBHn9mip7RwYkJ%CE*Hv=ihlhn;Q0k+vZvltmTlyV&D8@#y`5cR-N