From 78afaf3bc59e8c87a03180c7014fc805b322f157 Mon Sep 17 00:00:00 2001 From: Jakob McBroome Date: Wed, 29 Jun 2022 16:50:20 -0700 Subject: [PATCH 01/11] initial test of new taxonium output setup --- .gitignore | 3 ++- .gitmodules | 3 +++ data/master_backend.py | 15 +++++++-------- taxonium | 1 + 4 files changed, 13 insertions(+), 9 deletions(-) create mode 100644 .gitmodules create mode 160000 taxonium diff --git a/.gitignore b/.gitignore index 265d9fba..fa2e6743 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ **/display_tables data/cview.pb data/cview.pb.gz +data/cview.jsonl.gz data/regions.js data/us-states.geo.json data/europe.geo.json @@ -8,4 +9,4 @@ data/hardcoded_clusters.tsv data/NC_045512v2.fa data/ncbiGenes.gtf data/clean.pb -data/cluster_labels.tsv +data/cluster_labels.tsv \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..f597bfc3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "taxonium"] + path = taxonium + url = https://github.com/theosanderson/taxonium diff --git a/data/master_backend.py b/data/master_backend.py index 068324d4..390936e7 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -28,6 +28,7 @@ def parse_setup(): parser.add_argument("-l","--lexicon",help="Optionally, link to a text file containing all names for the same region, one region per row, tab separated.", default = "") parser.add_argument("-X","--lookahead",type=int,help="Number to pass to parameter -X of introduce. Increase to merge nested clusters. Default 2", default = 2) parser.add_argument("-H","--host",help="Web-accessible link to the current directory for taxodium cluster view.",default="https://raw.githubusercontent.com/jmcbroome/introduction-website/main/") + parser.add_argument("-V","--taxversion",action='store_true',help="Export the view in Taxonium 2.0 jsonl format instead of taxonium protobuf. Requires the installation of taxoniumtools and adds some compute time.") args = parser.parse_args() return args @@ -47,12 +48,6 @@ def primary_pipeline(args): generate_display_tables(conversion, host = args.host) print("Preparing taxodium view.") sd = {} - # with open("cluster_labels.tsv") as inf: - # for entry in inf: - # spent = entry.strip().split() - # if spent[0] == "sample": - # continue - # sd[spent[0]] = spent[1] with open("hardcoded_clusters.tsv") as inf: for entry in inf: spent = entry.strip().split('\t') @@ -88,8 +83,12 @@ def primary_pipeline(args): spent.append("None") i += 1 print("\t".join(spent),file=outf) - print("Generating viewable pb.") - subprocess.check_call("matUtils extract -i " + args.input + " -M clusterswapped.tsv -F cluster,region --write-taxodium cview.pb --title Cluster-Tracker -g " + args.annotation + " -f " + args.reference,shell=True) + if not args.taxversion: + print("Generating viewable pb.") + subprocess.check_call("matUtils extract -i " + args.input + " -M clusterswapped.tsv -F cluster,region --write-taxodium cview.pb --title Cluster-Tracker -g " + args.annotation + " -f " + args.reference,shell=True) + else: + print("Generating viewable jsonl.") + subprocess.check_call("usher_to_taxonium -i " + args.input + "-m clusterswapped.tsv -c cluster,region -o cview.jsonl.gz --title Cluster-Tracker",shell=True) print("Process completed; check website for results.") if __name__ == "__main__": diff --git a/taxonium b/taxonium new file mode 160000 index 00000000..c958d09f --- /dev/null +++ b/taxonium @@ -0,0 +1 @@ +Subproject commit c958d09f792af7241966471c07af3a04ebfe6f2b From 7d4f7329b97a8d90ffa3776864083e390c9fd1e5 Mon Sep 17 00:00:00 2001 From: Jakob McBroome Date: Wed, 29 Jun 2022 18:08:42 -0700 Subject: [PATCH 02/11] fix typo to allow subprocess call to complete --- data/master_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/master_backend.py b/data/master_backend.py index 390936e7..3f9b5b1e 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -41,7 +41,7 @@ def primary_pipeline(args): conversion = {} # print(conversion) print("Calling introduce.") - subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) + # subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) print("Updating map display data.") update_js(args.geojson, conversion) print("Generating top cluster tables.") @@ -88,7 +88,7 @@ def primary_pipeline(args): subprocess.check_call("matUtils extract -i " + args.input + " -M clusterswapped.tsv -F cluster,region --write-taxodium cview.pb --title Cluster-Tracker -g " + args.annotation + " -f " + args.reference,shell=True) else: print("Generating viewable jsonl.") - subprocess.check_call("usher_to_taxonium -i " + args.input + "-m clusterswapped.tsv -c cluster,region -o cview.jsonl.gz --title Cluster-Tracker",shell=True) + subprocess.check_call("usher_to_taxonium -i " + args.input + " -m clusterswapped.tsv -c cluster,region -o cview.jsonl.gz --title Cluster-Tracker",shell=True) print("Process completed; check website for results.") if __name__ == "__main__": From 5f3f93074353d3629657af1df4f81a7f416aae5d Mon Sep 17 00:00:00 2001 From: Jakob McBroome Date: Wed, 29 Jun 2022 18:30:34 -0700 Subject: [PATCH 03/11] make all table taxonium links consistent --- data/generate_display_tables.py | 6 +++--- data/master_backend.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/data/generate_display_tables.py b/data/generate_display_tables.py index efb8c37d..7047b4e8 100644 --- a/data/generate_display_tables.py +++ b/data/generate_display_tables.py @@ -1,4 +1,4 @@ -def generate_display_tables(conversion = {}, host = "https://raw.githubusercontent.com/jmcbroome/introduction-website/main/"): +def generate_display_tables(conversion = {}, host = "https://raw.githubusercontent.com/jmcbroome/introduction-website/main/", extension = ".pb.gz"): filelines = {} def fix_month(datestr): monthswap = {"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12"} @@ -69,7 +69,7 @@ def fix_month(datestr): #generate a link to exist in the last column #based on the global "host" variable. #and including all html syntax. - link = "https://taxonium.org/?protoUrl=" + host + "data/cview.pb.gz" + link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension link += '&search=[{"id":0.123,"category":"cluster","value":"' link += spent[0] link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]' @@ -85,7 +85,7 @@ def fix_month(datestr): print(header,file=outf) for gv,dl in sorted_defaults: spent = dl.split("\t") - link = "https://taxonium.org/?protoUrl=" + host + "data/cview.pb.gz" + link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension link += '&search=[{"id":0.123,"category":"cluster","value":"' link += spent[0] link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]' diff --git a/data/master_backend.py b/data/master_backend.py index 3f9b5b1e..ac08842b 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -44,8 +44,8 @@ def primary_pipeline(args): # subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) print("Updating map display data.") update_js(args.geojson, conversion) - print("Generating top cluster tables.") - generate_display_tables(conversion, host = args.host) + print("Generating top cluster tables.") + generate_display_tables(conversion, host = args.host, extension = ".jsonl.gz" if args.taxversion else ".pb.gz") print("Preparing taxodium view.") sd = {} with open("hardcoded_clusters.tsv") as inf: From 7aedc81f7524e63f3056822b4dd1c89700d069a9 Mon Sep 17 00:00:00 2001 From: Jakob McBroome Date: Wed, 29 Jun 2022 18:33:05 -0700 Subject: [PATCH 04/11] reenable introduction computation --- data/master_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/master_backend.py b/data/master_backend.py index ac08842b..08a698d4 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -41,7 +41,7 @@ def primary_pipeline(args): conversion = {} # print(conversion) print("Calling introduce.") - # subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) + subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) print("Updating map display data.") update_js(args.geojson, conversion) print("Generating top cluster tables.") From b3bcbd6d33735710f837044928e0ef027bfcfa1b Mon Sep 17 00:00:00 2001 From: Jakob McBroome Date: Thu, 30 Jun 2022 16:52:00 -0700 Subject: [PATCH 05/11] split link generation into two functions to support jsonl.gz or pb.gz --- data/generate_display_tables.py | 36 +++++++++++++++++++++------------ data/master_backend.py | 2 +- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/data/generate_display_tables.py b/data/generate_display_tables.py index 7047b4e8..12d552fb 100644 --- a/data/generate_display_tables.py +++ b/data/generate_display_tables.py @@ -54,6 +54,21 @@ def fix_month(datestr): header = "Cluster ID\tRegion\tSample Count\tEarliest Date\tLatest Date\tClade\tLineage\tInferred Origins\tInferred Origin Confidences\tGrowth Score\tClick to View" mout = open("cluster_labels.tsv","w+") print("sample\tcluster",file=mout) + def generate_v1_link(cn): + link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension + link += '&search=[{"id":0.123,"category":"cluster","value":"' + link += cn + link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]' + link += '&colourBy={"variable":"region","gene":"S","colourLines":false,"residue":"681"}' + link += "&zoomToSearch=0&blinking=false" + return link + def generate_v2_link(cn): + link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension + link += '&srch=[{"key":"aa1","type":"meta_cluster","method":"text_match","text":"' + link += cn + link += '","gene":"S","position":484,"new_residue":"any","min_tips":0,"controls":true}]' + link += "&zoomToSearch=0" + return link for reg, lines in filelines.items(): with open("display_tables/" + conversion[reg] + "_topclusters.tsv", "w+") as outf: print(header,file=outf) @@ -69,12 +84,10 @@ def fix_month(datestr): #generate a link to exist in the last column #based on the global "host" variable. #and including all html syntax. - link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension - link += '&search=[{"id":0.123,"category":"cluster","value":"' - link += spent[0] - link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]' - link += '&colourBy={"variable":"region","gene":"S","colourLines":false,"residue":"681"}' - link += "&zoomToSearch=0&blinking=false" + if extension=="pb.gz": + link = generate_v1_link(spent[0]) + else: + link = generate_v2_link(spent[0]) #additionally process the date strings outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link] print("\t".join(outline),file=outf) @@ -84,13 +97,10 @@ def fix_month(datestr): with open("display_tables/default_clusters.tsv","w+") as outf: print(header,file=outf) for gv,dl in sorted_defaults: - spent = dl.split("\t") - link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension - link += '&search=[{"id":0.123,"category":"cluster","value":"' - link += spent[0] - link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]' - link += '&colourBy={"variable":"region","gene":"S","colourLines":false,"residue":"681"}' - link += "&zoomToSearch=0&blinking=false" + if extension=="pb.gz": + link = generate_v1_link(dl.split("\t")[0]) + else: + link = generate_v2_link(dl.split("\t")[0]) outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link] print("\t".join(outline), file = outf) stateconv = {"AL":"Alabama","AK":"Alaska","AR":"Arkansas","AZ":"Arizona","CA":"California","CO":"Colorado", diff --git a/data/master_backend.py b/data/master_backend.py index 08a698d4..ac08842b 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -41,7 +41,7 @@ def primary_pipeline(args): conversion = {} # print(conversion) print("Calling introduce.") - subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) + # subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) print("Updating map display data.") update_js(args.geojson, conversion) print("Generating top cluster tables.") From a2e3d40be5449b7402dc1268bef0219b22e1bce3 Mon Sep 17 00:00:00 2001 From: jmcbroome Date: Thu, 30 Jun 2022 17:08:40 -0700 Subject: [PATCH 06/11] reenable calculation --- data/master_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/master_backend.py b/data/master_backend.py index ac08842b..08a698d4 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -41,7 +41,7 @@ def primary_pipeline(args): conversion = {} # print(conversion) print("Calling introduce.") - # subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) + subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) print("Updating map display data.") update_js(args.geojson, conversion) print("Generating top cluster tables.") From cb3fe939d3834fe4550181624cb665bf6ea24441 Mon Sep 17 00:00:00 2001 From: Jakob McBroome Date: Thu, 30 Jun 2022 18:18:03 -0700 Subject: [PATCH 07/11] reenable introduction running --- data/master_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/master_backend.py b/data/master_backend.py index ac08842b..08a698d4 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -41,7 +41,7 @@ def primary_pipeline(args): conversion = {} # print(conversion) print("Calling introduce.") - # subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) + subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) print("Updating map display data.") update_js(args.geojson, conversion) print("Generating top cluster tables.") From 38841cd7e976918789944b9ebe90be6aefa73f7e Mon Sep 17 00:00:00 2001 From: jmcbroome Date: Sat, 2 Jul 2022 20:58:13 -0700 Subject: [PATCH 08/11] fix merge conflict --- .../generate_display_tables.cpython-38.pyc | Bin 3835 -> 4482 bytes .../__pycache__/master_backend.cpython-38.pyc | Bin 2965 -> 3337 bytes data/__pycache__/update_js.cpython-38.pyc | Bin 4735 -> 4633 bytes data/master_backend.py | 2 +- 4 files changed, 1 insertion(+), 1 deletion(-) diff --git a/data/__pycache__/generate_display_tables.cpython-38.pyc b/data/__pycache__/generate_display_tables.cpython-38.pyc index ed662bd8749fa5cdd1cb6d46a6ac0aea034d77cf..24df6725becdf09e23de5f9c1a5396c419e8910f 100644 GIT binary patch delta 2105 zcmc&#&2Jk;6rUNd*Xz$X&WG)!=|@^pT#=7fMJ**MYKcUt)JQa~s3NM=@vM`r?OkWQ z32iisrKX%v)l_@oPzk#SR3r|dO634Thy#BDhdqJBr58BBfsYHnT{|DD5FFXnyqS6L z&3nK1o2T8sM(!L=T}>uq1W)ysKZP1>fE=n6kmVdAk4JtzFtdyEo3Jx>ZVuA*LOmYAQS`wU}FVZ4sf zcu(<@Ih|C*2`v*x!3blhHsPU7y2b8~IT5Zn8gx+^nI&8Fx^!YP)}n0!IN>&R;xf+F zJmC?5xV$Lq{bDvNttjo^E}L2B_zJKL$Kn=Cgsrz z5l=h2mLj95C%S9Rk@s1i_%#P`5Cf@VNF7Ja-(<8E?f~;Wq}tgv+r(Dkmhi)unQvO1 z3PJ`NKj9V?IUio0>AQN$EZHMPzbL?y9} z%4GNO_Ub781vuGGlpCvqpfkXFyb~1KC_Q*{4R4<@>@AS#O zJq?$~WidVkR{Lc??E9SooEhNZtlxfsi#f>Jq^NlHTRto)!tcuxXHX9EkyF9kaqX=6 z{_cFBJ0A?@AQ=2WI3%i;AG_W5I$j(y0^Srm$A+!rB;jihOfuNSKZALm6| zf2O!!X(Q(~SR~;z?`LIm!v7+QgAkukkKPJa9dQUcPn)h5YfHR%_5^mnVvvo3`*|$u zo{VPtt`wLTs_3`9h|t${(a^=P`=R=K!7kpiOk=G!VTt9**XqKut*fFsvyldj;$&&v z(BGY?FHJ0OxKATT)DC?2hsX^&=bqEDs?e7W%XAmD69@K>ZJ_)s*1TR6rInefF>qs6 z40Cy=T&y;9_hW6YAdUgmbGXGz(9Qe$afkvH6PVmf(XoSlo?_K?(+f9jVbsM4^j@e6 zg{t4Ob(6avM30A+h9zwG%jo3cVeo62esO^0DJxdPUWk^Bstz1_LyY=ild(gm7b3Q0 zFRhi!UdS-Z3#=qO)!*vE3+tvWZX+Sm+Fl6NEvR*?ZkXJowmTPV-gt;8gep{16i9_8 z0vV$ztQdwCyeidLnvQloQgjHm49&0(6oR#iqGYvP)L2es%>h z8Y+ZJ7+&!OmY%k_SSuRl;v(+h=tXGe3Sn{0w?rE@8q+7(Q`hQ#lX@x|c1>@%<aG7k50>}?A|#FoQ3>(Zc4!3$j_hjQ@4bB=``+(q z9!|d+i?7kMJ6zwdhI388PN`|?^!LiSG*04Lxk4%K9_1s6&h5t!i>->wKrpT^P#udbBlYs^I4y_(?r!H>q>iBBP~>hAH6_LsiO zHHzFRZdO_D{amRZbIH6oG%$PMx;vk{w^)&vwwK=K=i#rxk$HV$4tMYaJ_%+|Oo7(X z7pujcOu8?fPMwd1;YyX&g5}sd!GqYsbOy$D zYYkw^acgVFmQ!U_r^Y;1`aEmC*uPK^dR zt9eyG4#FzdHI=A<7B+Dlk6>b3Scf-(Q^dk4z%+a};Y|UKk^tMw_{OtqHc`ep`o_ji k(Wz~0T%S|;jj&7%Q6rNe{pVvQeoHTKgCNgP3Nz=py34u@$L{x=Cm4@FUOJLSJ<9KcFPiALH zYprFZZ~>__XobWH*(a_@Js@$adgOw{1!)glxX>G1dx1A=hxEp-_U)TDZ{FX0?|rlQ z#d7ieLcu`reE0ZA^U>_r#Xml1!OBJh%u$3+@cn~<6iL_7N7o0LC^N!iG?EA& zAiX+JBAG}_D8iAliy|dbMwv0{X4uBLTu9_TiQeAt(^-|DN| z+ifr4{WovZ=C)~BGzjcwk5+rO+pTi1%G`SBcH2#XWYn+VWVHn|C~7hfdY% zg#o9m`l(HicltNlocqBmwHh;zcMfgd4x3>>ndP}0Z0uN`GtF*IBx<{yc_g&B?YY~> zv>Di()*RDzYw>T&&4wrqOGyd1B^SF$~r(?DPyA~EY z(uJXX*>X2zAsbb0T5HWs*6;@YkXYNOdCLn@ANQ1$f9jOH_czO#pxb zKzMziM(W4_c;tu%@X#WS=u3!XyAu2Ir~@lwN$lmu7_>_ReX<|Pk-m$LzCG&PMlTN0 z+vpwip)jB2S#X;N2LfTjLG|1LT&FOf2h?RJX28VHU|@P4&Ps6d&w&C#7D(|#y8r9I zAo?JeT6%PDX&P#d=w0a)4_PzPMg=l6h8$C_lyW(ee+dn8P!nxbj3B$nAhV(#r}$|8 zERU{wirMGl6y%iVzJ$bS*GMguCpJY*qHL=BL#jJXAl2PEQ=a>OxA8-5ac@4oqM1&K zT!3n!MMM@B0q~t!;1*M^3|xgQO)qa*=o18uu(Hn<;Ns)HzI%BSCvpn>$^oMQE89Ih zNAv{A3-N)zu=WIJnSfB;nBu%)lgIzS!&G7GZ+-PrU7K#zRYng1$-;Qc?$Lf`zqXgi z`<@%GW}n9&$G5VT(v7o*xjIpfX?E9m4~{57oA!F~ID2h4mngRDhrF&kl$*rlW}@}z z9UHoc&4SxR@gZW)B*ANk)awMWlYtexD2OJAE=U$gQVdMT@6pD@VUVaM3?G*ya>ujX z#0bKs&pgrkiD9*=)m0jn36YpaWteB8$C8ZSY}F;F#d5H!O95e`l~_TLqENs=U1DI- z3wbNS_M~agJGTV!Iwrf@@NJ*MY@zFLD5wKLScTVssV2)X+om&4)+W>LRvt^x%gZqS z#8w_(6sj3HRWpDWYGv3h;(5uKQXO9b=>}%kL}YrSAqGmLv6*9)cr~}aa|Og{8Pfsr apO$Mnw9$Af@=5rylY2(X>ofYRI{p_ckdnLr delta 1296 zcmYjR&5zqe6rb@&>^L8rO|#AJidJUfu!~3*5J+fQ2|+7?XjxI{s#T<7IiAVJiDP?b zCfyP_${{BXv{Y8bHMuM|B*eeK-@w5)#EBa>q&@K_Nl88O%zJO1fAi+|-t+gnFY4vf zQprN_d~1K={#1Nk{_E%q(0nw;0wrjQpS?d;66H2}v_Do8b%y6?rVu>GV=d9ZS{dt! zMwA_t;6zV9!}gY%X;DnWOy`36F)VT5i*$-V+$#g{)#rz&oHBmc8+dJYc$@ORh~mQw z+UCBX=^yyxnsh^0!xlDi8QxvYDrrNxfqzNwC_fix+8lyQ@HgmS@3)Hj55}7*)}Q7! z;VV{4U+5ye%-ww?QP^VI04XcPF5}eUR1D(^82C0!2+%99j2f=u8raw_?0Hyvte!lq zlBQem@~1exXS_G*<%pV~9;zdv)q$&?0g_%v!UQ=##h~R@nzhgjV*a3DBs2oMuol3t zn5fgG4Sc{>0$r{eDaaM_pouamLLQEQX&o4Ej&n(FwlSZeSp}Fd5`&a>5GnT+_WSuI zXd93WZD~D5=a&!A2k0L98a+g!oG1W9#yUV&$mUdeb}}{>pr5Wl0h3fAxhvE}>*s#N z?5&r0fNYR!$1=R?WVV?YbDWr@CU13qwLWMqf#cTVK02^>9&L`U5AL|)RQLg3TkVE#k2Jf%?a@uUF|@lb zBVI%(alBy21yo&A@W*!J{=(~_eg6PMd3I?Kq|^NIN2Bep7=<#CCkgjSMEv2ve(ckW z=BT;?a00hYg9Qwe<We%uti7VX92_n8SPL;LysQh*^cy4<94T|3_46ULrdX=73B9tIhCY?77ayb*>~&a ztH31&F8j`j{g^@~=}mCuM+luj3+7;0xTt7~iI1T9itx&@`~eg}SMX~Jc&PGsP=qg!zn{S%< EKYSBDJ^%m! diff --git a/data/__pycache__/update_js.cpython-38.pyc b/data/__pycache__/update_js.cpython-38.pyc index cd183496d4d8dfaa5daaea0e5d854128483d43cb..ad6d81f85c0ad8b744d953e7a040cbc1efe55292 100644 GIT binary patch delta 2243 zcma)6&2Jk;6rb5$Z)|7n+K%I|d^ukwZcCH2G^OAcC>1A!fC?2XOmI9->%_60-F1Y@ ztPBLH1*xiGd*NmasiGh*aOc1csaHg`{{j;9zyT^D_|1Bq8kIwLH8bzMdG9yx{pRid zv+%`2_>E8~NbuBt|EokNJ_|nxABGmfkTHy`ouEhCoOTI|u;|AG677)-WS{mZB`n4% zi{%L$Go>xLtMrKJV{tZqgQm!kCGm(Q*|%I)EQKp9l_o5WHG_2m>m=4GtVyiXRmE!i z*o=F)A`Z4xmL1YY*zAzzXLCbZz*O1%KJCz(cdZ}~ULYM>{J|RKqur4n>H671k6Ien z+yoZ4r4H5OyZ1ST<#3v$i4}tMM4Gf&%-vkUrY5pO=n_K1Jj`-^+1-Cv)Xa{sqb$RY z-H@$A^<%1M*wc_>Ldqk!=MT z-r^xX!Xs(2Jp%16OZ7D#%9DZQ13VIa5{U-c@g8NFBsq=yghz<0UaX}1S<0qzl4k6* zJkvf`B|Pe)xHY3JL|!1pcdRimG6pu{JkDb{RnoU`YK+H$hgqD)G9G?hV7R$S`Ym|5 zn(OIF!`>Zf_vH@Vy|sJ)5^0|zuaVcun}jX97pM;U%o?u}D`Bc!Tz%3U?P@#;?o&M3 z4fP0mFlD8Aimjy4mBp{D49du`RitzR>3Jx{HSjrMO>!TfKp|6nvK#iiPQo+QlX%+; z0&`RBsU*1S$h#3P3E=J_o^jj#6whp{9a-N)@F{pE9)jdaeaVdQ^gb1PdE&|c@!)B} zla)r@fA*1)@*mbTpJuCVWtz};WR%}+)yk-&t`BE>Lxrk2F< z+S0Cak=!A=51TpJk&dr8blahOuECuiVRjas$5e5J8dS>DJuLIluKQ5oiNh#t{_KsadAjSzFaZALv;aAP>j|FyF&|dwd%2W zz)DeoyZjTCND{H@qusFDYO<19aeP&?vSZjGZFRNWXw^+e*)ke+N2xVR%<+|L4WpvZ z0l-m9&1R*}P`0T%%1+6=Qm{YNrpFIEs#(&JjuR|5>U$O4s5a{Mx7t;EEp)U!D?$~V zRMJIx`iu+J)GM3S_X_f+$qFN&YrU~oE{w8@VOHxU6Y=^A{0cIwvVspZSCLSoWo{OH zO})|d>YEpMqDUvuuVsD95dC7@#zbh4PSaF^`sKWDfrhl+*bCBxq|rFlB+aKu5y>wlFbClsp(z)^56=}}7BPc2h8_L<_gBO7 zvbu_F^fmj-=*p4v5DNjr1o46^$G^T_FYQ#;*N57r-_iEp(J5Kh>h@G@T@Lu^zn`7@ A)Bpeg delta 2246 zcma)6%WoS+7@yf)KeOKY6~~Sf=V7O@+cZw;BYn6nrIjl6Qc+ZaEG*=D)?s7EkKIjx zcD9yq1wo>!+pPpe9Bhe0dqYA(LR=Bz$d#o+{R0p}95_S>_-5B}L*am}nfYGxo8R}% zxAQRbS4O%P2>22Fwr9_mkHzmvJ5m<301O_6mNjGeV21>^5eXI$3F&AZ_qhSmc@id( zJ2;N^Sfc;3j60S!!4RlGVhKbhV2#6?fOQhqB&_4GPAP)nBGV*w8&TmR>SDu9W`CXuOe z>Ywc=+X(c91WF(y0MdS-5v*}NtWLI{fDZIv-{Jsw42eo4>nL`>WtKI`kpm=74&LF7 zeT^k%(LE3cS$3JkgAADtHHpD>u@g;weeVF-6feKd89^gNgCxX|wNL3j8mtLG;uZ~1 z4-F+5#8_90@4Lr7px^6NJfou8lh3ZS*f8Or`CI1`zllr zF3*V37!B7vV0{lx57QW&46}3UkYkLUnm`>p>~jdSRXcPj?G0z(J>0(4e)1|hhF(Tj z(QAktwj03h)DBu=LP17c7bt6wgg(|6X#y^Jk|z3r0m6vHj3k{TFC?JSrANjTBtAtt zM<9cvtTGNv={V#zZKSA+PD3U$G}V_Nj71%T5gTwr?Pvng8FG9A>fPhpOO#{4{e5(5 zfZ?cBI#qM`c=bL+uR>(}8Abs$vU-pvyO{Yp1vFIt5kiwBzKV<_Kt~;D;QZ8>rL((K zjJe&eDScy84vi>>=p0b7JF4^<_~wl?ou_diB&acx{d5<}ZTVAm+ zu{C?zxsbo6$*PuL->6(y^A}~UrZ-#px8&=ZqRZy0=ZF|$ucaF%H{#4K&%7!54yPHM zgTfxvXLxFmsccL5zVF=s2#ehJ&Ey$+1co*X6Erg_#SKg`K?L7M+o+G&lhVZ|*A^fz zjxjhfVvyrh3AS0d_Fpuj40gyKap*?Yyy=-fzFZ(mMXxkBT4!_E5^C~0n#vM+j4gkq z+1P@LD9wh0H_9k#Uiam2(frU?G?TtShe59~vZ8@?MBzcXXA`uy0j#T9Zryva_eOkBGhr< zWnK|)NUWe$g~*MHY+=RxPF(6NfX@;j$Fe#PqMBy9yQRui$e43kjwdiW8&DOA&*cAUjwPT(AifJK-2ePH|$FM`*Lqnsc6!56pVy$Mn-SCi diff --git a/data/master_backend.py b/data/master_backend.py index 08a698d4..0c4cb414 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -27,8 +27,8 @@ def parse_setup(): parser.add_argument("-t","--threads",type=int,help="Number of threads to use.", default = 4) parser.add_argument("-l","--lexicon",help="Optionally, link to a text file containing all names for the same region, one region per row, tab separated.", default = "") parser.add_argument("-X","--lookahead",type=int,help="Number to pass to parameter -X of introduce. Increase to merge nested clusters. Default 2", default = 2) - parser.add_argument("-H","--host",help="Web-accessible link to the current directory for taxodium cluster view.",default="https://raw.githubusercontent.com/jmcbroome/introduction-website/main/") parser.add_argument("-V","--taxversion",action='store_true',help="Export the view in Taxonium 2.0 jsonl format instead of taxonium protobuf. Requires the installation of taxoniumtools and adds some compute time.") + parser.add_argument("-H","--host",help="Web-accessible link to the current directory for taxodium cluster view.",default="https://clustertracker.gi.ucsc.edu/") args = parser.parse_args() return args From bbb1eebff0609dd792653249ecb40589c75334aa Mon Sep 17 00:00:00 2001 From: jmcbroome Date: Mon, 11 Jul 2022 19:28:10 -0700 Subject: [PATCH 09/11] add option to skip cluster inference and take currently available cluster information instead --- data/master_backend.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/data/master_backend.py b/data/master_backend.py index 0c4cb414..9d865fa1 100644 --- a/data/master_backend.py +++ b/data/master_backend.py @@ -29,6 +29,7 @@ def parse_setup(): parser.add_argument("-X","--lookahead",type=int,help="Number to pass to parameter -X of introduce. Increase to merge nested clusters. Default 2", default = 2) parser.add_argument("-V","--taxversion",action='store_true',help="Export the view in Taxonium 2.0 jsonl format instead of taxonium protobuf. Requires the installation of taxoniumtools and adds some compute time.") parser.add_argument("-H","--host",help="Web-accessible link to the current directory for taxodium cluster view.",default="https://clustertracker.gi.ucsc.edu/") + parser.add_argument("-S","--skip",action='store_true',help="Use to skip inference of introductions and go straight to preparing the data for display. hardcoded_clusters.tsv must already exist.") args = parser.parse_args() return args @@ -40,8 +41,11 @@ def primary_pipeline(args): else: conversion = {} # print(conversion) - print("Calling introduce.") - subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) + if not args.skip: + print("Calling introduce.") + subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True) + else: + print("Skipping introduction inference.") print("Updating map display data.") update_js(args.geojson, conversion) print("Generating top cluster tables.") From 7ba7617fcc975d1941daa8aeca11315140cb8d5e Mon Sep 17 00:00:00 2001 From: jmcbroome Date: Mon, 11 Jul 2022 19:50:47 -0700 Subject: [PATCH 10/11] remove unnecessary submodule listing --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .gitmodules diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index f597bfc3..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "taxonium"] - path = taxonium - url = https://github.com/theosanderson/taxonium From f7b945d52aa128bfc4677e178b72ae5037aa3326 Mon Sep 17 00:00:00 2001 From: jmcbroome Date: Mon, 11 Jul 2022 20:04:23 -0700 Subject: [PATCH 11/11] fix bug causing failure to generate default table display --- data/generate_display_tables.py | 10 +++++----- taxonium | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) delete mode 160000 taxonium diff --git a/data/generate_display_tables.py b/data/generate_display_tables.py index 12d552fb..d37d935e 100644 --- a/data/generate_display_tables.py +++ b/data/generate_display_tables.py @@ -1,4 +1,4 @@ -def generate_display_tables(conversion = {}, host = "https://raw.githubusercontent.com/jmcbroome/introduction-website/main/", extension = ".pb.gz"): +def generate_display_tables(conversion = {}, host = "https://clustertracker.gi.ucsc.edu/", extension = ".jsonl.gz"): filelines = {} def fix_month(datestr): monthswap = {"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12"} @@ -91,16 +91,16 @@ def generate_v2_link(cn): #additionally process the date strings outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link] print("\t".join(outline),file=outf) - mout.close() sorted_defaults = sorted(list(zip(default_growthvs,default_lines)),key=lambda x:-x[0]) with open("display_tables/default_clusters.tsv","w+") as outf: print(header,file=outf) for gv,dl in sorted_defaults: + spent = dl.split("\t") if extension=="pb.gz": - link = generate_v1_link(dl.split("\t")[0]) + link = generate_v1_link(spent[0]) else: - link = generate_v2_link(dl.split("\t")[0]) + link = generate_v2_link(spent[0]) outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link] print("\t".join(outline), file = outf) stateconv = {"AL":"Alabama","AK":"Alaska","AR":"Arkansas","AZ":"Arizona","CA":"California","CO":"Colorado", @@ -113,4 +113,4 @@ def generate_v2_link(cn): "WA":"Washington","WV":"West Virginia","WI":"Wisconsin","WY":"Wyoming","PR":"Puerto Rico"} stateconv.update({v:v for v in stateconv.values()}) if __name__ == "__main__": - generate_display_tables(stateconv, host = "https://raw.githubusercontent.com/jmcbroome/introduction-website/main/") + generate_display_tables(stateconv, host = "https://clustertracker.gi.ucsc.edu/", extension=".jsonl.gz") diff --git a/taxonium b/taxonium deleted file mode 160000 index c958d09f..00000000 --- a/taxonium +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c958d09f792af7241966471c07af3a04ebfe6f2b