Skip to content

Commit

Permalink
Merge pull request #8 from jmcbroome/taxonium_2
Browse files Browse the repository at this point in the history
Update to Taxonium 2.0
  • Loading branch information
jmcbroome authored Jul 25, 2022
2 parents 8d84b6a + f7b945d commit 7287e76
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 29 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
**/display_tables
data/cview.pb
data/cview.pb.gz
data/cview.jsonl.gz
data/regions.js
data/us-states.geo.json
data/europe.geo.json
data/hardcoded_clusters.tsv
data/NC_045512v2.fa
data/ncbiGenes.gtf
data/clean.pb
data/cluster_labels.tsv
data/cluster_labels.tsv
Binary file modified data/__pycache__/generate_display_tables.cpython-38.pyc
Binary file not shown.
Binary file modified data/__pycache__/master_backend.cpython-38.pyc
Binary file not shown.
Binary file modified data/__pycache__/update_js.cpython-38.pyc
Binary file not shown.
40 changes: 25 additions & 15 deletions data/generate_display_tables.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
def generate_display_tables(conversion = {}, host = "https://raw.githubusercontent.com/jmcbroome/introduction-website/main/"):
def generate_display_tables(conversion = {}, host = "https://clustertracker.gi.ucsc.edu/", extension = ".jsonl.gz"):
filelines = {}
def fix_month(datestr):
monthswap = {"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12"}
Expand Down Expand Up @@ -54,6 +54,21 @@ def fix_month(datestr):
header = "Cluster ID\tRegion\tSample Count\tEarliest Date\tLatest Date\tClade\tLineage\tInferred Origins\tInferred Origin Confidences\tGrowth Score\tClick to View"
mout = open("cluster_labels.tsv","w+")
print("sample\tcluster",file=mout)
def generate_v1_link(cn):
    """Return the taxonium.org link for cluster *cn* using the older query format.

    The tree file URL is built from the enclosing function's ``host`` and
    ``extension`` values (``host + "data/cview" + extension``). The link
    carries a ``search`` entry on the "cluster" category whose value is *cn*,
    a ``colourBy`` setting keyed on region, and zoom/blink flags.
    """
    tree_url = host + "data/cview" + extension
    # Query fragments are kept verbatim; the viewer parses them as written.
    fragments = (
        "https://taxonium.org/?protoUrl=",
        tree_url,
        '&search=[{"id":0.123,"category":"cluster","value":"',
        cn,
        '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]',
        '&colourBy={"variable":"region","gene":"S","colourLines":false,"residue":"681"}',
        "&zoomToSearch=0&blinking=false",
    )
    return "".join(fragments)
def generate_v2_link(cn):
    """Return the taxonium.org link for cluster *cn* using the newer query format.

    The tree file URL is built from the enclosing function's ``host`` and
    ``extension`` values (``host + "data/cview" + extension``). The link
    carries a ``srch`` entry that text-matches *cn* against the
    ``meta_cluster`` field, followed by the zoom flag.
    """
    tree_url = host + "data/cview" + extension
    # Query fragments are kept verbatim; the viewer parses them as written.
    fragments = (
        "https://taxonium.org/?protoUrl=",
        tree_url,
        '&srch=[{"key":"aa1","type":"meta_cluster","method":"text_match","text":"',
        cn,
        '","gene":"S","position":484,"new_residue":"any","min_tips":0,"controls":true}]',
        "&zoomToSearch=0",
    )
    return "".join(fragments)
for reg, lines in filelines.items():
with open("display_tables/" + conversion[reg] + "_topclusters.tsv", "w+") as outf:
print(header,file=outf)
Expand All @@ -69,28 +84,23 @@ def fix_month(datestr):
#generate a link to exist in the last column
#based on the global "host" variable.
#and including all html syntax.
link = "https://taxonium.org/?protoUrl=" + host + "data/cview.pb.gz"
link += '&search=[{"id":0.123,"category":"cluster","value":"'
link += spent[0]
link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]'
link += '&colourBy={"variable":"region","gene":"S","colourLines":false,"residue":"681"}'
link += "&zoomToSearch=0&blinking=false"
# The extension is appended directly to "data/cview", so callers pass it
# with its leading dot (".pb.gz" / ".jsonl.gz"). Strip that dot before
# comparing; otherwise the protobuf case never matches and every export
# gets the v2 link. A bare "pb.gz" is still accepted.
if extension.lstrip(".") == "pb.gz":
    link = generate_v1_link(spent[0])
else:
    link = generate_v2_link(spent[0])
#additionally process the date strings
outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link]
print("\t".join(outline),file=outf)

mout.close()
sorted_defaults = sorted(list(zip(default_growthvs,default_lines)),key=lambda x:-x[0])
with open("display_tables/default_clusters.tsv","w+") as outf:
print(header,file=outf)
for gv,dl in sorted_defaults:
spent = dl.split("\t")
link = "https://taxonium.org/?protoUrl=" + host + "data/cview.pb.gz"
link += '&search=[{"id":0.123,"category":"cluster","value":"'
link += spent[0]
link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]'
link += '&colourBy={"variable":"region","gene":"S","colourLines":false,"residue":"681"}'
link += "&zoomToSearch=0&blinking=false"
# The extension is appended directly to "data/cview", so callers pass it
# with its leading dot (".pb.gz" / ".jsonl.gz"). Strip that dot before
# comparing; otherwise the protobuf case never matches and every export
# gets the v2 link. A bare "pb.gz" is still accepted.
if extension.lstrip(".") == "pb.gz":
    link = generate_v1_link(spent[0])
else:
    link = generate_v2_link(spent[0])
outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link]
print("\t".join(outline), file = outf)
stateconv = {"AL":"Alabama","AK":"Alaska","AR":"Arkansas","AZ":"Arizona","CA":"California","CO":"Colorado",
Expand All @@ -103,4 +113,4 @@ def fix_month(datestr):
"WA":"Washington","WV":"West Virginia","WI":"Wisconsin","WY":"Wyoming","PR":"Puerto Rico"}
stateconv.update({v:v for v in stateconv.values()})
if __name__ == "__main__":
generate_display_tables(stateconv, host = "https://raw.githubusercontent.com/jmcbroome/introduction-website/main/")
generate_display_tables(stateconv, host = "https://clustertracker.gi.ucsc.edu/", extension=".jsonl.gz")
29 changes: 16 additions & 13 deletions data/master_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ def parse_setup():
parser.add_argument("-t","--threads",type=int,help="Number of threads to use.", default = 4)
parser.add_argument("-l","--lexicon",help="Optionally, link to a text file containing all names for the same region, one region per row, tab separated.", default = "")
parser.add_argument("-X","--lookahead",type=int,help="Number to pass to parameter -X of introduce. Increase to merge nested clusters. Default 2", default = 2)
parser.add_argument("-H","--host",help="Web-accessible link to the current directory for taxodium cluster view.",default="https://raw.githubusercontent.com/jmcbroome/introduction-website/main/")
parser.add_argument("-V","--taxversion",action='store_true',help="Export the view in Taxonium 2.0 jsonl format instead of taxonium protobuf. Requires the installation of taxoniumtools and adds some compute time.")
parser.add_argument("-H","--host",help="Web-accessible link to the current directory for taxodium cluster view.",default="https://clustertracker.gi.ucsc.edu/")
parser.add_argument("-S","--skip",action='store_true',help="Use to skip inference of introductions and go straight to preparing the data for display. hardcoded_clusters.tsv must already exist.")
args = parser.parse_args()
return args

Expand All @@ -39,20 +41,17 @@ def primary_pipeline(args):
else:
conversion = {}
# print(conversion)
print("Calling introduce.")
subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True)
if not args.skip:
print("Calling introduce.")
subprocess.check_call("matUtils introduce -i " + args.input + " -s " + args.sample_regions + " -u hardcoded_clusters.tsv -T " + str(args.threads) + " -X " + str(args.lookahead), shell=True)
else:
print("Skipping introduction inference.")
print("Updating map display data.")
update_js(args.geojson, conversion)
print("Generating top cluster tables.")
generate_display_tables(conversion, host = args.host)
print("Generating top cluster tables.")
generate_display_tables(conversion, host = args.host, extension = ".jsonl.gz" if args.taxversion else ".pb.gz")
print("Preparing taxodium view.")
sd = {}
# with open("cluster_labels.tsv") as inf:
# for entry in inf:
# spent = entry.strip().split()
# if spent[0] == "sample":
# continue
# sd[spent[0]] = spent[1]
with open("hardcoded_clusters.tsv") as inf:
for entry in inf:
spent = entry.strip().split('\t')
Expand Down Expand Up @@ -88,8 +87,12 @@ def primary_pipeline(args):
spent.append("None")
i += 1
print("\t".join(spent),file=outf)
print("Generating viewable pb.")
subprocess.check_call("matUtils extract -i " + args.input + " -M clusterswapped.tsv -F cluster,region --write-taxodium cview.pb --title Cluster-Tracker -g " + args.annotation + " -f " + args.reference,shell=True)
if not args.taxversion:
print("Generating viewable pb.")
subprocess.check_call("matUtils extract -i " + args.input + " -M clusterswapped.tsv -F cluster,region --write-taxodium cview.pb --title Cluster-Tracker -g " + args.annotation + " -f " + args.reference,shell=True)
else:
print("Generating viewable jsonl.")
subprocess.check_call("usher_to_taxonium -i " + args.input + " -m clusterswapped.tsv -c cluster,region -o cview.jsonl.gz --title Cluster-Tracker",shell=True)
print("Process completed; check website for results.")

if __name__ == "__main__":
Expand Down

0 comments on commit 7287e76

Please sign in to comment.