Merge pull request #9 from pathogen-genomics/main-new-grid-tool
Rewrite table display to use SlickGrid and display all data instead of a limited subset
Showing 1,074 changed files with 762,014 additions and 19,738 deletions.
New file (stylesheet for the SlickGrid table):

@@ -0,0 +1,100 @@
@import url('../lib/slickgrid/slick-default-theme.css');

#myGrid, #pager, #searchbox, div.ui-dialog {
  font-family: arial;
  font-size: 8pt;
}
div.ui-dialog {line-height: normal;}

#myGrid {
  background: white;
  outline: 0;
  border: 1px solid gray;
}

#searchbox {
  float: right;
  text-align: right;
}
#txtSearch {border: 1px solid gray;}

#pager {margin-bottom: 6px;}

/* header height */
.slick-column-name { white-space: normal; }
.slick-header-column.ui-state-default { height: 45px; }

/* cell copy color */
.slick-cell.copied {
  background: blue;
  background: rgba(0, 0, 255, 0.2);
  -webkit-transition: 0.5s background;
}

/* sample IDs popup */
#sample-popup.ui-dialog-content {
  width: 300px;
  overflow: auto;
}

.options-panel li {
  /background: url("../lib/slickgrid/images/arrow_right_spearmint.png") no-repeat center left;
  padding: 0 0 0 14px;
  list-style: none;
  margin: 0;
}

.grid-header {
  border: 1px solid gray;
  border-bottom: 0;
  border-top: 0;
  background: url('../lib/slickgrid/images/header-bg.gif') repeat-x center top;
  color: black;
  height: 24px;
  line-height: 24px;
}

.grid-header label {
  display: inline-block;
  font-weight: bold;
  margin: auto auto auto 6px;
}

.grid-header .ui-icon {
  margin: 4px 4px auto 6px;
  background-color: transparent;
  border-color: transparent;
}

.grid-header .ui-icon.ui-state-hover {
  background-color: white;
}

.grid-header #txtSearch {
  margin: 0 4px 0 4px;
  padding: 2px 2px;
  -moz-border-radius: 2px;
  -webkit-border-radius: 2px;
  border: 1px solid silver;
}

.options-panel {
  -moz-border-radius: 6px;
  -webkit-border-radius: 6px;
  border: 1px solid silver;
  background: #f0f0f0;
  padding: 4px;
  margin-bottom: 20px;
  width: 320px;
  position: absolute;
  top: 0px;
  left: 650px;
}
Rewritten table-generation script (lines removed by the commit are prefixed with -, added lines with +):

@@ -1,116 +1,61 @@
-def generate_display_tables(conversion = {}, host = "https://clustertracker.gi.ucsc.edu/", extension = ".jsonl.gz"):
-    filelines = {}
+import json, gzip
+
+def generate_display_tables():
     #function to convert date format from YYYY-Mon-DD to YYYY-MM-DD
     def fix_month(datestr):
         monthswap = {"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12"}
         splitr = datestr.split("-")
         return splitr[0] + "-" + monthswap.get(splitr[1],splitr[1]) + "-" + splitr[2]
-    default_growthvs = []
-    default_lines = []
-    totbuff = [] #track overall no valid date clusters to fill in at the end.
+    # function to add quotes around a variable for JSON formatting
+    def addq(item):
+        return "\"" + item + "\""
+
+    # get clusters data and put into array
+    cluster_data = []
+    bad_date_data = []
     with open("hardcoded_clusters.tsv") as inf:
-        cr = "None"
-        buffer = []
         for entry in inf:
             spent = entry.strip().split("\t")
             if spent[0] == "cluster_id":
                 continue
-            reg = conversion[spent[9]]
-            if reg not in filelines:
-                filelines[reg] = []
-            if cr == "None":
-                cr = reg
-            elif reg != cr:
-                #when moving to a new region
-                if len(filelines[cr]) < 100:
-                    filelines[cr].extend(buffer[:100-len(filelines[cr])])
-                buffer = []
-                cr = reg
-            if spent[3] == "no-valid-date":
-                buffer.append(entry.strip())
-                totbuff.append((entry.strip(), float(spent[4])))
-                continue
-            if len(filelines[reg]) < 100:
-                filelines[reg].append(entry.strip())
-            #now, check to see if this scores in the top 100 overall. Significantly more complicated since we have to sort things out as we go here.
-            if len(default_lines) < 100:
-                default_growthvs.append(float(spent[4]))
-                default_lines.append(entry.strip())
-            elif float(spent[4]) > min(default_growthvs):
-                popind = default_growthvs.index(min(default_growthvs))
-                default_growthvs.pop(popind)
-                default_lines.pop(popind)
-                default_growthvs.append(float(spent[4]))
-                default_lines.append(entry.strip())
-                assert len(default_lines) == 100
-    #remove any remaining buffer for the last region in the file as well.
-    if len(filelines[cr]) < 100:
-        filelines[cr].extend(buffer[:100-len(filelines[cr])])
-    #and if there are less than 100 clusters with dates for the default view, extend that as well.
-    if len(default_lines) < 100:
-        totbuff.sort(key = lambda x: x[1], reverse = True)
-        for t in totbuff[:100-len(default_lines)]:
-            default_lines.append(t[0])
-            default_growthvs.append(0-1/t[1])
-    header = "Cluster ID\tRegion\tSample Count\tEarliest Date\tLatest Date\tClade\tLineage\tInferred Origins\tInferred Origin Confidences\tGrowth Score\tClick to View"
-    mout = open("cluster_labels.tsv","w+")
-    print("sample\tcluster",file=mout)
-    def generate_v1_link(cn):
-        link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension
-        link += '&search=[{"id":0.123,"category":"cluster","value":"'
-        link += cn
-        link += '","enabled":true,"aa_final":"any","min_tips":1,"aa_gene":"S","search_for_ids":""}]'
-        link += '&colourBy={"variable":"region","gene":"S","colourLines":false,"residue":"681"}'
-        link += "&zoomToSearch=0&blinking=false"
-        return link
-    def generate_v2_link(cn):
-        link = "https://taxonium.org/?protoUrl=" + host + "data/cview" + extension
-        link += '&srch=[{"key":"aa1","type":"meta_cluster","method":"text_match","text":"'
-        link += cn
-        link += '","gene":"S","position":484,"new_residue":"any","min_tips":0,"controls":true}]'
-        link += "&zoomToSearch=0"
-        return link
-    for reg, lines in filelines.items():
-        with open("display_tables/" + conversion[reg] + "_topclusters.tsv", "w+") as outf:
-            print(header,file=outf)
-            for l in lines:
-                #process the line into something more parseable.
-                spent = l.split("\t")
-                #save matching results to the other output files for downstream extraction of json
-                samples = spent[-1].split(",")
-                for s in samples:
-                    print(s + "\t" + spent[0],file=mout)
-                #generate a link to exist in the last column based on the global "host" variable, including all html syntax.
-                if extension=="pb.gz":
-                    link = generate_v1_link(spent[0])
-                else:
-                    link = generate_v2_link(spent[0])
-                #additionally process the date strings
-                outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link]
-                print("\t".join(outline),file=outf)
-    mout.close()
-    sorted_defaults = sorted(list(zip(default_growthvs,default_lines)),key=lambda x:-x[0])
-    with open("display_tables/default_clusters.tsv","w+") as outf:
-        print(header,file=outf)
-        for gv,dl in sorted_defaults:
-            spent = dl.split("\t")
-            if extension=="pb.gz":
-                link = generate_v1_link(spent[0])
-            else:
-                link = generate_v2_link(spent[0])
-            outline = [spent[0], spent[9], spent[1], fix_month(spent[2]), fix_month(spent[3]), spent[12], spent[13], spent[10], spent[11], spent[4], link]
-            print("\t".join(outline), file = outf)
-stateconv = {"AL":"Alabama","AK":"Alaska","AR":"Arkansas","AZ":"Arizona","CA":"California","CO":"Colorado",
-    "CT":"Connecticut","DE":"Delaware","DC":"District of Columbia","FL":"Florida","GA":"Georgia","HI":"Hawaii",
-    "ID":"Idaho","IL":"Illinois","IN":"Indiana","IA":"Iowa","KS":"Kansas","KY":"Kentucky","LA":"Louisiana","ME":"Maine",
-    "MD":"Maryland","MA":"Massachusetts","MI":"Michigan","MN":"Minnesota","MS":"Mississippi","MO":"Missouri","MT":"Montana",
-    "NE":"Nebraska","NV":"Nevada","NH":"New Hampshire","NJ":"New Jersey","NM":"New Mexico","NY":"New York","NC":"North Carolina",
-    "ND":"North Dakota","OH":"Ohio","OK":"Oklahoma","OR":"Oregon","PA":"Pennsylvania","RI":"Rhode Island",
-    "SC":"South Carolina","SD":"South Dakota","TN":"Tennessee","TX":"Texas","UT":"Utah","VT":"Vermont","VA":"Virginia",
-    "WA":"Washington","WV":"West Virginia","WI":"Wisconsin","WY":"Wyoming","PR":"Puerto Rico"}
-stateconv.update({v:v for v in stateconv.values()})
+            if spent[2] == "no-valid-date" and spent[3] == "no-valid-date":
+                bad_date_data.append(spent)
+            else:
+                #fix date format
+                spent[2] = fix_month(spent[2])
+                spent[3] = fix_month(spent[3])
+                cluster_data.append(spent)
+
+    #now, sort by growth score
+    cluster_data.sort(key = lambda x: x[4], reverse = True)
+    # sort clusters with no-valid-date by growth score and append to cluster_data at the end
+    bad_date_data.sort(key = lambda x: x[4], reverse = True)
+    cluster_data.extend(bad_date_data)
+
+    #output data to be compatible with parse.JSON
+    # -create as compact a string as possible,
+    # -only add quotes to items that are strings to save space
+    txt_data = "["
+    txt_samples = "["
+    for i, d in enumerate(cluster_data):
+        outline_data = [addq(d[0]), addq(d[9]), d[1], addq(d[2]), addq(d[3]), addq(d[12]), addq(d[13]), addq(d[10]), d[11], d[4]]
+        outline_samples = [addq(d[15])]
+        txt_data += "[" + ",".join(outline_data) + "]"
+        txt_samples += "[" + ",".join(outline_samples) + "]"
+        if i == len(cluster_data)-1:
+            txt_data += "]"
+            txt_samples += "]"
+        else:
+            txt_data += ","
+            txt_samples += ","
+
+    #now write data to file, and gzip for quicker loading into browser
+    #basic cluster data (no samples)
+    with gzip.open("cluster_data.json.gz", "wb") as f:
+        f.write(txt_data.encode())
+    #sample names for each cluster
+    with gzip.open("sample_data.json.gz", "wb") as f:
+        f.write(txt_samples.encode())
 
 if __name__ == "__main__":
-    generate_display_tables(stateconv, host = "https://clustertracker.gi.ucsc.edu/", extension=".jsonl.gz")
+    generate_display_tables()
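Since the new script assembles its JSON output by hand rather than going through json.dumps, a round-trip check is a useful companion. The sketch below is an illustration, not part of the commit: it assumes the script has already been run in the current directory, and that the unquoted columns (sample count, origin confidences, growth score) each hold a single numeric value, since anything else would make the hand-built string invalid JSON.

import gzip, json

# Decompress and parse the generated file the same way the browser-side
# JSON.parse would, confirming the hand-assembled string is valid JSON.
with gzip.open("cluster_data.json.gz", "rt") as f:
    rows = json.load(f)

# Column order follows outline_data in generate_display_tables():
# cluster ID, region, sample count, earliest date, latest date,
# clade, lineage, inferred origins, origin confidences, growth score.
print(len(rows), "clusters parsed")
print(rows[0])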
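The comments in the output loop above explain the two size-saving choices: rows are positional arrays rather than keyed objects, and quotes are added only around string fields. A small illustration of why that matters, using invented values for one hypothetical cluster row:

import json

# One hypothetical cluster rendered both ways (all values made up).
as_object = {"cluster_id": "CA_100", "region": "California", "sample_count": 42,
             "earliest": "2021-08-05", "latest": "2021-09-01", "growth": 3.7}
as_array = ["CA_100", "California", 42, "2021-08-05", "2021-09-01", 3.7]

# The object form repeats every key in every row; the positional form drops them.
print(len(json.dumps(as_object, separators=(",", ":"))))
print(len(json.dumps(as_array, separators=(",", ":"))))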