Skip to content

Commit

Permalink
Merge pull request #9 from pathogen-genomics/main-new-grid-tool
Browse files Browse the repository at this point in the history
Rewrite table display to use SlickGrid and display all data instead of a limited subset
  • Loading branch information
jmcbroome authored Sep 7, 2022
2 parents 7287e76 + 71f4632 commit 04f14f0
Show file tree
Hide file tree
Showing 1,074 changed files with 762,014 additions and 19,738 deletions.
23 changes: 11 additions & 12 deletions css/custom.css
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,22 @@ DePaul IHS
}
@font-face {
    font-family: 'ColaborateRegRegular';
    /* NOTE(review): this span interleaved the pre-change (http) and post-change
       (https) src declarations from the diff; only the https versions are kept.
       The double src is the standard IE8 "?#iefix" @font-face pattern. */
    src: url('https://www.housingstudies.org/static/css/fonts/ColabReg-webfont.eot');
    src: url('https://www.housingstudies.org/static/css/fonts/ColabReg-webfont.eot?#iefix') format('embedded-opentype'),
         url('https://www.housingstudies.org/static/css/fonts/ColabReg-webfont.woff') format('woff'),
         url('https://www.housingstudies.org/static/css/fonts/ColabReg-webfont.ttf') format('truetype'),
         url('https://www.housingstudies.org/static/css/fonts/ColabReg-webfont.svg#ColaborateRegRegular') format('svg');
    font-weight: normal;
    font-style: normal;
}
@font-face {
    font-family: 'AlegreyaBold';
    src: url('https://www.housingstudies.org/static/css/fonts/Alegreya-Bold-webfont.eot');
    src: url('https://www.housingstudies.org/static/css/fonts/Alegreya-Bold-webfont.eot?#iefix') format('embedded-opentype'),
         url('https://www.housingstudies.org/static/css/fonts/Alegreya-Bold-webfont.woff') format('woff'),
         url('https://www.housingstudies.org/static/css/fonts/Alegreya-Bold-webfont.ttf') format('truetype'),
         url('https://www.housingstudies.org/static/css/fonts/Alegreya-Bold-webfont.svg#AlegreyaBold') format('svg');
    font-weight: normal;
    font-style: normal;
}
Expand Down Expand Up @@ -110,6 +110,5 @@ h2 {

#dropcolor {
    position: absolute;
    /* post-change values from the diff: the old "right: 0px" line was replaced
       by the padding-top / right: 8px pair */
    padding-top: 14px;
    right: 8px;
}
100 changes: 100 additions & 0 deletions css/gridstyles.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
@import url('../lib/slickgrid/slick-default-theme.css');

/* Styling for the SlickGrid cluster table, its pager, and the search box. */
#myGrid, #pager, #searchbox, div.ui-dialog {
    font-family: arial;
    font-size: 8pt;
}
div.ui-dialog { line-height: normal; }

#myGrid {
    background: white;
    outline: 0;
    border: 1px solid gray;
}

#searchbox {
    float: right;
    text-align: right;
}
#txtSearch { border: 1px solid gray; }

#pager { margin-bottom: 6px; }

/* header height */
.slick-column-name { white-space: normal; }
.slick-header-column.ui-state-default { height: 45px; }

/* cell copy color */
.slick-cell.copied {
    background: blue; /* fallback for browsers without rgba() */
    background: rgba(0, 0, 255, 0.2);
    -webkit-transition: 0.5s background;
    /* standard property added so current browsers (which ignore the
       -webkit- prefix) animate the copy highlight too */
    transition: 0.5s background;
}

/* sample IDs popup */
#sample-popup.ui-dialog-content {
    width: 300px;
    overflow: auto;
}

.options-panel li {
    /* bullet image intentionally disabled; the original line began with a
       stray "/" which made it an invalid (silently ignored) declaration —
       kept disabled, but now as a real CSS comment */
    /* background: url("../lib/slickgrid/images/arrow_right_spearmint.png") no-repeat center left; */
    padding: 0 0 0 14px;
    list-style: none;
    margin: 0;
}

.grid-header {
    border: 1px solid gray;
    border-bottom: 0;
    border-top: 0;
    background: url('../lib/slickgrid/images/header-bg.gif') repeat-x center top;
    color: black;
    height: 24px;
    line-height: 24px;
}

.grid-header label {
    display: inline-block;
    font-weight: bold;
    margin: auto auto auto 6px;
}

.grid-header .ui-icon {
    margin: 4px 4px auto 6px;
    background-color: transparent;
    border-color: transparent;
}

.grid-header .ui-icon.ui-state-hover {
    background-color: white;
}

.grid-header #txtSearch {
    margin: 0 4px 0 4px;
    padding: 2px 2px;
    -moz-border-radius: 2px;
    -webkit-border-radius: 2px;
    border-radius: 2px; /* unprefixed form — the prefixed ones are long obsolete */
    border: 1px solid silver;
}

.options-panel {
    -moz-border-radius: 6px;
    -webkit-border-radius: 6px;
    border-radius: 6px;
    border: 1px solid silver;
    background: #f0f0f0;
    padding: 4px;
    margin-bottom: 20px;
    width: 320px;
    position: absolute;
    top: 0px;
    left: 650px;
}



155 changes: 50 additions & 105 deletions data/generate_display_tables.py
Original file line number Diff line number Diff line change
@@ -1,116 +1,61 @@
import json, gzip


def generate_display_tables():
    """Convert hardcoded_clusters.tsv into two gzipped JSON files for the grid.

    Reads every cluster row (no longer a top-100 subset), normalizes the date
    columns, orders clusters by descending growth score with the
    "no-valid-date" clusters appended last, and writes:

      * cluster_data.json.gz — one compact JSON row per cluster
        [id, region, count, earliest, latest, clade, lineage,
         origins, confidences, growth]
      * sample_data.json.gz  — the comma-joined sample names per cluster,
        in the same order

    The JSON text is built by hand so numeric columns stay unquoted, keeping
    the payload as small as possible for the browser.
    """
    # function to convert date format from YYYY-Mon-DD to YYYY-MM-DD
    def fix_month(datestr):
        monthswap = {"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05","Jun":"06","Jul":"07","Aug":"08","Sep":"09","Oct":"10","Nov":"11","Dec":"12"}
        splitr = datestr.split("-")
        # unrecognized month tokens pass through unchanged
        return splitr[0] + "-" + monthswap.get(splitr[1], splitr[1]) + "-" + splitr[2]

    # function to add quotes around a variable for JSON formatting
    def addq(item):
        return "\"" + item + "\""

    # get clusters data and put into arrays
    cluster_data = []
    bad_date_data = []  # clusters lacking any valid date; shown after the dated ones
    with open("hardcoded_clusters.tsv") as inf:
        for entry in inf:
            spent = entry.strip().split("\t")
            if spent[0] == "cluster_id":
                continue  # skip the header row
            if spent[2] == "no-valid-date" and spent[3] == "no-valid-date":
                bad_date_data.append(spent)
            else:
                # fix date format
                spent[2] = fix_month(spent[2])
                spent[3] = fix_month(spent[3])
                cluster_data.append(spent)

    # now, sort by growth score — numerically, not lexically: the TSV column is
    # text, and a plain string sort would rank "9" above "10"
    cluster_data.sort(key=lambda x: float(x[4]), reverse=True)
    # sort clusters with no-valid-date by growth score and append to
    # cluster_data at the end
    bad_date_data.sort(key=lambda x: float(x[4]), reverse=True)
    cluster_data.extend(bad_date_data)

    # output data to be compatible with JSON.parse:
    #  -create as compact a string as possible
    #  -only add quotes to items that are strings to save space
    # NOTE(review): columns 1, 11 and 4 are written unquoted and assumed to be
    # plain numbers in the TSV — confirm against the pipeline that produces it.
    data_rows = []
    sample_rows = []
    for d in cluster_data:
        outline_data = [addq(d[0]), addq(d[9]), d[1], addq(d[2]), addq(d[3]),
                        addq(d[12]), addq(d[13]), addq(d[10]), d[11], d[4]]
        data_rows.append("[" + ",".join(outline_data) + "]")
        sample_rows.append("[" + addq(d[15]) + "]")
    # joining row strings also yields a valid "[]" when there are no clusters,
    # where the previous incremental concatenation left an unterminated "["
    txt_data = "[" + ",".join(data_rows) + "]"
    txt_samples = "[" + ",".join(sample_rows) + "]"

    # now write data to file, and gzip for quicker loading into browser
    # basic cluster data (no samples)
    with gzip.open("cluster_data.json.gz", "wb") as f:
        f.write(txt_data.encode())
    # sample names for each cluster
    with gzip.open("sample_data.json.gz", "wb") as f:
        f.write(txt_samples.encode())


if __name__ == "__main__":
    generate_display_tables()
2 changes: 1 addition & 1 deletion data/master_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def primary_pipeline(args):
print("Updating map display data.")
update_js(args.geojson, conversion)
print("Generating top cluster tables.")
generate_display_tables(conversion, host = args.host, extension = ".jsonl.gz" if args.taxversion else ".pb.gz")
generate_display_tables()
print("Preparing taxodium view.")
sd = {}
with open("hardcoded_clusters.tsv") as inf:
Expand Down
Loading

0 comments on commit 04f14f0

Please sign in to comment.