Skip to content

Commit

Permalink
improved range search & fixed a bug in classification search when sea…
Browse files Browse the repository at this point in the history
…rching for heredicare variants
  • Loading branch information
MarvinDo committed Nov 3, 2023
1 parent d3ed644 commit b3e58be
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 38 deletions.
18 changes: 9 additions & 9 deletions src/common/db_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,15 +444,12 @@ def add_constraints_to_command(self, command, constraints, operator = 'AND'): #
return command

def preprocess_range(self, range_constraint):
    """Split a textual genomic range into chromosome, start and end.

    Accepts both 'chrom-start-end' and the older 'chrom:start-end' form.

    Parameters:
        range_constraint: the range string to parse.

    Returns:
        A tuple (chrom, start, end) with start and end as int, or
        (None, None, None) if the string does not consist of exactly three
        fields or if start/end are not integers.
    """
    # Treat ':' like '-' so that both supported notations end up as three
    # dash-separated fields.
    parts = range_constraint.strip().replace(':', '-').split('-')
    if len(parts) != 3:
        return None, None, None
    chrom = parts[0]  # local name avoids shadowing the builtin chr()
    try:
        start = int(parts[1])
        end = int(parts[2])
    except ValueError:
        # non-numeric positions are reported like any other malformed range
        return None, None, None
    return chrom, start, end

def convert_to_gene_id(self, string):
Expand Down Expand Up @@ -663,7 +660,8 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use
new_constraints_inner = new_constraints_inner + "SELECT variant_id FROM consensus_classification WHERE classification IN " + placeholders + " AND is_recent = 1"
actual_information += tuple(consensus_without_dash)
new_constraints = "variant.id IN (" + new_constraints_inner + ")"
postfix = self.add_constraints_to_command(postfix, new_constraints)
#postfix = self.add_constraints_to_command(postfix, new_constraints)
constraints_complete = new_constraints

if include_heredicare_consensus and len(consensus_without_dash) > 0:
heredicare_consensus = []
Expand All @@ -674,7 +672,9 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use
new_constraints = "variant.id IN (SELECT variant_id FROM variant_heredicare_annotation WHERE consensus_class IN " + placeholders1 + " AND variant_id NOT IN (SELECT variant_id FROM consensus_classification WHERE classification NOT IN " + placeholders2 + " AND is_recent = 1))"
actual_information += tuple(heredicare_consensus)
actual_information += tuple(consensus_without_dash)
postfix = self.add_constraints_to_command(postfix, new_constraints, 'OR')
constraints_complete = functions.enbrace(constraints_complete + " OR " + new_constraints)

postfix = self.add_constraints_to_command(postfix, constraints_complete)

if user is not None and len(user) > 0:
new_constraints_inner = ''
Expand Down
2 changes: 2 additions & 0 deletions src/frontend_celery/webapp/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ <h4>Changelog</h4>
<ul>
<li>Added option to include heredicare consensus classifications in consensus classification search</li>
<li>Added BayesDEL annotation</li>
<li>Improved ClinVar review status & summary display</li>
<li>Improved range search. Each range search entry must now have the format chrom&lt;sep&gt;start&lt;sep&gt;end, where &lt;sep&gt; is one of ':', '-', space or tab. The chromosome may be given with or without the 'chr' prefix.</li>
</ul>
Bugfixes:
<ul>
Expand Down
7 changes: 7 additions & 0 deletions src/frontend_celery/webapp/templates/macros.html
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,13 @@
>{{ request.args.get('hgvs', '') }}</textarea>
</div>

<div class="form-group sst">
<label for="external_ids" class="form-label">External IDs</label>
<textarea type="text" name="external_ids" id="external_ids"
placeholder="..." class="form-control"
>{{ request.args.get('external_ids', '') }}</textarea>
</div>

<div class="form-group sst">
{% set selected_consensus = request.args.getlist('consensus') %}
<label for="consensus-select" class="form-label">Consensus classification</label>
Expand Down
19 changes: 15 additions & 4 deletions src/frontend_celery/webapp/templates/variant/variant_base.html
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,23 @@ <h4 class="card-subcaption">HerediCare center classifications</h4>
<h4 class="card-subcaption">ClinVar classifications</h4>

<div class="ssl sst ssb">
<div>
<abbr title="The review status from ClinVar"><a class="dashed_underline">review status:</a></abbr>&nbsp;{{ clinvar.review_status }}
<div class="d-flex">
<abbr class="ssr width_small" title="The review status from ClinVar"><a class="dashed_underline">review status:</a></abbr>
<div>{{ clinvar.review_status }}</div>
</div>

<div>
<abbr title="A summary of the ClinVar interpretations. The numbers in brackets display the number of ClinVar submissions reporting the specific type of interpretation."><a class="dashed_underline">interpretation summary:</a></abbr>&nbsp;{{ clinvar.interpretation_summary }}
<div class="d-flex">
<abbr class="ssr width_small" title="A summary of the ClinVar interpretations. The numbers in brackets display the number of ClinVar submissions reporting the specific type of interpretation.">
<a class="dashed_underline">summary:</a>
</abbr>
<ul class="list-group list-group-horizontal-sm list-nopad-td list-nobackground" >
{% for item in clinvar.interpretation_summary.split('|') %}
<li class="list-group-item noboarder-td" >
{{ item.replace('(', ' (') }}
</li>
{% endfor %}
</ul>

</div>
</div>
<!-- clinvar submission table -->
Expand Down
78 changes: 53 additions & 25 deletions src/frontend_celery/webapp/utils/search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,40 +23,68 @@ def preprocess_query(query, pattern = '.*'):
def get_search_query_separators():
    """Return the regex character class that separates search query entries.

    The class matches a semicolon, a comma or a newline.
    """
    return '[;,\n]'

def bed_ranges_to_heredivar_ranges(ranges):
    """Convert newline-separated range entries into one ';'-joined string.

    Each line is stripped of surrounding whitespace. Lines that still contain
    a tab are treated as BED-style and passed to
    convert_bed_line_to_heredivar_range(); lines for which it returns None
    are dropped. All other lines are kept unchanged. Blank lines are skipped
    so they do not produce empty entries in the result.

    Parameters:
        ranges: string with one range entry per line.

    Returns:
        The resulting entries joined with ';'.
    """
    result = []
    for range_entry in ranges.split('\n'):
        range_entry = range_entry.strip()
        if not range_entry:
            continue
        if '\t' in range_entry:  # it is a bed style range
            new_heredivar_range = convert_bed_line_to_heredivar_range(range_entry)
            if new_heredivar_range is not None:
                result.append(new_heredivar_range)
        else:  # it is already a heredivar style range
            result.append(range_entry)
    return ';'.join(result)


def convert_bed_line_to_heredivar_range(bed_line):
parts = bed_line.split('\t')
#def bed_ranges_to_heredivar_ranges(ranges):
# ranges = re.split('[\n]', ranges)
# result = []
# for range_entry in ranges:
# range_entry = range_entry.strip()
# if '\t' in range_entry: # it is a bed style range
# new_heredivar_range = convert_bed_line_to_heredivar_range(range_entry)
# if new_heredivar_range is not None:
# result.append(new_heredivar_range)
# else: # it is already a heredivar style range
# result.append(range_entry)
# return ';'.join(result)
#
#
#def convert_bed_line_to_heredivar_range(bed_line):
# parts = bed_line.split('\t')
# chrom = parts[0]
# chr_num = functions.validate_chr(chrom)
# if chr_num is None:
# return None
# chrom = 'chr' + str(chr_num)
# start = parts[1] # bed ranges are zero based at the start position
# end = int(parts[2]) - 1 # bed ranges are one based at the end position -> need to subtract one because mysql has start and end zero based when using BETWEEN operator
# return chrom + ':' + str(start) + '-' + str(end)

def preprocess_ranges(ranges):
    """Normalize a raw, possibly multi-entry, range query string.

    The string is split on the separators returned by
    get_search_query_separators(). Every non-blank entry is normalized with
    proprocess_range_worker(); entries for which it returns None are dropped
    and, if any were dropped, a message is flashed to the user.

    Parameters:
        ranges: the raw query string, or None.

    Returns:
        None if ranges is None, otherwise the remaining normalized entries
        joined with ';'.
    """
    if ranges is None:
        return None
    seps = get_search_query_separators()
    # Blank entries (trailing separator, empty line) are not user errors, so
    # they are skipped instead of being counted as erroneous ranges.
    entries = [entry for entry in re.split(seps, ranges) if entry.strip()]
    normalized = [proprocess_range_worker(entry) for entry in entries]
    valid = [entry for entry in normalized if entry is not None]  # filter out erroneous
    if len(normalized) != len(valid):
        flash("At least one of your range query(s) has an error. Please check the syntax. The erroneous range query(s) were removed. You still have " + str(len(valid)) + " ranges after removing the erroneous ones.", 'alert-danger')
    return ';'.join(valid)

def proprocess_range_worker(r):
    """Normalize one range entry to the form 'chr<N>-start-end'.

    ':', tab and space are all treated as field separators and replaced by
    '-'. The first field is checked with functions.validate_chr() and then
    rewritten with a 'chr' prefix.

    Parameters:
        r: a single range entry, e.g. 'chr1:100-200' or '1<TAB>100<TAB>200'.

    Returns:
        The normalized entry, or None if the entry does not consist of
        exactly three fields or functions.validate_chr() returns None for
        the chromosome field.
    """
    r = r.strip()
    r = r.replace(':', '-').replace('\t', '-').replace(' ', '-')
    parts = r.split('-')
    if len(parts) != 3:
        # chrom, start and end are required; anything else is malformed
        return None
    chr_num = functions.validate_chr(parts[0])
    if chr_num is None:
        return None
    parts[0] = 'chr' + str(chr_num)
    return '-'.join(parts)


def extract_ranges(request_obj):
    """Read the 'ranges' query argument and return the preprocessed ranges.

    A non-empty argument is normalized with preprocess_ranges() and then
    passed through preprocess_query() with the dash-separated range pattern.
    If preprocess_query() returns None, an error message is flashed and None
    is returned.

    Parameters:
        request_obj: object whose args.get(name, default) returns the query
            argument.

    Returns:
        An empty list if no ranges were given, otherwise whatever
        preprocess_query() returns for the normalized ranges.
    """
    ranges = request_obj.args.get('ranges', '')
    if ranges == '':
        return []
    ranges = preprocess_ranges(ranges)
    ranges = preprocess_query(ranges, pattern= r"(chr)?.+-\d+-\d+")
    if ranges is None:
        flash("You have an error in your range query(s). Please check the syntax! Results are not filtered by ranges.", "alert-danger")
    return ranges


def extract_genes(request_obj):
genes = request_obj.args.get('genes', '')
genes = preprocess_query(genes)
Expand Down

0 comments on commit b3e58be

Please sign in to comment.