Skip to content

Commit

Permalink
update priors
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvinDo committed Aug 21, 2024
1 parent 0c2f499 commit 38c6be4
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
4 changes: 2 additions & 2 deletions data/script/download_hci_priors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ tabix -p vcf priors_hg19.vcf.gz


## crossmap to lift from GRCh37 to GRCh38
CrossMap.py vcf $data/genomes/hg19ToHg38.fixed.over.chain.gz priors_hg19.vcf.gz $grch38 priors.vcf
CrossMap vcf $data/genomes/hg19ToHg38.fixed.over.chain.gz priors_hg19.vcf.gz $grch38 priors.vcf
rm priors_hg19.vcf.gz
rm priors_hg19.vcf.gz.tbi


python3 $dbconverter -g MLH1 -e exon1 >> priors.vcf

##### STILL MISSING:
python3 $dbconverter -g MSH2 -e exon1 >> priors.vcf
python3 $dbconverter -g MSH2 -e exon1 > priors_msh2.vcf
python3 $dbconverter -g MSH6 -e exon1 >> priors.vcf

$ngsbits/VcfSort -in priors.vcf -out priors.vcf
Expand Down
10 changes: 6 additions & 4 deletions tools/priors_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,13 @@ def extract_ref_alt(variant_container_text):

def parse_html(url):
    """Fetch *url* and return the page parsed as an lxml HTML document.

    Returns None on any request/HTTP failure so the caller (retry_parse_html)
    can retry; the error is logged to stderr via functions.eprint.
    """
    resp = None
    try:
        # NOTE(review): verify=False disables TLS certificate verification —
        # confirm this is intentional for priors.hci.utah.edu.
        resp = requests.get(url, verify=False)
        resp.raise_for_status()
        html_text = resp.text
        return lxml.html.fromstring(html_text)
    except requests.RequestException:
        # resp stays None if the request itself failed (DNS error, refused
        # connection, timeout) — in that case there is no status/body to log.
        if resp is not None:
            functions.eprint(resp.text)
            functions.eprint("Got a http " + str(resp.status_code) + " error for url: " + url + "... retrying")
        else:
            functions.eprint("Got an error for url: " + url + "... retrying")
        return None


Expand All @@ -88,7 +90,7 @@ def retry_parse_html(url):



base_url = "http://priors.hci.utah.edu/PRIORS/BRCA/"
base_url = "https://priors.hci.utah.edu/PRIORS/BRCA/"
exon_url = urljoin(base_url, ("viewer.php?gene=%s&exon=%s" % (gene, first_exon)))


Expand Down Expand Up @@ -123,6 +125,7 @@ def retry_parse_html(url):
]
functions.write_vcf_header(info_headers, reference_genome="hg19")

functions.eprint("YOYO")

all_exon_urls = []

Expand All @@ -135,7 +138,6 @@ def retry_parse_html(url):
all_exon_urls.append(new_exon_url)



for exon_url in all_exon_urls:
functions.eprint(exon_url)
doc = retry_parse_html(exon_url)
Expand Down

0 comments on commit 38c6be4

Please sign in to comment.