From 38c6be481eeb1c14bf168686d78eae5654da0621 Mon Sep 17 00:00:00 2001
From: MarvinDo
Date: Wed, 21 Aug 2024 09:45:27 +0200
Subject: [PATCH] update priors

---
 data/script/download_hci_priors.sh |  4 ++--
 tools/priors_crawler.py            | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/data/script/download_hci_priors.sh b/data/script/download_hci_priors.sh
index 2a4fac0a..016e542b 100755
--- a/data/script/download_hci_priors.sh
+++ b/data/script/download_hci_priors.sh
@@ -86,7 +86,7 @@ tabix -p vcf priors_hg19.vcf.gz
 
 ## crossmap to lift from GRCh37 to GRCh38
 
-CrossMap.py vcf $data/genomes/hg19ToHg38.fixed.over.chain.gz priors_hg19.vcf.gz $grch38 priors.vcf
+CrossMap vcf $data/genomes/hg19ToHg38.fixed.over.chain.gz priors_hg19.vcf.gz $grch38 priors.vcf
 
 rm priors_hg19.vcf.gz
 rm priors_hg19.vcf.gz.tbi
@@ -94,7 +94,7 @@ rm priors_hg19.vcf.gz.tbi
 python3 $dbconverter -g MLH1 -e exon1 >> priors.vcf
 
 ##### STILL MISSING:
-python3 $dbconverter -g MSH2 -e exon1 >> priors.vcf
+python3 $dbconverter -g MSH2 -e exon1 > priors_msh2.vcf
 python3 $dbconverter -g MSH6 -e exon1 >> priors.vcf
 
 $ngsbits/VcfSort -in priors.vcf -out priors.vcf
diff --git a/tools/priors_crawler.py b/tools/priors_crawler.py
index ee338e97..28c40fd5 100755
--- a/tools/priors_crawler.py
+++ b/tools/priors_crawler.py
@@ -69,11 +69,13 @@ def extract_ref_alt(variant_container_text):
 
 def parse_html(url):
     try:
-        resp = requests.get(url)
+        resp = requests.get(url, verify=False)
+        resp.raise_for_status()
         html_text = resp.text
         return lxml.html.fromstring(html_text)
     except: #(HTTPError, RemoteDisconnected) as e
-        functions.eprint("Got an error for url: " + url + "... retrying")
+        functions.eprint(resp.text)
+        functions.eprint("Got a http " + str(resp.status_code) + " error for url: " + url + "... retrying")
         return None
 
 
@@ -88,7 +90,7 @@ def retry_parse_html(url):
 
 
 
-base_url = "http://priors.hci.utah.edu/PRIORS/BRCA/"
+base_url = "https://priors.hci.utah.edu/PRIORS/BRCA/"
 
 exon_url = urljoin(base_url, ("viewer.php?gene=%s&exon=%s" % (gene, first_exon)))
 
@@ -123,6 +125,7 @@ def retry_parse_html(url):
 ]
 
 functions.write_vcf_header(info_headers, reference_genome="hg19")
+functions.eprint("YOYO")
 
 
 all_exon_urls = []
@@ -135,7 +138,6 @@ def retry_parse_html(url):
     all_exon_urls.append(new_exon_url)
 
 
-
 for exon_url in all_exon_urls:
     functions.eprint(exon_url)
    doc = retry_parse_html(exon_url)
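
Note on the tools/priors_crawler.py hunks: the added except branch reads resp.status_code, but resp is unbound when requests.get() itself fails (for example a dropped connection), and verify=False makes urllib3 emit an InsecureRequestWarning on every request. Below is a minimal sketch of the same fetch-and-retry pattern with those two cases handled. parse_html, retry_parse_html and functions.eprint come from the patched file; MAX_RETRIES, the timeout value and the warning suppression are assumptions added for illustration and are not part of this patch.

# Sketch only: approximates the fetch/retry pattern used by the patched
# parse_html()/retry_parse_html(). MAX_RETRIES and the timeout are hypothetical.
import sys
import time

import lxml.html
import requests
import urllib3

# verify=False makes urllib3 warn on every call; silence it once up front.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

MAX_RETRIES = 5


def eprint(*args):
    # stand-in for functions.eprint from the repo
    print(*args, file=sys.stderr)


def parse_html(url):
    resp = None
    try:
        resp = requests.get(url, verify=False, timeout=30)
        resp.raise_for_status()
        return lxml.html.fromstring(resp.text)
    except requests.RequestException:
        if resp is not None:
            # HTTP-level failure: the server answered, so log its status and body.
            eprint(resp.text)
            eprint("Got a http " + str(resp.status_code) + " error for url: " + url + "... retrying")
        else:
            # Transport-level failure: there is no response object to inspect.
            eprint("Got an error for url: " + url + "... retrying")
        return None


def retry_parse_html(url):
    for _ in range(MAX_RETRIES):
        doc = parse_html(url)
        if doc is not None:
            return doc
        time.sleep(1)  # small pause between attempts
    raise RuntimeError("Giving up on url: " + url)

Calling raise_for_status() before parsing is what lets the retry loop react to 5xx responses instead of silently building a tree from the server's error HTML, which is the behavior the +resp.raise_for_status() line in the patch introduces.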