From a607213677e00277187dcdf6a8f8d1e5b0c7c427 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Thu, 22 Feb 2024 10:21:55 +0000 Subject: [PATCH 1/5] fix handling of dominant / recessive --- pgscatalog_utils/scorefile/qc.py | 6 +- pgscatalog_utils/scorefile/scorevariant.py | 10 +- tests/data/combine/PGS000802_hmPOS_GRCh37.txt | 39 + .../scorefile_dominant_and_recessive.txt | 838 ------------------ tests/test_combine.py | 15 +- 5 files changed, 57 insertions(+), 851 deletions(-) create mode 100644 tests/data/combine/PGS000802_hmPOS_GRCh37.txt delete mode 100644 tests/data/combine/scorefile_dominant_and_recessive.txt diff --git a/pgscatalog_utils/scorefile/qc.py b/pgscatalog_utils/scorefile/qc.py index 526fda2..f9d8b42 100644 --- a/pgscatalog_utils/scorefile/qc.py +++ b/pgscatalog_utils/scorefile/qc.py @@ -137,11 +137,11 @@ def assign_effect_type( ) -> typing.Generator[ScoreVariant, None, None]: for variant in variants: match (variant.is_recessive, variant.is_dominant): - case (None, None) | ("FALSE", "FALSE"): + case (None, None) | (False, False): pass # default value is additive, pass to break match and yield - case ("FALSE", "TRUE"): + case (False, True): variant.effect_type = EffectType.DOMINANT - case ("TRUE", "FALSE"): + case (True, False): variant.effect_type = EffectType.RECESSIVE case _: logger.critical(f"Bad effect type setting: {variant}") diff --git a/pgscatalog_utils/scorefile/scorevariant.py b/pgscatalog_utils/scorefile/scorevariant.py index 38135dc..1f0061f 100644 --- a/pgscatalog_utils/scorefile/scorevariant.py +++ b/pgscatalog_utils/scorefile/scorevariant.py @@ -106,8 +106,14 @@ def __init__( self.hm_inferOtherAllele: Optional[str] = hm_inferOtherAllele self.hm_source: Optional[str] = hm_source - self.is_dominant: Optional[bool] = is_dominant - self.is_recessive: Optional[bool] = is_recessive + + self.is_dominant: Optional[bool] = ( + is_dominant == "True" if is_dominant is not None else None + ) + self.is_recessive: Optional[bool] = ( + is_recessive == "True" if is_recessive is not None else None + ) + self.hm_rsID: Optional[str] = hm_rsID self.hm_match_chr: Optional[str] = hm_match_chr self.hm_match_pos: Optional[str] = hm_match_pos diff --git a/tests/data/combine/PGS000802_hmPOS_GRCh37.txt b/tests/data/combine/PGS000802_hmPOS_GRCh37.txt new file mode 100644 index 0000000..80b4551 --- /dev/null +++ b/tests/data/combine/PGS000802_hmPOS_GRCh37.txt @@ -0,0 +1,39 @@ +###PGS CATALOG SCORING FILE - see https://www.pgscatalog.org/downloads/#dl_ftp_scoring for additional information +#format_version=2.0 +##POLYGENIC SCORE (PGS) INFORMATION +#pgs_id=PGS000802 +#pgs_name=CRC_19 +#trait_reported=Colorectal cancer +#trait_mapped=colorectal cancer +#trait_efo=EFO_0005842 +#genome_build=NR +#variants_number=19 +#weight_type=NR +##SOURCE INFORMATION +#pgp_id=PGP000191 +#citation=He CY et al. Genomics (2021). 
doi:10.1016/j.ygeno.2021.01.025 +##HARMONIZATION DETAILS +#HmPOS_build=GRCh37 +#HmPOS_date=2022-07-28 +#HmPOS_match_chr={"True": null, "False": null} +#HmPOS_match_pos={"True": null, "False": null} +rsID chr_name chr_position effect_allele other_allele effect_weight allelefrequency_effect is_dominant is_recessive locus_name hm_source hm_rsID hm_chr hm_pos hm_inferOtherAllele +rs10936599 3 170974795 T C 0.123 0.377 True False MYNN ENSEMBL rs10936599 3 169492101 +rs6061231 20 60390312 A C 0.491 0.892 False True LOC100128184, RPS21 ENSEMBL rs6061231 20 60956917 +rs10774214 12 4238613 C T 0.122 0.637 True False LOC100129645, CCND2 ENSEMBL rs10774214 12 4368352 +rs10795668 10 8741225 A G 0.147 0.61 True False LOC338591, LOC389936 ENSEMBL rs10795668 10 8701219 +rs11903757 2 192295449 T C 0.185 0.925 False True LOC100132133, hCG_2045843 ENSEMBL rs11903757 2 192587204 +rs12603526 17 747343 T C 0.142 0.695 False True NXN ENSEMBL rs12603526 17 800593 +rs1321311 6 36730878 G T 0.135 0.876 True False SFRS3, LOC389386 ENSEMBL rs1321311 6 36622900 +rs2423279 20 7760350 T C 0.193 0.715 False True FUSIP1P2 ENSEMBL rs2423279 20 7812350 +rs3802842 11 110676919 A C 0.177 0.663 True False LOC120376 ENSEMBL rs3802842 11 111171709 +rs4813802 20 6647595 T G 0.189 0.816 False True LOC728383 ENSEMBL rs4813802 20 6699595 +rs6469656 8 117716969 G A 0.204 0.627 False True TRPS1, EIF3H ENSEMBL rs6469656 8 117647788 +rs647161 5 134526991 C A 0.277 0.663 False True LOC389328, H2AFY ENSEMBL rs647161 5 134499092 +rs704017 10 80489138 A G 0.123 0.644 True False LOC100132987, ZMIZ1 ENSEMBL rs704017 10 80819132 +rs7315438 12 114375786 C T 0.146 0.388 True False TBX3, LOC100129020 ENSEMBL rs7315438 12 115891403 +rs10411210 19 38224140 C T 0.232 0.836 False False RHPN2 ENSEMBL rs10411210 19 33532300 +rs12953717 18 44707927 T C 0.157 0.789 False False SMAD7 ENSEMBL rs12953717 18 46453929 +rs16969681 15 30780403 T C 0.175 0.583 False False C15orf45, GREM1 ENSEMBL rs16969681 15 32993111 +rs1801133 1 11778965 A G 0.16 0.592 False False MTHFR ENSEMBL rs1801133 1 11856378 +rs6983267 8 128482487 G T 0.217 0.376 False False FAM84B, POU5F1P1 ENSEMBL rs6983267 8 128413305 \ No newline at end of file diff --git a/tests/data/combine/scorefile_dominant_and_recessive.txt b/tests/data/combine/scorefile_dominant_and_recessive.txt deleted file mode 100644 index bbf23f0..0000000 --- a/tests/data/combine/scorefile_dominant_and_recessive.txt +++ /dev/null @@ -1,838 +0,0 @@ -#pgs_name=PGS001229_22_DominantRecessiveExample -#genome_build=GRCh37 -chr_name chr_position effect_allele other_allele effect_weight is_dominant is_recessive -22 17080378 G A 0.01045457 TRUE FALSE -22 17300230 A G 0.0001411475 FALSE TRUE -22 17318864 A C 0.008166266 FALSE FALSE -22 17327595 T C 0.007791641 FALSE FALSE -22 17409813 A G 0.0003108784 FALSE FALSE -22 17450952 G A -0.03033983 FALSE FALSE -22 17492533 G A 0.00388999 FALSE FALSE -22 17542810 C T 0.00803629 FALSE FALSE -22 17565013 G A 0.02135621 FALSE FALSE -22 17589209 T C 0.003026491 FALSE FALSE -22 17600977 A G 0.01581277 FALSE FALSE -22 17625915 A G -0.1172964 FALSE FALSE -22 17630486 A C 0.01012909 FALSE FALSE -22 17633785 C T 0.0023255 FALSE FALSE -22 17643689 A G 0.003361814 FALSE FALSE -22 17669306 C T 0.0214506 FALSE FALSE -22 17677699 T C -0.0007031384 FALSE FALSE -22 17680519 C A 0.001079236 FALSE FALSE -22 17701234 G A 0.004477145 FALSE FALSE -22 17703119 A T 0.0007771872 FALSE FALSE -22 17718699 C A -0.01320632 FALSE FALSE -22 17721595 C T 0.009480363 FALSE FALSE -22 17727648 T C 0.007811685 FALSE FALSE 
-22 17738177 G A -0.004719812 FALSE FALSE -22 17749096 A G -0.005244795 FALSE FALSE -22 17770181 G T -0.03101703 FALSE FALSE -22 17793969 G A 0.01774444 FALSE FALSE -22 17815696 G C -0.00551609 FALSE FALSE -22 17827684 G A -0.005944752 FALSE FALSE -22 17831813 T C 0.01061587 FALSE FALSE -22 17844929 T G 0.001717643 FALSE FALSE -22 17850661 T C -0.02805489 FALSE FALSE -22 17887534 A G 0.0007723542 FALSE FALSE -22 17887725 A G 0.007472703 FALSE FALSE -22 17958221 C A -0.02098647 FALSE FALSE -22 18036253 G A -0.01772981 FALSE FALSE -22 18038786 A G -0.002119071 FALSE FALSE -22 18262301 A T -0.005065485 FALSE FALSE -22 18289204 A G 0.005306345 FALSE FALSE -22 18295575 C T 0.02300129 FALSE FALSE -22 18296238 G A -0.005665446 FALSE FALSE -22 18319179 T C 0.03440642 FALSE FALSE -22 18393534 A C 0.01764269 FALSE FALSE -22 18439958 T C -0.002261707 FALSE FALSE -22 18483388 G A 0.03318724 FALSE FALSE -22 18488883 C G -0.0191918 FALSE FALSE -22 18489048 C A 0.01233198 FALSE FALSE -22 18495470 A G -0.005804926 FALSE FALSE -22 18537145 G A -0.004930116 FALSE FALSE -22 18571008 A G -8.844726E-05 FALSE FALSE -22 18584433 C T -0.001169893 FALSE FALSE -22 18631365 T C -0.001551714 FALSE FALSE -22 18650682 T C -0.01313784 FALSE FALSE -22 18890037 A G 0.05968921 FALSE FALSE -22 18891398 G A 0.006891943 FALSE FALSE -22 18892575 A G -0.00224447 FALSE FALSE -22 18915963 A G 0.003719756 FALSE FALSE -22 18959581 T C 0.006464581 FALSE FALSE -22 18963340 A G -0.01397565 FALSE FALSE -22 18970915 T C -0.001507131 FALSE FALSE -22 19024651 T C -0.00350575 FALSE FALSE -22 19121872 A G 0.01644046 FALSE FALSE -22 19135603 A G -0.02970077 FALSE FALSE -22 19190143 T C 0.003268027 FALSE FALSE -22 19263698 T C 0.02057255 FALSE FALSE -22 19292446 G T 0.01153989 FALSE FALSE -22 19371052 T C 0.01055134 FALSE FALSE -22 19420109 C T -0.008628228 FALSE FALSE -22 19451186 A C 0.02141029 FALSE FALSE -22 19518079 C T 0.005372247 FALSE FALSE -22 19581331 T C 0.01686942 FALSE FALSE -22 19593854 C A 0.0006544249 FALSE FALSE -22 19606703 G A 0.02070121 FALSE FALSE -22 19649005 A G 0.002868601 FALSE FALSE -22 19735854 C T 0.006262962 FALSE FALSE -22 19738355 T C 4.97384E-05 FALSE FALSE -22 19770886 A G -0.01013929 FALSE FALSE -22 19781823 T C 0.02481609 FALSE FALSE -22 19873357 T C 0.0116302 FALSE FALSE -22 19907099 A C -0.0267645 FALSE FALSE -22 19968597 T C -0.02203945 FALSE FALSE -22 20046344 G A -0.009801428 FALSE FALSE -22 20084821 C T -0.02232886 FALSE FALSE -22 20185457 A G 0.006892171 FALSE FALSE -22 20189077 T C 0.01738215 FALSE FALSE -22 20219648 A G 0.009307625 FALSE FALSE -22 20248391 A G -0.005405845 FALSE FALSE -22 20267213 A G 0.006713242 FALSE FALSE -22 20286099 G T 0.01574758 FALSE FALSE -22 20749042 G A 0.006603339 FALSE FALSE -22 20754039 A G -0.01181141 FALSE FALSE -22 20775167 T C 0.01160113 FALSE FALSE -22 20780296 A G 0.06735311 FALSE FALSE -22 20789074 C T 0.02844307 FALSE FALSE -22 20791438 A C 0.0473474 FALSE FALSE -22 20793914 C T 0.007009781 FALSE FALSE -22 20839810 T G 0.003947346 FALSE FALSE -22 20860931 T C 0.0005613511 FALSE FALSE -22 20979980 G A 0.003231665 FALSE FALSE -22 20991771 G A 0.004226765 FALSE FALSE -22 21075537 C A -0.002096453 FALSE FALSE -22 21154393 G T -0.004297086 FALSE FALSE -22 21323357 C T -0.006041745 FALSE FALSE -22 21331918 G C -0.002280912 FALSE FALSE -22 21334924 C G -0.02031369 FALSE FALSE -22 21356824 A G 0.01476577 FALSE FALSE -22 21386019 A G 0.01435557 FALSE FALSE -22 21449028 G A -0.01537701 FALSE FALSE -22 21463515 A G -0.01335614 FALSE FALSE -22 21982892 T C -0.06373335 
FALSE FALSE -22 22001704 T G 0.02809584 FALSE FALSE -22 22062480 T C 0.0529113 FALSE FALSE -22 22080735 G A -0.0139426 FALSE FALSE -22 22151939 C A -0.008287849 FALSE FALSE -22 22163425 G A 0.05518983 FALSE FALSE -22 22307519 C G -0.003486191 FALSE FALSE -22 22351283 G A -0.0007483763 FALSE FALSE -22 22394291 AG A 0.004320583 FALSE FALSE -22 22395754 T C 0.002587971 FALSE FALSE -22 22424302 A C 0.0011408 FALSE FALSE -22 22473905 C A 0.01226009 FALSE FALSE -22 22550450 G C 0.01773244 FALSE FALSE -22 22561610 C T -0.006207024 FALSE FALSE -22 22581369 G A -0.006272413 FALSE FALSE -22 22584678 A G -0.00217647 FALSE FALSE -22 22711786 T C 0.007779875 FALSE FALSE -22 22726372 T C 0.00349632 FALSE FALSE -22 22762771 C T 0.01252501 FALSE FALSE -22 22769923 G A -0.01103632 FALSE FALSE -22 22869742 A C -0.002412657 FALSE FALSE -22 22871922 A G -0.002769974 FALSE FALSE -22 22929268 T C -0.007035723 FALSE FALSE -22 23001481 A G 0.007524178 FALSE FALSE -22 23022520 T C 0.002175257 FALSE FALSE -22 23064982 A C -0.01255076 FALSE FALSE -22 23249440 A C 0.02085816 FALSE FALSE -22 23268677 A G 0.01337349 FALSE FALSE -22 23279456 C G -0.01371401 FALSE FALSE -22 23282286 C T 0.004994329 FALSE FALSE -22 23325722 C T 0.0008506657 FALSE FALSE -22 23412058 A G -0.009545553 FALSE FALSE -22 23627369 G A -0.01900175 FALSE FALSE -22 23644425 G A -0.0009106953 FALSE FALSE -22 23649242 G T 0.001061643 FALSE FALSE -22 23794844 G A -0.01198736 FALSE FALSE -22 23804670 G T -0.001119846 FALSE FALSE -22 23819697 T G -0.01028722 FALSE FALSE -22 23873076 T C 0.009509027 FALSE FALSE -22 23892145 T C 0.0135128 FALSE FALSE -22 23925779 C T -0.004127647 FALSE FALSE -22 23960187 T C -0.008475905 FALSE FALSE -22 24035970 T C -0.001334318 FALSE FALSE -22 24086107 G A -0.01652957 FALSE FALSE -22 24105789 A G 0.01813091 FALSE FALSE -22 24106448 A G 0.001834095 FALSE FALSE -22 24186809 C T -0.01426541 FALSE FALSE -22 24235360 G A 0.0003168635 FALSE FALSE -22 24255296 T C 0.01624252 FALSE FALSE -22 24300540 T C -0.00322576 FALSE FALSE -22 24376584 A G -0.006223068 FALSE FALSE -22 24406778 A C 0.00304654 FALSE FALSE -22 24618331 G A -0.0006506681 FALSE FALSE -22 24802564 A G -0.006695797 FALSE FALSE -22 24912232 T C -0.01536303 FALSE FALSE -22 24943582 A G -0.001687764 FALSE FALSE -22 24995668 G A -0.03537331 FALSE FALSE -22 25123505 C T -0.0160099 FALSE FALSE -22 25145094 T C -0.005584047 FALSE FALSE -22 25145453 T C -0.001388536 FALSE FALSE -22 25185823 A G -0.009228375 FALSE FALSE -22 25265972 A G 0.01088906 FALSE FALSE -22 25309448 A G -0.002238693 FALSE FALSE -22 25363411 A G 0.004035775 FALSE FALSE -22 25410895 G A 0.0009720734 FALSE FALSE -22 25442369 C T 0.01660527 FALSE FALSE -22 25454658 C A 0.01200285 FALSE FALSE -22 25465065 C T 0.01320801 FALSE FALSE -22 25524916 C T 0.01147501 FALSE FALSE -22 25603008 T C -0.01262741 FALSE FALSE -22 25619025 G T -0.01212511 FALSE FALSE -22 25621591 T C 0.01051851 FALSE FALSE -22 25643483 T G 0.01373474 FALSE FALSE -22 25661725 A G -0.005936431 FALSE FALSE -22 25667883 G A 0.01547775 FALSE FALSE -22 25668730 A C 0.02616493 FALSE FALSE -22 25678577 T C 0.0304018 FALSE FALSE -22 25761309 T C -0.001760112 FALSE FALSE -22 25761936 T C -0.005171998 FALSE FALSE -22 25938977 T C 0.01966116 FALSE FALSE -22 25994013 A G 0.0006268228 FALSE FALSE -22 26081873 T C 0.05232603 FALSE FALSE -22 26132612 A G -0.006457239 FALSE FALSE -22 26133775 T C -0.001181527 FALSE FALSE -22 26159289 A G -0.008399401 FALSE FALSE -22 26181767 C T 0.01044769 FALSE FALSE -22 26190915 G A 0.004287533 FALSE FALSE -22 26218164 
G A -0.002803502 FALSE FALSE -22 26231312 C G 0.006105629 FALSE FALSE -22 26237826 C T 0.004981479 FALSE FALSE -22 26239850 A C 0.004144037 FALSE FALSE -22 26273893 C G 0.005616213 FALSE FALSE -22 26278128 G T -0.003965338 FALSE FALSE -22 26280462 T C -0.0008324497 FALSE FALSE -22 26290588 T C -0.0130732 FALSE FALSE -22 26292659 G A 4.294309E-05 FALSE FALSE -22 26343593 G A 0.007813758 FALSE FALSE -22 26369358 T C -0.00483665 FALSE FALSE -22 26390964 A G -0.007849451 FALSE FALSE -22 26415475 T C -0.001219281 FALSE FALSE -22 26456367 G A -0.01285326 FALSE FALSE -22 26460519 T C -0.008695338 FALSE FALSE -22 26528054 A G 0.01973023 FALSE FALSE -22 26617260 T A -0.01384025 FALSE FALSE -22 26638906 G T 0.01229772 FALSE FALSE -22 26735648 A G 0.0007879673 FALSE FALSE -22 26782251 G A 0.0005096459 FALSE FALSE -22 26812632 C T -0.01850814 FALSE FALSE -22 26939781 C T -0.0009222796 FALSE FALSE -22 26960648 A C -0.005679255 FALSE FALSE -22 27038865 T G -0.0001487706 FALSE FALSE -22 27042828 A G 0.02957737 FALSE FALSE -22 27161060 A G 0.002844558 FALSE FALSE -22 27191643 T C 0.008953731 FALSE FALSE -22 27216426 G A 0.00912099 FALSE FALSE -22 27217018 A G 0.01510616 FALSE FALSE -22 27240025 T G -0.0297174 FALSE FALSE -22 27242642 G A -0.009822927 FALSE FALSE -22 27246070 C T -0.001554199 FALSE FALSE -22 27252454 C T -0.006560251 FALSE FALSE -22 27264880 G T -0.01323094 FALSE FALSE -22 27337886 A G -0.009600014 FALSE FALSE -22 27339284 T C -0.009944488 FALSE FALSE -22 27353810 T C -0.002171555 FALSE FALSE -22 27370273 T C -0.009798478 FALSE FALSE -22 27378884 A G 0.05145072 FALSE FALSE -22 27398749 C T 0.001012263 FALSE FALSE -22 27403571 C T -0.01745865 FALSE FALSE -22 27405012 T C -0.005425419 FALSE FALSE -22 27415255 C T -0.01499362 FALSE FALSE -22 27426628 G C 0.0228946 FALSE FALSE -22 27430724 A G -0.007068064 FALSE FALSE -22 27435577 C T -0.008632412 FALSE FALSE -22 27487580 G A 0.003691502 FALSE FALSE -22 27498426 A G -0.006801544 FALSE FALSE -22 27526095 G A -0.0008086267 FALSE FALSE -22 27563274 C A 0.0136965 FALSE FALSE -22 27584680 A G -0.002139188 FALSE FALSE -22 27628151 C G 0.02130389 FALSE FALSE -22 27652290 T G 0.004815735 FALSE FALSE -22 27660675 A G 0.004899654 FALSE FALSE -22 27674832 G T 0.0001248065 FALSE FALSE -22 27718775 A G 0.02292384 FALSE FALSE -22 27729742 G A 0.004951261 FALSE FALSE -22 27762155 C T 0.00485666 FALSE FALSE -22 27781736 A C -0.008336242 FALSE FALSE -22 27829565 G A 0.00285409 FALSE FALSE -22 27832985 G C -0.01668955 FALSE FALSE -22 27836311 G A -0.00775625 FALSE FALSE -22 27839704 T C -0.02492106 FALSE FALSE -22 27864471 A C 0.00218995 FALSE FALSE -22 27873024 G A 0.002721729 FALSE FALSE -22 27883265 G A 0.02961735 FALSE FALSE -22 27890684 A G -0.008057355 FALSE FALSE -22 27927298 T C 0.02054268 FALSE FALSE -22 27934290 G A 0.004751755 FALSE FALSE -22 27951176 A G -0.0004329547 FALSE FALSE -22 27974819 C A 0.01439093 FALSE FALSE -22 27975451 G A -0.03648208 FALSE FALSE -22 28007741 C T -0.01635917 FALSE FALSE -22 28016883 C A 0.008564085 FALSE FALSE -22 28046561 T C 0.01535905 FALSE FALSE -22 28060034 A G 0.03097228 FALSE FALSE -22 28076058 C T 0.02848654 FALSE FALSE -22 28094845 G A -0.02659077 FALSE FALSE -22 28130130 C T -0.01640387 FALSE FALSE -22 28136977 A C -0.003962775 FALSE FALSE -22 28150109 G A 0.0006071392 FALSE FALSE -22 28150815 A G 0.01604724 FALSE FALSE -22 28151825 A G -0.005390282 FALSE FALSE -22 28155404 T C 0.005030388 FALSE FALSE -22 28172577 G T 0.005704168 FALSE FALSE -22 28185452 G T -0.006896853 FALSE FALSE -22 28200176 G A 
-0.006474674 FALSE FALSE -22 28206912 C A -0.006175542 FALSE FALSE -22 28270372 G T -0.0006768204 FALSE FALSE -22 28412908 G T 0.01763639 FALSE FALSE -22 28501414 T C -0.2304747 FALSE FALSE -22 29106733 C T -0.01074749 FALSE FALSE -22 29318724 T C 0.001743333 FALSE FALSE -22 29378610 C T 0.0006690876 FALSE FALSE -22 29478760 C T -0.03029428 FALSE FALSE -22 29533572 G C -0.01269604 FALSE FALSE -22 29626515 A G -0.0117113 FALSE FALSE -22 29630337 A G 0.02658049 FALSE FALSE -22 29669648 C G -0.008550535 FALSE FALSE -22 29692497 T G 0.001234896 FALSE FALSE -22 29837537 C T 0.01321112 FALSE FALSE -22 29961986 T G 0.001878853 FALSE FALSE -22 30151687 C T 0.003418302 FALSE FALSE -22 30163526 G A 0.01576261 FALSE FALSE -22 30494371 A G 0.007959801 FALSE FALSE -22 30592487 G C -0.1047403 FALSE FALSE -22 30621613 A C -0.01382104 FALSE FALSE -22 30658082 C T -0.03794014 FALSE FALSE -22 30688659 T C 0.0225714 FALSE FALSE -22 30762140 A G 0.02079806 FALSE FALSE -22 30793137 A G -0.004609306 FALSE FALSE -22 30901592 C T -0.00833404 FALSE FALSE -22 30927975 T C 0.003226189 FALSE FALSE -22 30953295 T C -0.00768579 FALSE FALSE -22 30992651 G A -0.025658 FALSE FALSE -22 31018975 C T 0.04241226 FALSE FALSE -22 31032920 G A -0.02311985 FALSE FALSE -22 31063804 G GT -0.0002081808 FALSE FALSE -22 31114086 G T 0.02825476 FALSE FALSE -22 31139653 A G 2.640129E-06 FALSE FALSE -22 31214382 G A 0.01137657 FALSE FALSE -22 31216506 C T 0.005531311 FALSE FALSE -22 31272930 T C -0.001056118 FALSE FALSE -22 31333631 C T -0.01235089 FALSE FALSE -22 31378447 A G 0.01020507 FALSE FALSE -22 31442308 A G -0.002479126 FALSE FALSE -22 31477361 C G -0.01263667 FALSE FALSE -22 31514348 G A 0.00580324 FALSE FALSE -22 31521404 A G 0.01097391 FALSE FALSE -22 31659495 C T 0.02663412 FALSE FALSE -22 31884405 C T -0.0003950834 FALSE FALSE -22 32200849 T C 0.01585735 FALSE FALSE -22 32341684 T C -0.02960328 FALSE FALSE -22 32559835 G A -0.02170436 FALSE FALSE -22 32569263 C T -0.001296006 FALSE FALSE -22 32624139 C T 0.005619574 FALSE FALSE -22 32702816 A G -0.01534023 FALSE FALSE -22 32756652 G A 0.02512177 FALSE FALSE -22 32831540 T C 0.001868495 FALSE FALSE -22 32832874 T C 6.028815E-05 FALSE FALSE -22 32853660 G A 0.0138221 FALSE FALSE -22 32854391 C A 0.0001960825 FALSE FALSE -22 32875190 A G -0.006426637 FALSE FALSE -22 32934713 C CT -0.009057754 FALSE FALSE -22 32952012 A C -0.00380248 FALSE FALSE -22 32954443 G A 0.002210369 FALSE FALSE -22 32993032 C T -0.002429979 FALSE FALSE -22 32997766 T C -0.008424246 FALSE FALSE -22 33045573 T C -0.03107145 FALSE FALSE -22 33046110 G C -0.06954732 FALSE FALSE -22 33048039 T C 0.01138346 FALSE FALSE -22 33056341 C T -0.06477198 FALSE FALSE -22 33108536 T C -0.03426392 FALSE FALSE -22 33108981 T C -0.07404035 FALSE FALSE -22 33116435 T C 0.06542471 FALSE FALSE -22 33143528 G A 0.02195059 FALSE FALSE -22 33146363 A G 0.000810539 FALSE FALSE -22 33259625 C T 0.02309793 FALSE FALSE -22 33336039 T G -0.02554387 FALSE FALSE -22 33408519 T C -0.0075563 FALSE FALSE -22 33660345 C G 0.002190743 FALSE FALSE -22 33804893 C T 0.006680774 FALSE FALSE -22 33844303 C T 0.008923314 FALSE FALSE -22 33846914 T C 0.006295378 FALSE FALSE -22 33898906 A C 1.958759E-05 FALSE FALSE -22 34022284 A G -0.00257933 FALSE FALSE -22 34137784 G A 0.004460828 FALSE FALSE -22 34208570 T C -0.003365869 FALSE FALSE -22 34217757 T C 0.009289431 FALSE FALSE -22 34256923 A C 0.01439384 FALSE FALSE -22 34265402 G A -0.0163661 FALSE FALSE -22 34284173 G A -0.02315559 FALSE FALSE -22 34296093 C A -0.004688326 FALSE FALSE -22 
34378012 A G 0.002276664 FALSE FALSE -22 34436795 C T 0.0001337033 FALSE FALSE -22 34488452 A G -0.000428831 FALSE FALSE -22 34501541 A G 0.002763614 FALSE FALSE -22 34514810 C A 0.003976601 FALSE FALSE -22 34526428 C T 0.01088864 FALSE FALSE -22 34583078 A G 0.001802495 FALSE FALSE -22 34620754 T C 0.01466546 FALSE FALSE -22 34691035 A G -0.0002082615 FALSE FALSE -22 34758540 T C 0.005165532 FALSE FALSE -22 34851377 A C 0.0137118 FALSE FALSE -22 35371707 T C -0.0004985554 FALSE FALSE -22 35382268 A C -0.004931336 FALSE FALSE -22 35419122 C T -0.01077953 FALSE FALSE -22 35478529 A G 0.0001760523 FALSE FALSE -22 35481493 T C 0.01056439 FALSE FALSE -22 35526281 G A -0.002766891 FALSE FALSE -22 35603836 A G -0.0001783939 FALSE FALSE -22 35660875 T G 0.03988231 FALSE FALSE -22 35745196 G T 0.0001750545 FALSE FALSE -22 35750980 A G -0.007651136 FALSE FALSE -22 35783413 G A 0.001649791 FALSE FALSE -22 35918270 C T 0.006918713 FALSE FALSE -22 35959242 A G 0.01697538 FALSE FALSE -22 35962060 G A 0.005181476 FALSE FALSE -22 35964158 G C 0.002769931 FALSE FALSE -22 35984385 A G -0.01280623 FALSE FALSE -22 36001258 C T 0.01342405 FALSE FALSE -22 36072262 T C 0.00489549 FALSE FALSE -22 36180535 G A -0.03250252 FALSE FALSE -22 36517307 C T 0.01366076 FALSE FALSE -22 36519596 A C -0.00349956 FALSE FALSE -22 36532058 A G -0.01214487 FALSE FALSE -22 36543489 C G 0.007838149 FALSE FALSE -22 36600841 G A 0.02644389 FALSE FALSE -22 36629633 C A -0.006871468 FALSE FALSE -22 36635967 G A -0.02634742 FALSE FALSE -22 36655735 A G -0.005385142 FALSE FALSE -22 36661646 A G -0.01560741 FALSE FALSE -22 36684354 C T -0.005170111 FALSE FALSE -22 36705622 A G 0.01713234 FALSE FALSE -22 36708049 C CTCCTGTGA -0.05187051 FALSE FALSE -22 36751101 A C -0.0244065 FALSE FALSE -22 36764788 G A 0.02784116 FALSE FALSE -22 36897427 C T 0.02603792 FALSE FALSE -22 36900806 G A 0.007366207 FALSE FALSE -22 36923144 T C -0.001875563 FALSE FALSE -22 36924714 G A -0.003632594 FALSE FALSE -22 36946643 T G 0.01333137 FALSE FALSE -22 36954939 T C 0.01105894 FALSE FALSE -22 36998907 T C -0.0006084687 FALSE FALSE -22 37001495 G T -0.01224147 FALSE FALSE -22 37013167 G C 0.01866849 FALSE FALSE -22 37077364 C T 0.007294257 FALSE FALSE -22 37080738 C G -0.004873355 FALSE FALSE -22 37101890 C T 0.03991764 FALSE FALSE -22 37118535 A G -0.001713909 FALSE FALSE -22 37184521 G A 0.006515894 FALSE FALSE -22 37206341 G T 0.0002566936 FALSE FALSE -22 37256262 A G 0.001152626 FALSE FALSE -22 37258503 C T -0.009761102 FALSE FALSE -22 37323988 T C -0.0073182 FALSE FALSE -22 37329545 G A 0.005775806 FALSE FALSE -22 37337409 T C -0.02534399 FALSE FALSE -22 37343000 A C -0.0004011777 FALSE FALSE -22 37398195 T C -0.01001198 FALSE FALSE -22 37401532 A G -0.003244795 FALSE FALSE -22 37407109 C G 0.04335972 FALSE FALSE -22 37477732 T C 0.0003669548 FALSE FALSE -22 37507019 A G -0.0009259451 FALSE FALSE -22 37513316 A G 0.001153887 FALSE FALSE -22 37532441 A G 0.01802306 FALSE FALSE -22 37571497 G A -0.005785311 FALSE FALSE -22 37581383 T C 0.03172492 FALSE FALSE -22 37621269 C A 0.004460405 FALSE FALSE -22 37644621 T C -0.008386907 FALSE FALSE -22 37671896 A G 0.02303688 FALSE FALSE -22 37679763 G A -0.002658396 FALSE FALSE -22 37720268 G A 0.02120184 FALSE FALSE -22 37753256 C T 0.008984539 FALSE FALSE -22 37757099 G A -0.01560347 FALSE FALSE -22 37780522 C G -0.01496708 FALSE FALSE -22 37800175 T C -0.005510833 FALSE FALSE -22 37846448 G A 0.01152963 FALSE FALSE -22 37896749 C T 0.005447068 FALSE FALSE -22 37908435 C T 0.001909131 FALSE FALSE -22 37977481 T C 
0.01465308 FALSE FALSE -22 37992699 G A 0.0008339179 FALSE FALSE -22 38032762 G GA 0.01693041 FALSE FALSE -22 38054262 C A 0.04354146 FALSE FALSE -22 38083101 C T -0.02092117 FALSE FALSE -22 38119213 A G 0.03948165 FALSE FALSE -22 38122122 C T 0.04377277 FALSE FALSE -22 38204089 T C 0.02977743 FALSE FALSE -22 38435786 T G -0.007684278 FALSE FALSE -22 38544298 G A 0.05090446 FALSE FALSE -22 38597378 T G -0.01997927 FALSE FALSE -22 38606780 G A -0.009182016 FALSE FALSE -22 38630272 C T 0.007393137 FALSE FALSE -22 38663819 G A -0.006392021 FALSE FALSE -22 38673234 A G -0.01106705 FALSE FALSE -22 38685131 C T -0.004493352 FALSE FALSE -22 38695406 T C -0.01155972 FALSE FALSE -22 38708506 A G 0.01701713 FALSE FALSE -22 38744184 C T -0.02112956 FALSE FALSE -22 38819613 A G -0.005625806 FALSE FALSE -22 38877461 G T 0.001108728 FALSE FALSE -22 38918894 G T -0.008094286 FALSE FALSE -22 38928269 G T -0.02114917 FALSE FALSE -22 39027286 C CAG 0.003840735 FALSE FALSE -22 39067524 G A 0.01200232 FALSE FALSE -22 39159201 C T 0.003096214 FALSE FALSE -22 39178701 G A 0.002148449 FALSE FALSE -22 39260032 T C 0.03574634 FALSE FALSE -22 39268785 T G 0.009377414 FALSE FALSE -22 39281774 G T 0.03816951 FALSE FALSE -22 39300265 C T 0.03540156 FALSE FALSE -22 39332623 T C -0.004449842 FALSE FALSE -22 39415780 G A 0.01479946 FALSE FALSE -22 39448465 A G 0.003065974 FALSE FALSE -22 39480697 G A -0.04005617 FALSE FALSE -22 39487665 G A -0.0001218988 FALSE FALSE -22 39493294 C T -0.03115929 FALSE FALSE -22 39510995 G A -0.02069106 FALSE FALSE -22 39542292 A G 0.009653575 FALSE FALSE -22 39543000 T C -0.004069841 FALSE FALSE -22 39573724 A C 0.02683694 FALSE FALSE -22 39575692 A C 0.01451305 FALSE FALSE -22 39581277 A C 0.01766406 FALSE FALSE -22 39626572 A G -0.02901981 FALSE FALSE -22 39658626 C T 0.004177065 FALSE FALSE -22 39665395 G A 0.01264611 FALSE FALSE -22 39687484 G A 0.005418141 FALSE FALSE -22 39708279 A G -0.04281532 FALSE FALSE -22 39708357 T C 0.008605574 FALSE FALSE -22 39793066 G T 0.03658209 FALSE FALSE -22 39798127 G A 0.002302129 FALSE FALSE -22 39843409 T C 0.01065699 FALSE FALSE -22 39865475 G A 0.001588501 FALSE FALSE -22 39932516 A G -0.01179841 FALSE FALSE -22 39963426 G A -0.01503908 FALSE FALSE -22 40023636 C T 0.006443146 FALSE FALSE -22 40046176 C T -0.0007416552 FALSE FALSE -22 40067818 T C 0.00455936 FALSE FALSE -22 40092864 G A 0.02400297 FALSE FALSE -22 40127293 T C -0.0008870038 FALSE FALSE -22 40358148 T C -0.01079902 FALSE FALSE -22 40420786 G C -0.008092115 FALSE FALSE -22 40454069 G T 0.00789888 FALSE FALSE -22 40541981 G A 0.0174264 FALSE FALSE -22 40652873 G A 0.005853057 FALSE FALSE -22 40676672 G T -0.001894274 FALSE FALSE -22 40729614 G A 0.0195994 FALSE FALSE -22 40820151 C T -0.01628066 FALSE FALSE -22 40986372 G C -0.01983507 FALSE FALSE -22 41494925 A G -0.02918069 FALSE FALSE -22 41646738 G A 0.0003521847 FALSE FALSE -22 41680898 T C 0.01402732 FALSE FALSE -22 41704872 T C 6.681484E-05 FALSE FALSE -22 41791536 C T -5.572333E-05 FALSE FALSE -22 41895409 A G -0.04407217 FALSE FALSE -22 41929175 G T -0.03186844 FALSE FALSE -22 42089623 C T 0.00532234 FALSE FALSE -22 42095658 G T 0.03846131 FALSE FALSE -22 42210985 C T -0.00313971 FALSE FALSE -22 42279653 G A -0.006596336 FALSE FALSE -22 42341308 G A -0.0006862491 FALSE FALSE -22 42524243 C CT -0.01181191 FALSE FALSE -22 42672124 G A -0.005278171 FALSE FALSE -22 42691238 T C -0.01642396 FALSE FALSE -22 42813753 C T -0.00386775 FALSE FALSE -22 42867898 G A -0.001352327 FALSE FALSE -22 42912097 T C -0.0007295657 FALSE FALSE 
-22 42932317 A G -0.05768556 FALSE FALSE -22 43010817 A G 0.01722077 FALSE FALSE -22 43080028 T C -0.0005527551 FALSE FALSE -22 43096507 T C -0.005556102 FALSE FALSE -22 43112475 T C -0.01350273 FALSE FALSE -22 43114824 G A -0.01963192 FALSE FALSE -22 43115576 C T -0.01880097 FALSE FALSE -22 43154299 G A -0.001621113 FALSE FALSE -22 43159948 T C -0.007980584 FALSE FALSE -22 43206950 C A -0.005783037 FALSE FALSE -22 43218397 C T -0.003976636 FALSE FALSE -22 43283255 C A -0.01426668 FALSE FALSE -22 43290583 C T -0.03955775 FALSE FALSE -22 43333156 A G -0.03127845 FALSE FALSE -22 43426262 G A -0.00366804 FALSE FALSE -22 43483242 T C -0.02540203 FALSE FALSE -22 43515108 C T -0.01570749 FALSE FALSE -22 43529314 C G 0.01738127 FALSE FALSE -22 43551513 G A 0.02565386 FALSE FALSE -22 43558972 A G -0.01962819 FALSE FALSE -22 43577214 T C -0.02270478 FALSE FALSE -22 43579049 C T -0.001193909 FALSE FALSE -22 43610207 G A -0.007621661 FALSE FALSE -22 43623395 G C -0.04852519 FALSE FALSE -22 43640512 C T -0.005533207 FALSE FALSE -22 43649701 C T 0.07724845 FALSE FALSE -22 43661080 T C -0.04251741 FALSE FALSE -22 43683088 A G -0.003582388 FALSE FALSE -22 43707996 A G -0.02547044 FALSE FALSE -22 43711080 C G -0.005784446 FALSE FALSE -22 43721519 C A 0.000365885 FALSE FALSE -22 43729401 C T 0.008557013 FALSE FALSE -22 43763757 T G -0.0178981 FALSE FALSE -22 43836198 G T 0.002427697 FALSE FALSE -22 43976396 A G -0.01277457 FALSE FALSE -22 44031042 C T 0.003593107 FALSE FALSE -22 44193626 C A -0.006865434 FALSE FALSE -22 44221247 G A 0.01833991 FALSE FALSE -22 44296372 T C 0.006169212 FALSE FALSE -22 44298838 A G 0.007441756 FALSE FALSE -22 44342116 G A 0.02810328 FALSE FALSE -22 44368122 G A 0.0129968 FALSE FALSE -22 44379838 G A 0.001648422 FALSE FALSE -22 44380033 C T -0.002136788 FALSE FALSE -22 44395451 C T -0.006698507 FALSE FALSE -22 44419871 C T 0.0181613 FALSE FALSE -22 44424108 T C 0.01036733 FALSE FALSE -22 44467899 C T -0.002592364 FALSE FALSE -22 44498134 T C 0.007281423 FALSE FALSE -22 44522312 C T -0.0002636447 FALSE FALSE -22 44526130 G A -0.00388298 FALSE FALSE -22 44530286 A G 0.02528159 FALSE FALSE -22 44530420 C T -0.01233654 FALSE FALSE -22 44548944 G A -0.003947209 FALSE FALSE -22 44551755 G A 0.01262458 FALSE FALSE -22 44566434 A G -0.004290306 FALSE FALSE -22 44581046 T C -0.0147995 FALSE FALSE -22 44643161 C T 0.01439493 FALSE FALSE -22 44677081 C T -0.01030513 FALSE FALSE -22 44681612 G A -0.001269762 FALSE FALSE -22 44695088 T C 0.006324859 FALSE FALSE -22 44707716 G T 0.002288939 FALSE FALSE -22 44725343 G A 0.003534678 FALSE FALSE -22 44738406 G A 0.02320049 FALSE FALSE -22 44746729 A G -0.01754216 FALSE FALSE -22 44751158 G A -0.006539695 FALSE FALSE -22 44757439 A G 0.02480295 FALSE FALSE -22 44759519 G A 0.002111274 FALSE FALSE -22 44761797 A T -0.00531172 FALSE FALSE -22 44763352 C G 0.01452737 FALSE FALSE -22 44783779 G A 0.009142699 FALSE FALSE -22 44791807 C T -0.02371876 FALSE FALSE -22 44818986 C T -0.006740622 FALSE FALSE -22 44894913 G A -5.179871E-05 FALSE FALSE -22 45058431 C T 0.01098259 FALSE FALSE -22 45066035 A G -0.01484374 FALSE FALSE -22 45069410 T C 0.01530441 FALSE FALSE -22 45081330 G A 0.00135012 FALSE FALSE -22 45082168 C A 0.003663354 FALSE FALSE -22 45090008 G A 0.002811861 FALSE FALSE -22 45116664 C T 0.01247728 FALSE FALSE -22 45244930 T C -0.01450041 FALSE FALSE -22 45258457 G A -0.003500519 FALSE FALSE -22 45323989 T C 0.001111338 FALSE FALSE -22 45415987 A G -0.01398184 FALSE FALSE -22 45451355 G A -0.005566982 FALSE FALSE -22 45471607 C T 
0.01148978 FALSE FALSE -22 45497738 C T -0.005029327 FALSE FALSE -22 45502829 C T -0.03893521 FALSE FALSE -22 45519040 T G 0.002377071 FALSE FALSE -22 45523391 A G 0.01318997 FALSE FALSE -22 45573450 C A 0.0043856 FALSE FALSE -22 45589490 G A -0.008350439 FALSE FALSE -22 45668012 T C 0.01286879 FALSE FALSE -22 45671343 G A -2.940682E-06 FALSE FALSE -22 45672574 T C 0.005743608 FALSE FALSE -22 45693923 A G -0.002675069 FALSE FALSE -22 45718743 G A -0.02092804 FALSE FALSE -22 45723807 C G 0.001670159 FALSE FALSE -22 45728370 A G 0.0001879231 FALSE FALSE -22 45741537 G T 0.01420045 FALSE FALSE -22 45749983 T G -0.04591012 FALSE FALSE -22 45809624 A C 0.002185772 FALSE FALSE -22 45821935 A G 0.02250782 FALSE FALSE -22 45837410 G A -0.002756449 FALSE FALSE -22 45846371 T C 0.07910102 FALSE FALSE -22 45864934 T C 0.008535181 FALSE FALSE -22 45871507 G C -0.007764056 FALSE FALSE -22 45892656 G T -0.003885653 FALSE FALSE -22 45897997 C T 0.0003935204 FALSE FALSE -22 45929577 C T -0.02532217 FALSE FALSE -22 45936350 A G -0.008001698 FALSE FALSE -22 45942726 T G -0.01415551 FALSE FALSE -22 45996298 G A 0.05643525 FALSE FALSE -22 46009063 G A 0.006464843 FALSE FALSE -22 46022070 G A 0.0224674 FALSE FALSE -22 46155548 G C -0.0324747 FALSE FALSE -22 46207955 C T -0.001354554 FALSE FALSE -22 46236425 A G 0.08398423 FALSE FALSE -22 46275529 T C 0.0022643 FALSE FALSE -22 46287720 A G -0.02237482 FALSE FALSE -22 46289699 T C 0.01872124 FALSE FALSE -22 46303347 T C -0.01283734 FALSE FALSE -22 46316057 A G 0.02312579 FALSE FALSE -22 46337043 G C 0.01701173 FALSE FALSE -22 46347519 C T 0.01574289 FALSE FALSE -22 46364161 A G -0.04466341 FALSE FALSE -22 46381234 G A 0.04730559 FALSE FALSE -22 46396925 G A 0.001783944 FALSE FALSE -22 46403715 A G -0.02132589 FALSE FALSE -22 46406782 A C 0.08439466 FALSE FALSE -22 46445002 G C -0.07613496 FALSE FALSE -22 46458123 G T 0.03328073 FALSE FALSE -22 46482948 C T 0.04241879 FALSE FALSE -22 46486508 C T -0.00968439 FALSE FALSE -22 46493852 T C -0.00675858 FALSE FALSE -22 46499120 C G -0.009873118 FALSE FALSE -22 46502870 T C -0.0179214 FALSE FALSE -22 46561713 G A 0.02604703 FALSE FALSE -22 46586110 A G -0.001256735 FALSE FALSE -22 46592168 C T 0.01417055 FALSE FALSE -22 46614274 G C -0.05854014 FALSE FALSE -22 46627603 T C 0.08004024 FALSE FALSE -22 46760086 T C 0.003229515 FALSE FALSE -22 46782382 T C -0.02470821 FALSE FALSE -22 46807234 C T 0.002324176 FALSE FALSE -22 46837114 G A 0.000944073 FALSE FALSE -22 46888399 T C 0.009911095 FALSE FALSE -22 46907779 G A 0.00653144 FALSE FALSE -22 46909355 T G -0.004780494 FALSE FALSE -22 46914277 A C 0.009689535 FALSE FALSE -22 46943687 G A -0.0130366 FALSE FALSE -22 46985917 A G 0.01893397 FALSE FALSE -22 47021226 G A -0.01322949 FALSE FALSE -22 47095235 A C -0.1156013 FALSE FALSE -22 47109621 C T 0.0004322858 FALSE FALSE -22 47125474 G A -0.01746025 FALSE FALSE -22 47147117 T C -0.02418349 FALSE FALSE -22 47156703 C T 0.0262897 FALSE FALSE -22 47245836 A G 0.001880575 FALSE FALSE -22 47271747 C T 0.001055264 FALSE FALSE -22 47301822 C T 0.003032158 FALSE FALSE -22 47345487 T C -0.002945945 FALSE FALSE -22 47372368 T C 0.02067644 FALSE FALSE -22 47380606 C T 0.04041426 FALSE FALSE -22 47437808 C T 0.001683027 FALSE FALSE -22 47450911 A G 0.01624479 FALSE FALSE -22 47511864 A C -0.004226735 FALSE FALSE -22 47519476 T C -0.003954111 FALSE FALSE -22 47529458 A G -0.0003602848 FALSE FALSE -22 47531320 T C -0.006899703 FALSE FALSE -22 47548321 T C 0.004925401 FALSE FALSE -22 47568291 C T 0.007726693 FALSE FALSE -22 47571203 A G 
-0.009744751 FALSE FALSE -22 47574009 C T -0.00532701 FALSE FALSE -22 47642100 T C 0.006976251 FALSE FALSE -22 47657635 T C 0.001798943 FALSE FALSE -22 47683805 C T -0.03475544 FALSE FALSE -22 47720973 T C -0.007868172 FALSE FALSE -22 47821952 G A -0.000885428 FALSE FALSE -22 47893053 A G -0.02449056 FALSE FALSE -22 47935365 C T -0.001599879 FALSE FALSE -22 47961708 G T -0.003593525 FALSE FALSE -22 47986332 T C -0.003976592 FALSE FALSE -22 48154645 C T 0.007608639 FALSE FALSE -22 48165452 C CT 0.002039503 FALSE FALSE -22 48207318 T C -0.009725168 FALSE FALSE -22 48213904 G C -0.01220367 FALSE FALSE -22 48215904 A G -2.488244E-05 FALSE FALSE -22 48220460 T C -0.002702163 FALSE FALSE -22 48230941 C A -0.001129522 FALSE FALSE -22 48271961 A G -0.005053446 FALSE FALSE -22 48284025 T C -0.003344182 FALSE FALSE -22 48297953 C T -0.01046958 FALSE FALSE -22 48362290 G A -0.02367254 FALSE FALSE -22 48362914 C A -0.003167719 FALSE FALSE -22 48387670 A G -0.008243989 FALSE FALSE -22 48415446 C T 0.002130715 FALSE FALSE -22 48460730 T C 0.002682476 FALSE FALSE -22 48491160 T C 0.001257794 FALSE FALSE -22 48519794 C T 0.003680757 FALSE FALSE -22 48537775 G A 0.002134692 FALSE FALSE -22 48543566 T C 0.007314089 FALSE FALSE -22 48593037 C T 0.009084708 FALSE FALSE -22 48687509 C T -0.0277196 FALSE FALSE -22 48692033 T C -0.02126264 FALSE FALSE -22 48699617 T C 0.0005093107 FALSE FALSE -22 48717568 T C -0.0008190281 FALSE FALSE -22 48811946 C T 0.007916515 FALSE FALSE -22 48823357 G A 0.01464317 FALSE FALSE -22 48840428 A C 0.003711229 FALSE FALSE -22 48851612 T C -0.005887765 FALSE FALSE -22 48874310 T C -0.01106607 FALSE FALSE -22 48968070 C T 0.01280691 FALSE FALSE -22 48991385 T C -0.01234119 FALSE FALSE -22 49004050 G A 0.02290755 FALSE FALSE -22 49014565 A G 0.001555565 FALSE FALSE -22 49086481 T C -0.006196369 FALSE FALSE -22 49107173 T C 0.01277272 FALSE FALSE -22 49180915 A G 0.006346977 FALSE FALSE -22 49262579 A G 0.02657134 FALSE FALSE -22 49270317 C T 0.001447665 FALSE FALSE -22 49313196 A G -0.007055532 FALSE FALSE -22 49335230 T C -0.006548281 FALSE FALSE -22 49366123 T C 0.01136486 FALSE FALSE -22 49372356 G C -0.02420841 FALSE FALSE -22 49443666 T C 0.01581736 FALSE FALSE -22 49496835 G A -0.01355414 FALSE FALSE -22 49524428 A G -0.004228482 FALSE FALSE -22 49530553 G C 0.008197389 FALSE FALSE -22 49537845 T C 0.0111255 FALSE FALSE -22 49557457 G A 0.009401926 FALSE FALSE -22 49562666 C A 0.01271701 FALSE FALSE -22 49574509 C T 0.0004703177 FALSE FALSE -22 49579141 A G 0.02448619 FALSE FALSE -22 49650863 T C 0.006739571 FALSE FALSE -22 49662549 T G -0.005769464 FALSE FALSE -22 49665841 T C -0.0007037069 FALSE FALSE -22 49677464 A G -0.02177735 FALSE FALSE -22 49696067 C T -0.003309682 FALSE FALSE -22 49700272 T G -0.002541948 FALSE FALSE -22 49706433 T C -0.01719402 FALSE FALSE -22 49713835 G A -0.01370754 FALSE FALSE -22 49719264 A C -0.01067852 FALSE FALSE -22 49743627 G A -0.0005970581 FALSE FALSE -22 49800265 C T 0.03098582 FALSE FALSE -22 49806863 A G 0.003940447 FALSE FALSE -22 49830851 C T -0.002742706 FALSE FALSE -22 49834624 G A -0.002820163 FALSE FALSE -22 49843235 G C -0.0004458281 FALSE FALSE -22 49847501 T G 0.002235016 FALSE FALSE -22 49861033 C T 0.01721243 FALSE FALSE -22 49881321 A G -0.00051278 FALSE FALSE -22 49908804 G A -0.009455892 FALSE FALSE -22 49911222 G T -0.01389666 FALSE FALSE -22 49925268 A G 0.01679984 FALSE FALSE -22 49927332 T C 0.00039298 FALSE FALSE -22 50109212 T C 0.01610819 FALSE FALSE -22 50118149 G C 0.007024666 FALSE FALSE -22 50184484 G T 
0.01222581 FALSE FALSE -22 50219447 T C 0.05091891 FALSE FALSE -22 50278568 G A -0.02340672 FALSE FALSE -22 50319170 G A 0.01669806 FALSE FALSE -22 50350971 A G 0.0264016 FALSE FALSE -22 50356693 C T 0.003851499 FALSE FALSE -22 50435480 G A 0.0166363 FALSE FALSE -22 50439626 A G -0.002722154 FALSE FALSE -22 50466542 C T -0.002560094 FALSE FALSE -22 50470516 T C -0.01621986 FALSE FALSE -22 50491150 G A 0.01828674 FALSE FALSE -22 50515270 C T 0.01439904 FALSE FALSE -22 50529850 C T 0.02054628 FALSE FALSE -22 50570755 C G 0.007077514 FALSE FALSE -22 50582626 G A -0.003588854 FALSE FALSE -22 50672154 A G 0.007660848 FALSE FALSE -22 50722134 C T -0.01747164 FALSE FALSE -22 50722408 C T -0.001063465 FALSE FALSE -22 50728062 C T 0.02159223 FALSE FALSE -22 50750481 T C 0.01877272 FALSE FALSE -22 50758873 T C 0.004001731 FALSE FALSE -22 50835040 A G -0.006374259 FALSE FALSE -22 50859049 C T 0.0003480749 FALSE FALSE -22 50885775 G A -0.01358311 FALSE FALSE -22 50926768 T C 0.001798498 FALSE FALSE -22 50928026 A G 0.004775504 FALSE FALSE -22 50971266 C T 0.02160893 FALSE FALSE -22 50989197 T C -0.01328884 FALSE FALSE -22 50989326 G A 0.01037054 FALSE FALSE -22 50999681 G A -0.01226224 FALSE FALSE -22 51046163 T C -0.02754002 FALSE FALSE -22 51117580 C T 0.03573542 FALSE FALSE -22 51171497 A G -0.01951606 FALSE FALSE -22 51174939 T C -0.006178519 FALSE FALSE diff --git a/tests/test_combine.py b/tests/test_combine.py index bc82faf..fe9cb0a 100644 --- a/tests/test_combine.py +++ b/tests/test_combine.py @@ -65,13 +65,11 @@ def test_effect_type_combine(effect_type_path, tmp_path, combine_output_header): with open(out_path.parent / "log_combined.json") as f: header = json.load(f)[0] - assert ( - header["scorefile_dominant_and_recessive"]["pgs_name"] - == "PGS001229_22_DominantRecessiveExample" - ) - assert header["scorefile_dominant_and_recessive"]["genome_build"] == "GRCh37" - assert header["scorefile_dominant_and_recessive"]["variants_number"] == n - assert not header["scorefile_dominant_and_recessive"]["use_harmonised"] + assert header["PGS000802_hmPOS_GRCh37"]["pgs_name"] == "CRC_19" + assert header["PGS000802_hmPOS_GRCh37"]["genome_build"] is None + assert header["PGS000802_hmPOS_GRCh37"]["HmPOS_build"] == "GRCh37" + assert int(header["PGS000802_hmPOS_GRCh37"]["variants_number"]) == n + assert header["PGS000802_hmPOS_GRCh37"]["use_harmonised"] def test_custom_combine(custom_score_path, tmp_path, combine_output_header): @@ -111,7 +109,8 @@ def pgscatalog_path(scope="session"): @pytest.fixture def effect_type_path(scope="session"): - path = importlib.resources.files(combine) / "scorefile_dominant_and_recessive.txt" + # this scoring file contains dominant and recessive alleles + path = importlib.resources.files(combine) / "PGS000802_hmPOS_GRCh37.txt" return path From 5901b8e143c864b702b3411241c70673eb82b188 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Thu, 22 Feb 2024 10:23:04 +0000 Subject: [PATCH 2/5] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 88134e4..65ec28d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pgscatalog_utils" -version = "0.5.1" +version = "0.5.2" description = "Utilities for working with PGS Catalog API and scoring files" homepage = "https://github.com/PGScatalog/pgscatalog_utils" authors = ["Benjamin Wingfield ", "Samuel Lambert ", "Laurent Gil "] From b1760da8926c6b1698b6761dd42d95e05dc19242 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield 
Date: Thu, 22 Feb 2024 11:41:47 +0000
Subject: [PATCH 3/5] bump version

---
 pgscatalog_utils/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pgscatalog_utils/__init__.py b/pgscatalog_utils/__init__.py
index a987347..7225152 100644
--- a/pgscatalog_utils/__init__.py
+++ b/pgscatalog_utils/__init__.py
@@ -1 +1 @@
-__version__ = '0.4.2'
+__version__ = "0.5.2"

From a71344351205d93c7e8d6273e3541040773f5af0 Mon Sep 17 00:00:00 2001
From: smlmbrt
Date: Sat, 2 Mar 2024 21:45:01 +0000
Subject: [PATCH 4/5] Replace very slow merges with concatenations (of df
 subsets), and VERY SLOW list comparisons with set overlaps.

Signed-off-by: smlmbrt
---
 .../ancestry/ancestry_analysis.py  | 19 ++++++++-----------
 pgscatalog_utils/ancestry/tools.py |  1 +
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/pgscatalog_utils/ancestry/ancestry_analysis.py b/pgscatalog_utils/ancestry/ancestry_analysis.py
index a7dbd8e..4795aeb 100644
--- a/pgscatalog_utils/ancestry/ancestry_analysis.py
+++ b/pgscatalog_utils/ancestry/ancestry_analysis.py
@@ -55,18 +55,15 @@ def ancestry_analysis():
     scorecols = list(pgs.columns)
 
     ## There should be perfect target sample overlap
-    assert all(
-        [
-            x in pgs.loc["reference"].index
-            for x in reference_df.index.get_level_values(1)
-        ]
-    ), "Error: PGS data missing for reference samples with PCA data."
-    reference_df = pd.merge(reference_df, pgs, left_index=True, right_index=True)
+    assert set(reference_df.index.get_level_values(1)).issubset(pgs.loc["reference"].index),\
+        "Error: PGS data missing for reference samples with PCA data."
+    reference_df = reference_df.sort_index()
+    reference_df = pd.concat([reference_df, pgs.loc[reference_df.index]], axis=1)
 
-    assert all(
-        [x in pgs.loc[args.d_target].index for x in target_df.index.get_level_values(1)]
-    ), "Error: PGS data missing for reference samples with PCA data."
-    target_df = pd.merge(target_df, pgs, left_index=True, right_index=True)
+    assert set(target_df.index.get_level_values(1)).issubset(pgs.loc[args.d_target].index), \
+        "Error: PGS data missing for target samples with PCA data."
+    target_df = target_df.sort_index()
+    target_df = pd.concat([target_df, pgs.loc[target_df.index]], axis=1)
     del pgs  # clear raw PGS from memory
 
     # Compare target sample ancestry/PCs to reference panel
diff --git a/pgscatalog_utils/ancestry/tools.py b/pgscatalog_utils/ancestry/tools.py
index 47cffaa..8ed8af9 100644
--- a/pgscatalog_utils/ancestry/tools.py
+++ b/pgscatalog_utils/ancestry/tools.py
@@ -61,6 +61,7 @@ def compare_ancestry(ref_df: pd.DataFrame, ref_pop_col: str, target_df: pd.DataF
     :param p_threshold: used to define LowConfidence population assignments
     :return: dataframes for reference (predictions on training set) and target (predicted labels) datasets
     """
+    logger.debug("Starting ancestry comparison")
     # Check that datasets have the correct columns
     assert method in comparison_method_threshold.keys(), 'comparison method parameter must be Mahalanobis or RF'
     if method == 'Mahalanobis':

From 4b61fa8a5b5ec8d47a2a71beb9989829a5eee031 Mon Sep 17 00:00:00 2001
From: smlmbrt
Date: Tue, 5 Mar 2024 12:37:20 +0000
Subject: [PATCH 5/5] Handle [rare] case where PGS has 0 variance by abstaining
 from PGS adjustment (outputting NAs and adding a WARNING to log).
Signed-off-by: smlmbrt
---
 pgscatalog_utils/ancestry/tools.py | 174 ++++++++++++++++-------------
 1 file changed, 95 insertions(+), 79 deletions(-)

diff --git a/pgscatalog_utils/ancestry/tools.py b/pgscatalog_utils/ancestry/tools.py
index 8ed8af9..11fa4a7 100644
--- a/pgscatalog_utils/ancestry/tools.py
+++ b/pgscatalog_utils/ancestry/tools.py
@@ -239,6 +239,7 @@ def pgs_adjust(ref_df, target_df, scorecols: list, ref_pop_col, target_pop_col,
     results_ref = {}
     results_target = {}
     results_models = {}  # used to store regression information
+    scorecols_drop = set()
     for c_pgs in scorecols:
         # Makes melting easier later
         sum_col = 'SUM|{}'.format(c_pgs)
@@ -246,6 +247,11 @@
         results_ref[sum_col] = ref_df[c_pgs]
         results_target[sum_col] = target_df[c_pgs]
         results_models = {}
 
+        # Check that PGS has variance (e.g. not all 0)
+        if 0 in [np.var(results_ref[sum_col]), np.var(results_target[sum_col])]:
+            scorecols_drop.add(c_pgs)
+            logger.warning("Skipping adjustment: {} has 0 variance in PGS SUM".format(c_pgs))
+
     # Report PGS values with respect to distribution of PGS in the most similar reference population
     if 'empirical' in use_method:
         logger.debug("Adjusting PGS using most similar reference population distribution.")
@@ -259,35 +265,36 @@
             percentile_col = 'percentile_MostSimilarPop|{}'.format(c_pgs)
             results_ref[percentile_col] = pd.Series(index=ref_df.index, dtype='float64')
             results_target[percentile_col] = pd.Series(index=target_df.index, dtype='float64')
 
             z_col = 'Z_MostSimilarPop|{}'.format(c_pgs)
             results_ref[z_col] = pd.Series(index=ref_df.index, dtype='float64')
             results_target[z_col] = pd.Series(index=target_df.index, dtype='float64')
-
-            r_model = {}
-            # Adjust for each population
-            for pop in ref_populations:
-                r_pop = {}
-                i_ref_pop = (ref_df[ref_pop_col] == pop)
-                i_target_pop = (target_df[target_pop_col] == pop)
+            if c_pgs not in scorecols_drop:
+                r_model = {}
+
+                # Adjust for each population
+                for pop in ref_populations:
+                    r_pop = {}
+                    i_ref_pop = (ref_df[ref_pop_col] == pop)
+                    i_target_pop = (target_df[target_pop_col] == pop)
 
-                # Reference Score Distribution
-                c_pgs_pop_dist = ref_train_df.loc[ref_train_df[ref_pop_col] == pop, c_pgs]
+                    # Reference Score Distribution
+                    c_pgs_pop_dist = ref_train_df.loc[ref_train_df[ref_pop_col] == pop, c_pgs]
 
-                # Calculate Percentile
-                results_ref[percentile_col].loc[i_ref_pop] = percentileofscore(c_pgs_pop_dist, ref_df.loc[i_ref_pop, c_pgs])
-                results_target[percentile_col].loc[i_target_pop] = percentileofscore(c_pgs_pop_dist, target_df.loc[i_target_pop, c_pgs])
-                r_pop['percentiles'] = np.percentile(c_pgs_pop_dist, range(0,101,1))
+                    # Calculate Percentile
+                    results_ref[percentile_col].loc[i_ref_pop] = percentileofscore(c_pgs_pop_dist, ref_df.loc[i_ref_pop, c_pgs])
+                    results_target[percentile_col].loc[i_target_pop] = percentileofscore(c_pgs_pop_dist, target_df.loc[i_target_pop, c_pgs])
+                    r_pop['percentiles'] = np.percentile(c_pgs_pop_dist, range(0,101,1))
 
-                # Calculate Z
-                r_pop['mean'] = c_pgs_pop_dist.mean()
-                r_pop['std'] = c_pgs_pop_dist.std(ddof=0)
+                    # Calculate Z
+                    r_pop['mean'] = c_pgs_pop_dist.mean()
+                    r_pop['std'] = c_pgs_pop_dist.std(ddof=0)
 
-                results_ref[z_col].loc[i_ref_pop] = (ref_df.loc[i_ref_pop, c_pgs] - r_pop['mean'])/r_pop['std']
-                results_target[z_col].loc[i_target_pop] = (target_df.loc[i_target_pop, c_pgs] - r_pop['mean'])/r_pop['std']
+                    results_ref[z_col].loc[i_ref_pop] = (ref_df.loc[i_ref_pop, c_pgs] - r_pop['mean'])/r_pop['std']
+                    results_target[z_col].loc[i_target_pop] = (target_df.loc[i_target_pop, c_pgs] - r_pop['mean'])/r_pop['std']
 
-                r_model[pop] = r_pop
+                    r_model[pop] = r_pop
 
-            results_models['dist_empirical'][c_pgs] = r_model
-            # ToDo: explore handling of individuals who have low-confidence population labels
-            # -> Possible Soln: weighted average based on probabilities? Small Mahalanobis P-values will complicate this
+                results_models['dist_empirical'][c_pgs] = r_model
+        # ToDo: explore handling of individuals who have low-confidence population labels
+        # -> Possible Soln: weighted average based on probabilities? Small Mahalanobis P-values will complicate this
 
     # PCA-based adjustment
     if any([x in use_method for x in ['mean', 'mean+var']]):
         logger.debug("Adjusting PGS using PCA projections")
@@ -313,63 +320,72 @@
             target_norm[pc_col] = (target_norm[pc_col] - pc_mean) / pc_std
 
         for c_pgs in scorecols:
-            results_models['adjust_pcs']['PGS'][c_pgs] = {}
-            if norm_centerpgs:
-                pgs_mean = ref_train_df[c_pgs].mean()
-                ref_train_df[c_pgs] = (ref_train_df[c_pgs] - pgs_mean)
-                ref_norm[c_pgs] = (ref_norm[c_pgs] - pgs_mean)
-                target_norm[c_pgs] = (target_norm[c_pgs] - pgs_mean)
-                results_models['adjust_pcs']['PGS'][c_pgs]['pgs_offset'] = pgs_mean
-
-            # Method 1 (Khera et al. Circulation (2019): normalize mean (doi:10.1161/CIRCULATIONAHA.118.035658)
-            adj_col = 'Z_norm1|{}'.format(c_pgs)
-            # Fit to Reference Data
-            pcs2pgs_fit = LinearRegression().fit(ref_train_df[cols_pcs], ref_train_df[c_pgs])
-            ref_train_pgs_pred = pcs2pgs_fit.predict(ref_train_df[cols_pcs])
-            ref_train_pgs_resid = ref_train_df[c_pgs] - ref_train_pgs_pred
-            ref_train_pgs_resid_mean = ref_train_pgs_resid.mean()
-            ref_train_pgs_resid_std = ref_train_pgs_resid.std(ddof=0)
-
-            ref_pgs_resid = ref_norm[c_pgs] - pcs2pgs_fit.predict(ref_norm[cols_pcs])
-            results_ref[adj_col] = ref_pgs_resid / ref_train_pgs_resid_std
-            # Apply to Target Data
-            target_pgs_pred = pcs2pgs_fit.predict(target_norm[cols_pcs])
-            target_pgs_resid = target_norm[c_pgs] - target_pgs_pred
-            results_target[adj_col] = target_pgs_resid / ref_train_pgs_resid_std
-            results_models['adjust_pcs']['PGS'][c_pgs]['Z_norm1'] = package_skl_regression(pcs2pgs_fit)
-
-            if 'mean+var' in use_method:
-                # Method 2 (Khan et al. Nature Medicine (2022)): normalize variance (doi:10.1038/s41591-022-01869-1)
-                # Normalize based on residual deviation from mean of the distribution [equalize population sds]
-                # (e.g. reduce the correlation between genetic ancestry and how far away you are from the mean)
-                # USE gamma distribution for predicted variance to constrain it to be positive (b/c using linear
-                # regression we can get negative predictions for the sd)
-                adj_col = 'Z_norm2|{}'.format(c_pgs)
-                pcs2var_fit_gamma = GammaRegressor(max_iter=1000).fit(ref_train_df[cols_pcs], (
-                        ref_train_pgs_resid - ref_train_pgs_resid_mean) ** 2)
-                if norm2_2step:
-                    # Return 2-step adjustment
-                    results_ref[adj_col] = ref_pgs_resid / np.sqrt(pcs2var_fit_gamma.predict(ref_norm[cols_pcs]))
-                    results_target[adj_col] = target_pgs_resid / np.sqrt(
-                        pcs2var_fit_gamma.predict(target_norm[cols_pcs]))
-                    results_models['adjust_pcs']['PGS'][c_pgs]['Z_norm2'] = package_skl_regression(pcs2var_fit_gamma)
-                else:
-                    # Return full-likelihood adjustment model
-                    # This jointly re-fits the regression parameters from the mean and variance prediction to better
-                    # fit the observed PGS distribution. It seems to mostly change the intercepts. This implementation is
-                    # adapted from https://github.com/broadinstitute/palantir-workflows/blob/v0.14/ImputationPipeline/ScoringTasks.wdl,
-                    # which is distributed under a BSD-3 license.
-                    params_initial = np.concatenate([[pcs2pgs_fit.intercept_], pcs2pgs_fit.coef_,
-                                                     [pcs2var_fit_gamma.intercept_], pcs2var_fit_gamma.coef_])
-                    pcs2full_fit = fullLL_fit(df_score=ref_train_df, scorecol=c_pgs,
-                                              predictors=cols_pcs, initial_params=params_initial)
-
-                    results_ref[adj_col] = fullLL_adjust(pcs2full_fit, ref_norm, c_pgs)
-                    results_target[adj_col] = fullLL_adjust(pcs2full_fit, target_norm, c_pgs)
-
-                    if pcs2full_fit['params']['success'] is False:
-                        logger.warning("{} full-likelihood: {} {}".format(c_pgs, pcs2full_fit['params']['status'], pcs2full_fit['params']['message']))
-                    results_models['adjust_pcs']['PGS'][c_pgs]['Z_norm2'] = pcs2full_fit
+            if c_pgs in scorecols_drop:
+                # fill the output with NAs
+                adj_cols = ['Z_norm1|{}'.format(c_pgs)]
+                if 'mean+var' in use_method:
+                    adj_cols.append('Z_norm2|{}'.format(c_pgs))
+                for adj_col in adj_cols:
+                    results_ref[adj_col] = pd.Series(index=ref_df.index, dtype='float64')  # fill na
+                    results_target[adj_col] = pd.Series(index=target_df.index, dtype='float64')  # fill na
+            else:
+                results_models['adjust_pcs']['PGS'][c_pgs] = {}
+                if norm_centerpgs:
+                    pgs_mean = ref_train_df[c_pgs].mean()
+                    ref_train_df[c_pgs] = (ref_train_df[c_pgs] - pgs_mean)
+                    ref_norm[c_pgs] = (ref_norm[c_pgs] - pgs_mean)
+                    target_norm[c_pgs] = (target_norm[c_pgs] - pgs_mean)
+                    results_models['adjust_pcs']['PGS'][c_pgs]['pgs_offset'] = pgs_mean
+
+                # Method 1 (Khera et al. Circulation (2019): normalize mean (doi:10.1161/CIRCULATIONAHA.118.035658)
+                adj_col = 'Z_norm1|{}'.format(c_pgs)
+                # Fit to Reference Data
+                pcs2pgs_fit = LinearRegression().fit(ref_train_df[cols_pcs], ref_train_df[c_pgs])
+                ref_train_pgs_pred = pcs2pgs_fit.predict(ref_train_df[cols_pcs])
+                ref_train_pgs_resid = ref_train_df[c_pgs] - ref_train_pgs_pred
+                ref_train_pgs_resid_mean = ref_train_pgs_resid.mean()
+                ref_train_pgs_resid_std = ref_train_pgs_resid.std(ddof=0)
+
+                ref_pgs_resid = ref_norm[c_pgs] - pcs2pgs_fit.predict(ref_norm[cols_pcs])
+                results_ref[adj_col] = ref_pgs_resid / ref_train_pgs_resid_std
+                # Apply to Target Data
+                target_pgs_pred = pcs2pgs_fit.predict(target_norm[cols_pcs])
+                target_pgs_resid = target_norm[c_pgs] - target_pgs_pred
+                results_target[adj_col] = target_pgs_resid / ref_train_pgs_resid_std
+                results_models['adjust_pcs']['PGS'][c_pgs]['Z_norm1'] = package_skl_regression(pcs2pgs_fit)
+
+                if 'mean+var' in use_method:
+                    # Method 2 (Khan et al. Nature Medicine (2022)): normalize variance (doi:10.1038/s41591-022-01869-1)
+                    # Normalize based on residual deviation from mean of the distribution [equalize population sds]
+                    # (e.g. reduce the correlation between genetic ancestry and how far away you are from the mean)
+                    # USE gamma distribution for predicted variance to constrain it to be positive (b/c using linear
+                    # regression we can get negative predictions for the sd)
+                    adj_col = 'Z_norm2|{}'.format(c_pgs)
+                    pcs2var_fit_gamma = GammaRegressor(max_iter=1000).fit(ref_train_df[cols_pcs], (
+                            ref_train_pgs_resid - ref_train_pgs_resid_mean) ** 2)
+                    if norm2_2step:
+                        # Return 2-step adjustment
+                        results_ref[adj_col] = ref_pgs_resid / np.sqrt(pcs2var_fit_gamma.predict(ref_norm[cols_pcs]))
+                        results_target[adj_col] = target_pgs_resid / np.sqrt(
+                            pcs2var_fit_gamma.predict(target_norm[cols_pcs]))
+                        results_models['adjust_pcs']['PGS'][c_pgs]['Z_norm2'] = package_skl_regression(pcs2var_fit_gamma)
+                    else:
+                        # Return full-likelihood adjustment model
+                        # This jointly re-fits the regression parameters from the mean and variance prediction to better
+                        # fit the observed PGS distribution. It seems to mostly change the intercepts. This implementation is
+                        # adapted from https://github.com/broadinstitute/palantir-workflows/blob/v0.14/ImputationPipeline/ScoringTasks.wdl,
+                        # which is distributed under a BSD-3 license.
+                        params_initial = np.concatenate([[pcs2pgs_fit.intercept_], pcs2pgs_fit.coef_,
+                                                         [pcs2var_fit_gamma.intercept_], pcs2var_fit_gamma.coef_])
+                        pcs2full_fit = fullLL_fit(df_score=ref_train_df, scorecol=c_pgs,
+                                                  predictors=cols_pcs, initial_params=params_initial)
+
+                        results_ref[adj_col] = fullLL_adjust(pcs2full_fit, ref_norm, c_pgs)
+                        results_target[adj_col] = fullLL_adjust(pcs2full_fit, target_norm, c_pgs)
+
+                        if pcs2full_fit['params']['success'] is False:
+                            logger.warning("{} full-likelihood: {} {}".format(c_pgs, pcs2full_fit['params']['status'], pcs2full_fit['params']['message']))
+                        results_models['adjust_pcs']['PGS'][c_pgs]['Z_norm2'] = pcs2full_fit
 
     # Only return results
     logger.debug("Outputting adjusted PGS & models")
     results_ref = pd.DataFrame(results_ref)
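A note on the fix in PATCH 1/5: the structural pattern match in qc.py used to compare against the strings "TRUE"/"FALSE", while harmonised PGS Catalog scoring files (such as the new PGS000802_hmPOS_GRCh37.txt fixture) store "True"/"False", so dominant and recessive variants fell through to the logger.critical branch. The patch coerces the raw strings to real booleans in the ScoreVariant constructor and matches on those. Below is a minimal self-contained sketch of the corrected logic; the Variant dataclass and EffectType enum are simplified stand-ins for illustration, not the package's real classes, and the match statement requires Python >= 3.10.

from dataclasses import dataclass
from enum import Enum
from typing import Optional


class EffectType(Enum):
    ADDITIVE = "additive"
    DOMINANT = "dominant"
    RECESSIVE = "recessive"


@dataclass
class Variant:
    # raw column values are the strings "True"/"False", or missing entirely
    is_dominant: Optional[bool]
    is_recessive: Optional[bool]
    effect_type: EffectType = EffectType.ADDITIVE


def coerce(raw: Optional[str]) -> Optional[bool]:
    # mirror the patched constructor: compare against the string "True"
    return raw == "True" if raw is not None else None


def assign(variant: Variant) -> Variant:
    match (variant.is_recessive, variant.is_dominant):
        case (None, None) | (False, False):
            pass  # additive is the default effect type
        case (False, True):
            variant.effect_type = EffectType.DOMINANT
        case (True, False):
            variant.effect_type = EffectType.RECESSIVE
        case _:
            raise ValueError(f"Bad effect type setting: {variant}")
    return variant


v = Variant(is_dominant=coerce("True"), is_recessive=coerce("False"))
assert assign(v).effect_type == EffectType.DOMINANT

Note that this coercion maps any string other than "True" (including the legacy uppercase "TRUE") to False, which is one reason the old TRUE/FALSE test fixture would no longer exercise the dominant and recessive paths.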
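PATCH 4/5 swaps two slow patterns for effectively linear ones: the old assertion re-evaluates pgs.loc[...] on every iteration of the list comprehension and builds a full list before all() can short-circuit, while set.issubset hashes each index once; and an index merge becomes an index-aligned concatenation of dataframe subsets. A toy sketch of the equivalence follows; the frames, sizes, and score name here are invented for illustration.

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
idx = pd.MultiIndex.from_product(
    [["reference"], [f"sample_{i}" for i in range(1000)]],
    names=["sampleset", "IID"],
)
pcs = pd.DataFrame({"PC1": rng.normal(size=1000)}, index=idx)
pgs = pd.DataFrame({"SUM|PGS000802": rng.normal(size=1000)}, index=idx)

# Old approach: per-element membership tests plus a merge
assert all([x in pgs.loc["reference"].index for x in pcs.index.get_level_values(1)])
merged = pd.merge(pcs, pgs, left_index=True, right_index=True)

# New approach: hashed set containment plus an index-aligned concat
assert set(pcs.index.get_level_values(1)).issubset(pgs.loc["reference"].index)
pcs = pcs.sort_index()
concatenated = pd.concat([pcs, pgs.loc[pcs.index]], axis=1)

# both routes produce the same combined frame
assert merged.sort_index().equals(concatenated)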
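PATCH 5/5 abstains from adjusting any score whose SUM column is constant, because a zero-variance score cannot be standardised (Z = (x - mean) / std divides by zero) and would destabilise the downstream regressions. A minimal sketch of the abstention pattern on toy data follows; it is not the real pgs_adjust signature, and the column names are invented.

import logging

import numpy as np
import pandas as pd

logging.basicConfig(level=logging.INFO)  # surface the warning when run directly
logger = logging.getLogger(__name__)

ref = pd.DataFrame({"PGS_A": [0.1, 0.4, 0.3], "PGS_B": [0.0, 0.0, 0.0]})
target = pd.DataFrame({"PGS_A": [0.2, 0.5], "PGS_B": [0.0, 0.0]})

scorecols = ["PGS_A", "PGS_B"]
scorecols_drop = set()
results_target = {}

for c_pgs in scorecols:
    # a constant score has variance 0 in either dataset -> skip adjustment
    if 0 in [np.var(ref[c_pgs]), np.var(target[c_pgs])]:
        scorecols_drop.add(c_pgs)
        logger.warning("Skipping adjustment: {} has 0 variance in PGS SUM".format(c_pgs))

    if c_pgs in scorecols_drop:
        # abstain: emit an all-NA column instead of a misleading adjusted score
        results_target["Z_norm1|{}".format(c_pgs)] = pd.Series(index=target.index, dtype="float64")
    else:
        z = (target[c_pgs] - ref[c_pgs].mean()) / ref[c_pgs].std(ddof=0)
        results_target["Z_norm1|{}".format(c_pgs)] = z

print(pd.DataFrame(results_target))  # PGS_B column is NaN, PGS_A is standardised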