Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to filter to releasable trios for generating trio stats #638

Merged
merged 4 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 33 additions & 4 deletions gnomad_qc/v4/annotations/generate_variant_qc_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,13 +446,15 @@ def run_generate_trio_stats(
vds: hl.vds.VariantDataset,
fam_ped: hl.Pedigree,
fam_ht: hl.Table,
releasable_only: bool = False,
) -> hl.Table:
"""
Generate trio transmission stats from a VariantDataset and pedigree info.

:param vds: VariantDataset to generate trio stats from.
:param fam_ped: Pedigree containing trio info.
:param fam_ht: Table containing trio info.
:param releasable_only: Whether to only include releasable trios. Releasable trios are those where all three samples (proband, maternal, and paternal) are marked as 'releasable'.
:return: Table containing trio stats.
"""
# Filter the VDS to autosomes.
Expand All @@ -462,6 +464,17 @@ def run_generate_trio_stats(

# Filter the variant data to bi-allelic sites.
vmt = vmt.filter_rows(hl.len(vmt.alleles) == 2)
if releasable_only:
logger.info("Filtering to only releasable trios...")
meta = vmt.cols()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could add a logger stating that filtering to releasable trios here

fam_ht = fam_ht.annotate(
id_releasable=meta[fam_ht.key].meta.project_meta.releasable,
pat_releasable=meta[fam_ht.pat_id].meta.project_meta.releasable,
mat_releasable=meta[fam_ht.mat_id].meta.project_meta.releasable,
)
fam_ht = fam_ht.filter(
fam_ht.id_releasable & fam_ht.pat_releasable & fam_ht.mat_releasable
)

# Filter the variant data and reference data to only the trios.
vmt = filter_mt_to_trios(vmt, fam_ht)
Expand Down Expand Up @@ -666,6 +679,7 @@ def get_variant_qc_annotation_resources(
over_n_alleles: Optional[bool] = None,
combine_compute_info: bool = False,
true_positive_type: Optional[str] = None,
releasable_trios_only: bool = False,
) -> PipelineResourceCollection:
"""
Get PipelineResourceCollection for all resources needed in the variant QC annotation pipeline.
Expand All @@ -682,6 +696,7 @@ def get_variant_qc_annotation_resources(
produced by running --compute-info with --compute-info-split-n-alleles.
:param true_positive_type: Type of true positive variants to use for true positive
VCF path resource. Default is None.
:param releasable_trios_only: Whether to only include releasable trios in the trio stats.
:return: PipelineResourceCollection containing resources for all steps of the
variant QC annotation pipeline.
"""
Expand Down Expand Up @@ -751,7 +766,11 @@ def get_variant_qc_annotation_resources(
)
trio_stats = PipelineStepResourceCollection(
"--generate-trio-stats",
output_resources={"trio_stats_ht": get_trio_stats(test=test)},
output_resources={
"trio_stats_ht": get_trio_stats(
test=test, releasable_only=releasable_trios_only
)
},
input_resources={"identify_trios.py --finalize-ped": {"final_ped": pedigree()}},
)
sib_stats = PipelineStepResourceCollection(
Expand Down Expand Up @@ -819,6 +838,7 @@ def main(args):
sibling_singletons = args.sibling_singletons
retain_cdfs = args.retain_cdfs
cdf_k = args.cdf_k
releasable_trios_only = args.releasable_trios_only

max_n_alleles = min_n_alleles = over_n_alleles = None
if split_n_alleles is not None:
Expand All @@ -844,6 +864,7 @@ def main(args):
over_n_alleles=over_n_alleles,
combine_compute_info=combine_compute_info,
true_positive_type=true_positive_type,
releasable_trios_only=releasable_trios_only,
)
vds = get_gnomad_v4_vds(
test=test_dataset,
Expand Down Expand Up @@ -926,7 +947,10 @@ def main(args):
res = vqc_resources.generate_trio_stats
res.check_resource_existence()
ht = run_generate_trio_stats(
vds, res.final_ped.pedigree(), res.final_ped.ht()
vds,
res.final_ped.pedigree(),
res.final_ped.ht(),
releasable_only=args.releasable_trios_only,
)
ht.write(res.trio_stats_ht.path, overwrite=overwrite)

Expand Down Expand Up @@ -1069,15 +1093,20 @@ def get_script_argument_parser() -> argparse.ArgumentParser:
help="Version of VEPed context Table to use in vep_or_lookup_vep.",
default="105",
)
parser.add_argument(
trio_stat_args = parser.add_argument_group("Arguments used to generate trio stats.")
trio_stat_args.add_argument(
"--generate-trio-stats", help="Calculates trio stats", action="store_true"
)
trio_stat_args.add_argument(
"--releasable-trios-only",
help="Only include releasable trios. This option is only valid when --generate-trio-stats is true.",
action="store_true",
)
parser.add_argument(
"--generate-sibling-stats",
help="Calculate sibling variant sharing stats.",
action="store_true",
)

variant_qc_annotation_args = parser.add_argument_group(
"Variant QC annotation HT parameters"
)
Expand Down
10 changes: 7 additions & 3 deletions gnomad_qc/v4/resources/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,18 +117,22 @@ def validate_vep_path(
)


def get_trio_stats(test: bool = False) -> VersionedTableResource:
def get_trio_stats(
test: bool = False, releasable_only: bool = False
) -> VersionedTableResource:
"""
Get the gnomAD v4 trio stats VersionedTableResource.

:param test: Whether to use a tmp path for testing.
:param test: Whether to use a temporary path for testing.
:param releasable_only: Whether to use only releasable data.
:return: gnomAD v4 trio stats VersionedTableResource.
"""
return VersionedTableResource(
CURRENT_ANNOTATION_VERSION,
{
version: TableResource(
f"{_annotations_root(version, test=test)}/gnomad.exomes.v{version}.trio_stats.ht"
f"{_annotations_root(version, test=test)}/gnomad.exomes.v{version}."
f"{'releasable.' if releasable_only else ''}trio_stats.ht"
)
for version in ANNOTATION_VERSIONS
},
Expand Down
Loading