Skip to content

Commit

Permalink
fix assert which fails when no matches are found
Browse files Browse the repository at this point in the history
  • Loading branch information
nebfield committed Nov 28, 2023
1 parent 9427eb2 commit 29ff0cc
Showing 1 changed file with 26 additions and 7 deletions.
33 changes: 26 additions & 7 deletions pgscatalog_utils/match/combine_matches.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ def combine_matches():
config.OUTDIR = args.outdir

with pl.StringCache():
scorefile = read_scorefile(path=args.scorefile, chrom=None) # chrom=None to read all variants
scorefile = read_scorefile(path=args.scorefile,
chrom=None) # chrom=None to read all variants
logger.debug("Reading matches")
matches = pl.concat([pl.scan_ipc(x, memory_map=False, rechunk=False) for x in args.matches], rechunk=False)
matches = pl.concat(
[pl.scan_ipc(x, memory_map=False, rechunk=False) for x in args.matches],
rechunk=False)

logger.debug("Labelling match candidates")
params: dict[str, bool] = make_params_dict(args)
Expand All @@ -49,7 +52,20 @@ def _check_duplicate_vars(matches: pl.LazyFrame):
.collect()
.get_column('count')
.to_list())
assert max_occurrence == [1], "Duplicate IDs in final matches"

match n := max_occurrence[0]:
case None:
logger.critical("No variant matches found")
logger.critical(
"Did you set the correct genome build? Did you impute your genomes?")
raise ValueError
case _ if n > 1:
logger.critical("Duplicate IDs in final matches")
logger.critical(
"Please double check your genomes for duplicates and try again")
raise ValueError
case _:
logger.info("No duplicate variants found")


def _parse_args(args=None):
Expand All @@ -61,18 +77,21 @@ def _parse_args(args=None):
parser.add_argument('-m', '--matches', dest='matches', required=True, nargs='+',
help='<Required> List of match files')
parser.add_argument('--min_overlap', dest='min_overlap', required=True,
type=float, help='<Required> Minimum proportion of variants to match before error')
type=float,
help='<Required> Minimum proportion of variants to match before error')
parser.add_argument('-IDs', '--filter_IDs', dest='filter',
help='<Optional> Path to file containing list of variant IDs that can be included in the final scorefile.'
'[useful for limiting scoring files to variants present in multiple datasets]')
parser = add_match_args(parser) # params for labelling matches
parser = add_match_args(parser) # params for labelling matches
parser.add_argument('--outdir', dest='outdir', required=True,
help='<Required> Output directory')
parser.add_argument('--split', dest='split', default=False, action='store_true',
help='<Optional> Write scorefiles split per chromosome?')
parser.add_argument('--combined', dest='combined', default=False, action='store_true',
parser.add_argument('--combined', dest='combined', default=False,
action='store_true',
help='<Optional> Write scorefiles in combined format?')
parser.add_argument('-n', dest='n_threads', default=1, help='<Optional> n threads for matching', type=int)
parser.add_argument('-n', dest='n_threads', default=1,
help='<Optional> n threads for matching', type=int)
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
help='<Optional> Extra logging information')
return parser.parse_args(args)
Expand Down

0 comments on commit 29ff0cc

Please sign in to comment.