Skip to content

Commit

Permalink
Merge pull request #2213 from ganga-devs/splitter_improvement
Browse files Browse the repository at this point in the history
Check for duplicate in splitter
  • Loading branch information
mesmith75 authored Nov 24, 2023
2 parents 8f6c2fb + 6265938 commit 7ed83ff
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions ganga/GangaDirac/Lib/Splitters/OfflineGangaDiracSplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,15 @@ def OfflineGangaDiracSplitter(_inputs, filesPerJob, maxFiles, ignoremissing, ban
check_count = check_count + len(i)

if check_count != len(inputs) - len(bad_lfns):
# First check whether duplicate LFNs in the inputs are causing the count mismatch
allLFNs = [_lfn.lfn for _lfn in inputs]
lfnset = set(allLFNs)
del_list = list(allLFNs)
for _l in lfnset:
del_list.remove(_l)
if len(del_list)>0:
raise SplitterError("Duplicate LFNs found, check your inputdata! %s" % del_list)

logger.error("SERIOUS SPLITTING ERROR!!!!!")
logger.warning("%s != %s - %s" % (check_count, len(inputs), len(bad_lfns)))
logger.warning("inputs:\n%s" % str(inputs))
Expand Down

0 comments on commit 7ed83ff

Please sign in to comment.