-
Notifications
You must be signed in to change notification settings - Fork 1
/
HDRunFileRAIDList.py
78 lines (69 loc) · 3.22 KB
/
HDRunFileRAIDList.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Class to automatically determine how many files per run exist, given a list of runs
# For running inside the counting house, reads files from the RAID disk
#
# Author: Sean Dobbs ([email protected]), 2015
import os
import HDJobUtils
class HDRunFileRAIDList:
    """
    Determine which EVIO file numbers exist on the RAID disks for a list of runs.

    Typical usage:
      1. Construct the object; this scans the RAID disks once and records
         which run numbers are present on which disk (server_run_map).
      2. Define the runs of interest with AddRuns() or SetRuns().
      3. Call FillFiles() to populate the run -> file-number mapping.
      4. Read the result directly from the dictionary HDRunFileRAIDList.files.

    Attributes:
      files           -- dict mapping run number (int) to a sorted list of file numbers (int)
      server_run_map  -- dict mapping RAID directory name (str) to the list of run
                         numbers (int) found on that disk at construction time
    """

    def __init__(self):
        """
        Scan the RAID disks and build the RAID-directory -> run-list mapping.

        Directories are expected to follow the layout
        /<raid>/rawdata/volatile/RunPeriod*/rawdata/RunRRRRRR.
        Directories that cannot be read are reported and skipped, so a missing
        or unmounted disk yields an empty run list instead of an exception.
        """
        self.files = {}   # mapping with run numbers as keys and list of file numbers as values

        # figure out which runs are on which RAID disk
        self.server_run_map = {}
        raid_dirs = ["gluonraid1", "gluonraid2"]
        for the_raid_dir in raid_dirs:
            print("scanning %s..." % the_raid_dir)
            runlist = []
            basedir = "/%s/rawdata/volatile" % the_raid_dir
            for the_run_period in self._ListDirWithPrefix(basedir, "RunPeriod"):
                # look for folders containing EVIO files
                evio_dir = "%s/%s/rawdata" % (basedir, the_run_period)
                for dirname in self._ListDirWithPrefix(evio_dir, "Run"):
                    # get run number from directories following the naming scheme "RunRRRRRR"
                    try:
                        runlist.append(int(dirname[3:9]))
                    except ValueError:
                        print("Skipping invalid run directory %s ..." % dirname)
            self.server_run_map[the_raid_dir] = runlist

    @staticmethod
    def _ListDirWithPrefix(path, prefix):
        """Return the entries of `path` whose names start with `prefix` ([] if `path` is unreadable)."""
        try:
            entries = os.listdir(path)
        except OSError:
            print("Could not read directory %s, skipping ..." % path)
            return []
        return [d for d in entries if d.startswith(prefix)]

    @staticmethod
    def GetRAIDDirFromRun(run, server_run_map):
        """
        Return the name of the RAID directory that holds `run`.

        Arguments:
          run             -- run number to look up
          server_run_map  -- dict mapping RAID directory name to a collection of run numbers

        Returns the first matching directory name, or "" if no entry contains the run.

        This is a static method so that it can be called both on an instance
        (as FillFiles() does) and on the class itself.
        """
        for raid_dir, runs in server_run_map.items():
            if run in runs:
                return raid_dir
        return ""

    def Clear(self):
        """Forget all runs and their file lists."""
        self.files = {}

    def AddRuns(self, runlist):
        """
        Register the runs in `runlist`, each with an (initially) empty file list.

        Entries are converted with int(); entries that cannot be converted
        (wrong type or non-numeric text) are reported and skipped.
        A run that was already registered has its file list reset to [].
        """
        for run in runlist:
            try:
                run_number = int(run)
            except (TypeError, ValueError):
                # int(None) raises TypeError, int("abc") raises ValueError
                print("Invalid run number in RunFileList.SetRuns(): " + str(run))
                continue
            self.files[run_number] = []

    def SetRuns(self, runlist):
        """Replace the current set of runs with the runs in `runlist`."""
        self.Clear()
        self.AddRuns(runlist)

    def FillFiles(self):
        """
        Fill self.files with the sorted EVIO file numbers found on disk for each registered run.

        Runs are skipped (their file list is left unchanged) when the run period
        is unknown, when the run is not on any scanned RAID disk, when the run
        directory does not exist, or when the directory contents cannot be parsed.
        """
        # iterate over a snapshot of the keys since values are reassigned inside the loop
        for run in list(self.files):
            # figure out run period
            # NOTE(review): GetRunPeriodFromRun is assumed to return None for unknown runs -- confirm in HDJobUtils
            the_run_period = HDJobUtils.GetRunPeriodFromRun(run)
            the_raid_dir = self.GetRAIDDirFromRun(run, self.server_run_map)
            if the_run_period is None:
                print("Could not find run period for run " + str(run))
                continue
            if not the_raid_dir:
                # run not present on any scanned disk; an empty name would build a path rooted at "//"
                continue

            # count number of files on disk.  Note that these are in the volatile area, so
            # more files might exist on tape
            evio_dir = "/%s/rawdata/volatile/%s/rawdata/Run%06d/" % (the_raid_dir, the_run_period, run)
            if not os.path.isdir(evio_dir):
                continue

            try:
                # the file number is the 4th "_"-separated field of the name without ".evio"
                evio_file_numbers = [int(f[:-5].split("_")[3])
                                     for f in os.listdir(evio_dir) if f.endswith(".evio")]
            except (OSError, ValueError, IndexError):
                print("Error processing directory: " + str(evio_dir))
                continue
            self.files[run] = sorted(evio_file_numbers)