forked from CMSCompOps/WmAgentScripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
duplicateEventsGen.py
71 lines (61 loc) · 2.84 KB
/
duplicateEventsGen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
import json
import urllib2,urllib, httplib, sys, re, os, phedexSubscription, dbsTest, closeOutWorkflows, re
from xml.dom.minidom import getDOMImplementation
def getFilterEfficiency(url, workflow):
    """Return the 'FilterEfficiency' recorded for a request, or 1 if it has none.

    url      -- host name handed to httplib.HTTPSConnection.
    workflow -- request name; appended verbatim to the
                '/reqmgr/reqMgr/request?requestName=' query string.

    The file named by the X509_USER_PROXY environment variable is used as
    both the client certificate and the client key.
    """
    proxy = os.getenv('X509_USER_PROXY')
    conn = httplib.HTTPSConnection(url, cert_file=proxy, key_file=proxy)
    try:
        conn.request("GET", '/reqmgr/reqMgr/request?requestName=' + workflow)
        payload = conn.getresponse().read()
    finally:
        # Release the socket even if the request or the read fails.
        conn.close()
    # The standard-library json module offers loads(); only the legacy
    # python-json package offers read(). Use whichever is available.
    decode = getattr(json, 'loads', None)
    if decode is None:
        decode = json.read
    request = decode(payload)
    return request.get('FilterEfficiency', 1)
def getDatasetStatus(dataset):
    """Return the status text that ./dbssql reports for *dataset*.

    The shell pipeline prints the second whitespace-separated field of every
    dbssql output line and keeps the second-to-last of those lines.
    """
    command = "".join((
        "./dbssql --input='find dataset.status where dataset=",
        dataset,
        "' | awk '{print $2}' | tail -n 2 | head -n 1",
    ))
    raw = os.popen(command).read()
    # Drop the first two and the last three characters around the value.
    # NOTE(review): presumably quoting/newline decoration from dbssql -- confirm.
    return raw[2:-3]
def duplicateLumi(dataset):
    """Return True when the ./dbssql pipeline prints anything for *dataset*.

    The pipeline keeps the dbssql lines containing "store", takes their second
    field, counts how often each value occurs (sort | uniq -c) and prints only
    counts greater than 1 -- so any output means some value was repeated.
    """
    pipeline = "".join((
        "./dbssql --limit=10000000 --input='find file, lumi where dataset=",
        dataset,
        "'| grep store| awk '{print $2}' | sort | uniq -c | awk '{print $1}' | sort | uniq | awk '{if ($1>1) print $1}'",
    ))
    return bool(os.popen(pipeline).read())
def classifyRequests(url, requests):
    """Print a table of Monte Carlo requests whose first output dataset has duplicate lumis.

    url      -- host name forwarded to the phedexSubscription, dbsTest and
                getFilterEfficiency look-ups.
    requests -- iterable of request records; entries without a 'type' key, or
                whose type is neither 'MonteCarloFromGEN' nor 'MonteCarlo',
                are skipped.

    Only the first dataset returned by outputdatasetsWorkflow() is examined.
    Nothing is returned; the report is written to stdout.
    """
    header_rule = '-----------------------------------------------------------------------------------------------------------------------------------------------------------'
    print(header_rule)
    print('| Request |req Type |Status Req | Dataset |Status Dataset | Percentage|FilterEfficiency| ')
    print(header_rule)
    for request in requests:
        if 'type' not in request:
            continue
        name = request['request_name']
        if request['type'] not in ('MonteCarloFromGEN', 'MonteCarlo'):
            continue
        datasetWorkflow = phedexSubscription.outputdatasetsWorkflow(url, name)
        if len(datasetWorkflow) < 1:
            continue
        dataset = datasetWorkflow[0]
        # The 0.0001 offset keeps the division below defined when the request
        # reports zero input events.
        inputEvents = 0.0001 + int(dbsTest.getInputEvents(url, name))
        outputEvents = dbsTest.getEventCountDataSet(dataset)
        percentage = outputEvents / float(inputEvents)
        # Only datasets with duplicated lumis are reported.
        if not duplicateLumi(dataset):
            continue
        FilterEfficiency = getFilterEfficiency(url, name)
        datasetStatus = getDatasetStatus(dataset)
        print('| %20s | %8s| %8s | %20s | %10s| %10s | %10s| ' % (name, request['type'], request['status'], dataset, datasetStatus, str(percentage*100), FilterEfficiency))
        # NOTE(review): the original indentation was lost; this separator is
        # assumed to follow every reported row rather than close the table once.
        print('---------------------------------------------------------------------------------------------------------------------------')
def main():
    """Fetch the request overview, print the duplicate-lumi report, exit with status 0."""
    reqmgr_host = 'cmsweb.cern.ch'
    print("Gathering Requests")
    overview = closeOutWorkflows.getOverviewRequest()
    print("Classifying Requests")
    classifyRequests(reqmgr_host, overview)
    sys.exit(0)
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()