diff --git a/BackgroundEstimation/test/crab_BGEstSelections.py b/BackgroundEstimation/test/crab_BGEstSelections.py new file mode 100644 index 00000000..47160cd5 --- /dev/null +++ b/BackgroundEstimation/test/crab_BGEstSelections.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import os +from CRABClient.UserUtilities import config +config = config() + +config.General.requestName = '' +config.General.workArea = 'crab' +config.General.transferOutputs = True +config.General.transferLogs = True + +config.JobType.pluginName = 'Analysis' +config.JobType.psetName = 'config_2022_cfg.py' +config.JobType.allowUndistributedCMSSW = True + +config.Data.inputDataset = '/WtoLNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8/borzari-Test2CRAB-6c2551f7c8575c19e6da935800c0d62f/USER' +config.Data.inputDBS = 'phys03' +config.Data.splitting = 'FileBased' +config.Data.unitsPerJob = 5 # this is the amount of files processed per output file + +config.Data.publication = True +config.Data.outputDatasetTag = 'MuonTagPt55' # this is just an example; it will be part of the name of the output dataset + +# Uncomment one of the following pairs + +#config.Data.outLFNDirBase = '/store/group/lpclonglived/DisappTrks/' +#config.Site.storageSite = 'T3_US_FNALLPC' + +#config.Data.outLFNDirBase = '/store/user/%s/' % (user_name) +#config.Site.storageSite = 'T2_US_Purdue' + +#config.Data.outLFNDirBase = '/store/group/phys_exotica/disappearingTracks/' +#config.Site.storageSite = 'T2_CH_CERN' + +#config.Data.outLFNDirBase = '/store/user/borzari/' +#config.Site.storageSite = 'T2_BR_SPRACE' diff --git a/SignalSystematics/plugins/GenWeightsProducer.cc b/SignalSystematics/plugins/GenWeightsProducer.cc new file mode 100644 index 00000000..d8829f91 --- /dev/null +++ b/SignalSystematics/plugins/GenWeightsProducer.cc @@ -0,0 +1,1157 @@ +#include "CommonTools/UtilAlgos/interface/TFileService.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Run.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/transform.h" +#include "SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/LHERunInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoHeader.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "boost/algorithm/string.hpp" + +#include +#include + +#include +#include +#include +#include + +#include "TH1D.h" +#include "TH2D.h" +#include "TString.h" + +namespace { + /// ---- Cache object for running sums of weights ---- + struct Counter { + Counter() : num(0), sumw(0), sumw2(0), sumPDF(), sumScale(), sumRwgt(), sumNamed(), sumPS() {} + + // the counters + long long num; + long double sumw; + long double sumw2; + std::vector sumPDF, sumScale, sumRwgt, sumNamed, sumPS; + + void clear() { + num = 0; + sumw = 0; + sumw2 = 0; + sumPDF.clear(); + sumScale.clear(); + sumRwgt.clear(); + sumNamed.clear(), sumPS.clear(); + } + + // inc the counters + void incGenOnly(double w) { + num++; + sumw += w; + sumw2 += (w * w); + } + + void incPSOnly(double w0, const std::vector& wPS) { + if (!wPS.empty()) { + if (sumPS.empty()) + sumPS.resize(wPS.size(), 0); + for 
(unsigned int i = 0, n = wPS.size(); i < n; ++i) + sumPS[i] += (w0 * wPS[i]); + } + } + + void incLHE(double w0, + const std::vector& wScale, + const std::vector& wPDF, + const std::vector& wRwgt, + const std::vector& wNamed, + const std::vector& wPS) { + // add up weights + incGenOnly(w0); + // then add up variations + if (!wScale.empty()) { + if (sumScale.empty()) + sumScale.resize(wScale.size(), 0); + for (unsigned int i = 0, n = wScale.size(); i < n; ++i) + sumScale[i] += (w0 * wScale[i]); + } + if (!wPDF.empty()) { + if (sumPDF.empty()) + sumPDF.resize(wPDF.size(), 0); + for (unsigned int i = 0, n = wPDF.size(); i < n; ++i) + sumPDF[i] += (w0 * wPDF[i]); + } + if (!wRwgt.empty()) { + if (sumRwgt.empty()) + sumRwgt.resize(wRwgt.size(), 0); + for (unsigned int i = 0, n = wRwgt.size(); i < n; ++i) + sumRwgt[i] += (w0 * wRwgt[i]); + } + if (!wNamed.empty()) { + if (sumNamed.empty()) + sumNamed.resize(wNamed.size(), 0); + for (unsigned int i = 0, n = wNamed.size(); i < n; ++i) + sumNamed[i] += (w0 * wNamed[i]); + } + incPSOnly(w0, wPS); + } + + void merge(const Counter& other) { + num += other.num; + sumw += other.sumw; + sumw2 += other.sumw2; + if (sumScale.empty() && !other.sumScale.empty()) + sumScale.resize(other.sumScale.size(), 0); + if (sumPDF.empty() && !other.sumPDF.empty()) + sumPDF.resize(other.sumPDF.size(), 0); + if (sumRwgt.empty() && !other.sumRwgt.empty()) + sumRwgt.resize(other.sumRwgt.size(), 0); + if (sumNamed.empty() && !other.sumNamed.empty()) + sumNamed.resize(other.sumNamed.size(), 0); + if (sumPS.empty() && !other.sumPS.empty()) + sumPS.resize(other.sumPS.size(), 0); + if (!other.sumScale.empty()) + for (unsigned int i = 0, n = sumScale.size(); i < n; ++i) + sumScale[i] += other.sumScale[i]; + if (!other.sumPDF.empty()) + for (unsigned int i = 0, n = sumPDF.size(); i < n; ++i) + sumPDF[i] += other.sumPDF[i]; + if (!other.sumRwgt.empty()) + for (unsigned int i = 0, n = sumRwgt.size(); i < n; ++i) + sumRwgt[i] += other.sumRwgt[i]; + if (!other.sumNamed.empty()) + for (unsigned int i = 0, n = sumNamed.size(); i < n; ++i) + sumNamed[i] += other.sumNamed[i]; + if (!other.sumPS.empty()) + for (unsigned int i = 0, n = sumPS.size(); i < n; ++i) + sumPS[i] += other.sumPS[i]; + } + }; + + struct CounterMap { + std::map countermap; + Counter* active_el = nullptr; + std::string active_label = ""; + void merge(const CounterMap& other) { + for (const auto& y : other.countermap) + countermap[y.first].merge(y.second); + active_el = nullptr; + } + void clear() { + for (auto x : countermap) + x.second.clear(); + active_el = nullptr; + active_label = ""; + } + void setLabel(std::string label) { + active_el = &(countermap[label]); + active_label = label; + } + void checkLabelSet() { + if (!active_el) + throw cms::Exception("LogicError", "Called CounterMap::get() before setting the active label\n"); + } + Counter* get() { + checkLabelSet(); + return active_el; + } + std::string& getLabel() { + checkLabelSet(); + return active_label; + } + }; + + /// ---- RunCache object for dynamic choice of LHE IDs ---- + struct DynamicWeightChoice { + // choice of LHE weights + // ---- scale ---- + std::vector scaleWeightIDs; + std::string scaleWeightsDoc; + // ---- pdf ---- + std::vector pdfWeightIDs; + std::string pdfWeightsDoc; + // ---- rwgt ---- + std::vector rwgtIDs; + std::string rwgtWeightDoc; + }; + + constexpr std::array defPSWeightIDs = {{6, 7, 8, 9}}; + constexpr std::array defPSWeightIDs_alt = {{27, 5, 26, 4}}; + + struct DynamicWeightChoiceGenInfo { + // choice of LHE weights + // ---- 
scale ---- + std::vector scaleWeightIDs; + std::string scaleWeightsDoc; + // ---- pdf ---- + std::vector pdfWeightIDs; + std::string pdfWeightsDoc; + // ---- ps ---- + bool matchPS_alt = false; + std::vector psWeightIDs; + unsigned int psBaselineID = 1; + std::string psWeightsDoc; + + void setMissingWeight(int idx) { psWeightIDs[idx] = (matchPS_alt) ? defPSWeightIDs_alt[idx] : defPSWeightIDs[idx]; } + + bool empty() const { return scaleWeightIDs.empty() && pdfWeightIDs.empty() && psWeightIDs.empty(); } + }; + + struct LumiCacheInfoHolder { + CounterMap countermap; + void clear() { countermap.clear(); } + }; + + float stof_fortrancomp(const std::string& str) { + std::string::size_type match = str.find('d'); + if (match != std::string::npos) { + std::string pre = str.substr(0, match); + std::string post = str.substr(match + 1); + return std::stof(pre) * std::pow(10.0f, std::stof(post)); + } else { + return std::stof(str); + } + } + /// -------------- temporary objects -------------- + struct ScaleVarWeight { + std::string wid, label; + std::pair scales; + ScaleVarWeight(const std::string& id, const std::string& text, const std::string& muR, const std::string& muF) + : wid(id), label(text), scales(stof_fortrancomp(muR), stof_fortrancomp(muF)) {} + bool operator<(const ScaleVarWeight& other) { + return (scales == other.scales ? wid < other.wid : scales < other.scales); + } + }; + struct PDFSetWeights { + std::vector wids; + std::pair lhaIDs; + PDFSetWeights(const std::string& wid, unsigned int lhaID) : wids(1, wid), lhaIDs(lhaID, lhaID) {} + bool operator<(const PDFSetWeights& other) const { return lhaIDs < other.lhaIDs; } + void add(const std::string& wid, unsigned int lhaID) { + wids.push_back(wid); + lhaIDs.second = lhaID; + } + bool maybe_add(const std::string& wid, unsigned int lhaID) { + if (lhaID == lhaIDs.second + 1) { + lhaIDs.second++; + wids.push_back(wid); + return true; + } else { + return false; + } + } + }; +} // namespace + +class GenWeightsTableProducer : public edm::global::EDProducer, + edm::RunCache, + edm::LuminosityBlockCache, + edm::RunSummaryCache, + edm::EndRunProducer> { +public: + GenWeightsTableProducer(edm::ParameterSet const& params) + : genTag_(consumes(params.getParameter("genEvent"))), + lheLabel_(params.getParameter>("lheInfo")), + lheTag_(edm::vector_transform(lheLabel_, + [this](const edm::InputTag& tag) { return mayConsume(tag); })), + lheRunTag_(edm::vector_transform( + lheLabel_, [this](const edm::InputTag& tag) { return mayConsume(tag); })), + genLumiInfoHeadTag_( + mayConsume(params.getParameter("genLumiInfoHeader"))), + namedWeightIDs_(params.getParameter>("namedWeightIDs")), + namedWeightLabels_(params.getParameter>("namedWeightLabels")), + lheWeightPrecision_(params.getParameter("lheWeightPrecision")), + maxPdfWeights_(params.getParameter("maxPdfWeights")), + keepAllPSWeights_(params.getParameter("keepAllPSWeights")), + debug_(params.getUntrackedParameter("debug", false)), + debugRun_(debug_.load()), + hasIssuedWarning_(false), + psWeightWarning_(false) { + if (namedWeightIDs_.size() != namedWeightLabels_.size()) { + throw cms::Exception("Configuration", "Size mismatch between namedWeightIDs & namedWeightLabels"); + } + for (const edm::ParameterSet& pdfps : params.getParameter>("preferredPDFs")) { + const std::string& name = pdfps.getParameter("name"); + uint32_t lhaid = pdfps.getParameter("lhaid"); + preferredPDFLHAIDs_.push_back(lhaid); + lhaNameToID_[name] = lhaid; + lhaNameToID_[name + ".LHgrid"] = lhaid; + } + oneDHists_["wPS"] = 
fs_->make("wPS", "PS weights", 4, -0.5, 3.5); + oneDHists_["wScale"] = fs_->make("wScale", "Fact/Ren scales weights", 9, -0.5, 8.5); + oneDHists_["wPDF"] = fs_->make("wPDF", "PDF weights", 101, -0.5, 100.5); + oneDHists_["wPSNW"] = fs_->make("wPSNW", "PS weights - NoWeights", 4, -0.5, 3.5); + oneDHists_["wScaleNW"] = fs_->make("wScaleNW", "Fact/Ren scales weights - NoWeights", 9, -0.5, 8.5); + oneDHists_["wPDFNW"] = fs_->make("wPDFNW", "PDF weights - NoWeights", 101, -0.5, 100.5); + } + + ~GenWeightsTableProducer() override {} + + void produce(edm::StreamID id, edm::Event& iEvent, const edm::EventSetup& iSetup) const override { + // get my counter for weights + Counter* counter = streamCache(id)->countermap.get(); + + // generator information (always available) + edm::Handle genInfo; + iEvent.getByToken(genTag_, genInfo); + double weight = genInfo->weight(); + + std::string model_label = streamCache(id)->countermap.getLabel(); + auto outM = std::make_unique((!model_label.empty()) ? std::string("GenModel_") + model_label : ""); + bool getLHEweightsFromGenInfo = !model_label.empty(); + + edm::Handle lheInfo; + for (const auto& lheTag : lheTag_) { + iEvent.getByToken(lheTag, lheInfo); + if (lheInfo.isValid()) { + break; + } + } + + const auto genWeightChoice = luminosityBlockCache(iEvent.getLuminosityBlock().index()); + if (lheInfo.isValid()) { + if (getLHEweightsFromGenInfo && !hasIssuedWarning_.exchange(true)) + edm::LogWarning("LHETablesProducer") + << "Found both a LHEEventProduct and a GenLumiInfoHeader: will only save weights from LHEEventProduct.\n"; + // get the dynamic choice of weights + const DynamicWeightChoice* weightChoice = runCache(iEvent.getRun().index()); + // go fill tables + fillLHEWeightTables(counter, + weightChoice, + genWeightChoice, + weight, + *lheInfo, + *genInfo); + } else if (getLHEweightsFromGenInfo) { + fillLHEPdfWeightTablesFromGenInfo( + counter, genWeightChoice, weight, *genInfo); + } else { + // Still try to add the PS weights + fillOnlyPSWeightTable(counter, genWeightChoice, weight, *genInfo); + if (!hasIssuedWarning_.exchange(true)) { + edm::LogWarning("LHETablesProducer") << "No LHEEventProduct, so there will be no LHE Tables\n"; + } + } + } + + void fillLHEWeightTables(Counter* counter, + const DynamicWeightChoice* weightChoice, + const DynamicWeightChoiceGenInfo* genWeightChoice, + double genWeight, + const LHEEventProduct& lheProd, + const GenEventInfoProduct& genProd) const { + + bool lheDebug = debug_.exchange( + false); // make sure only the first thread dumps out this (even if may still be mixed up with other output, but nevermind) + + const std::vector& scaleWeightIDs = weightChoice->scaleWeightIDs; + const std::vector& pdfWeightIDs = weightChoice->pdfWeightIDs; + const std::vector& rwgtWeightIDs = weightChoice->rwgtIDs; + + double w0 = lheProd.originalXWGTUP(); + + std::vector wScale(scaleWeightIDs.size(), 1), wPDF(pdfWeightIDs.size(), 1), wRwgt(rwgtWeightIDs.size(), 1), + wNamed(namedWeightIDs_.size(), 1); + for (auto& weight : lheProd.weights()) { + if (lheDebug) + printf("Weight %+9.5f rel %+9.5f for id %s\n", weight.wgt, weight.wgt / w0, weight.id.c_str()); + // now we do it slowly, can be optimized + auto mScale = std::find(scaleWeightIDs.begin(), scaleWeightIDs.end(), weight.id); + if (mScale != scaleWeightIDs.end()) + wScale[mScale - scaleWeightIDs.begin()] = weight.wgt / w0; + + auto mPDF = std::find(pdfWeightIDs.begin(), pdfWeightIDs.end(), weight.id); + if (mPDF != pdfWeightIDs.end()) + wPDF[mPDF - pdfWeightIDs.begin()] = weight.wgt / 
w0; + + auto mRwgt = std::find(rwgtWeightIDs.begin(), rwgtWeightIDs.end(), weight.id); + if (mRwgt != rwgtWeightIDs.end()) + wRwgt[mRwgt - rwgtWeightIDs.begin()] = weight.wgt / w0; + + auto mNamed = std::find(namedWeightIDs_.begin(), namedWeightIDs_.end(), weight.id); + if (mNamed != namedWeightIDs_.end()) + wNamed[mNamed - namedWeightIDs_.begin()] = weight.wgt / w0; + } + + std::vector wPS; + std::string psWeightDocStr; + setPSWeightInfo(genProd.weights(), genWeightChoice, wPS, psWeightDocStr); + + int aux = 0; + for (const auto &wPS_iter : wPS){ + // printf("wPS[%d] %+9.5f ",aux, wPS_iter); + oneDHists_.at("wPS")->Fill(aux,wPS_iter); + oneDHists_.at("wPSNW")->Fill(aux); + aux = aux + 1; + } + // if(wPS.size() > 0) printf("for id %s\n", psWeightDocStr.c_str()); + + aux = 0; + for (const auto &wScale_iter : wScale){ + // printf("wScale[%d] %+9.5f ",aux, wScale_iter); + oneDHists_.at("wScale")->Fill(aux,wScale_iter); + oneDHists_.at("wScaleNW")->Fill(aux); + aux = aux + 1; + } + // if(wScale.size() > 0) printf("for id %s\n", (weightChoice->scaleWeightsDoc).c_str()); + + std::sort(wPDF.begin(),wPDF.end()); + aux = 0; + for (const auto &wPDF_iter : wPDF){ + // printf("wPDF[%d] %+9.5f ",aux, wPDF_iter); + oneDHists_.at("wPDF")->Fill(aux,wPDF_iter); + oneDHists_.at("wPDFNW")->Fill(aux); + aux = aux + 1; + } + // if(wPDF.size() > 0) printf("for id %s\n", weightChoice->pdfWeightsDoc.c_str()); + + counter->incLHE(genWeight, wScale, wPDF, wRwgt, wNamed, wPS); + } + + void fillLHEPdfWeightTablesFromGenInfo(Counter* counter, + const DynamicWeightChoiceGenInfo* weightChoice, + double genWeight, + const GenEventInfoProduct& genProd) const { + const std::vector& scaleWeightIDs = weightChoice->scaleWeightIDs; + const std::vector& pdfWeightIDs = weightChoice->pdfWeightIDs; + + auto weights = genProd.weights(); + double w0 = (weights.size() > 1) ? weights.at(1) : 1.; + double originalXWGTUP = (weights.size() > 1) ? 
weights.at(1) : 1.; + + std::vector wScale, wPDF, wPS; + for (auto id : scaleWeightIDs) + wScale.push_back(weights.at(id) / w0); + for (auto id : pdfWeightIDs) { + wPDF.push_back(weights.at(id) / w0); + } + + std::string psWeightsDocStr; + setPSWeightInfo(genProd.weights(), weightChoice, wPS, psWeightsDocStr); + + printf("wPS %+9.5f for id %s\n", wPS[0], psWeightsDocStr.c_str()); + printf("wScale %+9.5f for id %s\n", wScale[0], (weightChoice->scaleWeightsDoc).c_str()); + printf("wPDF %+9.5f for id %s\n", wPDF[0], (weightChoice->pdfWeightsDoc).c_str()); + printf("originalXWGTUP %+9.5f for id %s\n", originalXWGTUP, "Nominal event weight in the LHE file"); + + counter->incLHE(genWeight, wScale, wPDF, std::vector(), std::vector(), wPS); + } + + void fillOnlyPSWeightTable(Counter* counter, + const DynamicWeightChoiceGenInfo* genWeightChoice, + double genWeight, + const GenEventInfoProduct& genProd) const { + std::vector wPS; + std::string psWeightDocStr; + setPSWeightInfo(genProd.weights(), genWeightChoice, wPS, psWeightDocStr); + printf("wPS %+9.5f for id %s\n", wPS[0], psWeightDocStr.c_str()); + + counter->incGenOnly(genWeight); + counter->incPSOnly(genWeight, wPS); + } + + void setPSWeightInfo(const std::vector& genWeights, + const DynamicWeightChoiceGenInfo* genWeightChoice, + std::vector& wPS, + std::string& psWeightDocStr) const { + wPS.clear(); + // isRegularPSSet = keeping all weights and the weights are a usual size, ie + // all weights are PS weights (don't use header incase missing names) + bool isRegularPSSet = keepAllPSWeights_ && (genWeights.size() == 14 || genWeights.size() == 46); + if (!genWeightChoice->psWeightIDs.empty() && !isRegularPSSet) { + psWeightDocStr = genWeightChoice->psWeightsDoc; + double psNom = genWeights.at(genWeightChoice->psBaselineID); + for (auto wgtidx : genWeightChoice->psWeightIDs) { + wPS.push_back(genWeights.at(wgtidx) / psNom); + } + } else { + int vectorSize = + keepAllPSWeights_ ? (genWeights.size() - 2) : ((genWeights.size() == 14 || genWeights.size() == 46) ? 
4 : 1); + + if (vectorSize > 1) { + double nominal = genWeights.at(1); // Called 'Baseline' in GenLumiInfoHeader + if (keepAllPSWeights_) { + for (int i = 0; i < vectorSize; i++) { + wPS.push_back(genWeights.at(i + 2) / nominal); + } + psWeightDocStr = "All PS weights (w_var / w_nominal)"; + } else { + if (!psWeightWarning_.exchange(true)) + edm::LogWarning("LHETablesProducer") + << "GenLumiInfoHeader not found: Central PartonShower weights will fill with the 6-10th entries \n" + << " This may incorrect for some mcs (madgraph 2.6.1 with its `isr:murfact=0.5` have a differnt " + "order )"; + for (std::size_t i = 6; i < 10; i++) { + wPS.push_back(genWeights.at(i) / nominal); + } + psWeightDocStr = + "PS weights (w_var / w_nominal); [0] is ISR=2 FSR=1; [1] is ISR=1 FSR=2" + "[2] is ISR=0.5 FSR=1; [3] is ISR=1 FSR=0.5;"; + } + } else { + wPS.push_back(1.0); + psWeightDocStr = "dummy PS weight (1.0) "; + } + } + } + + // create an empty counter + std::shared_ptr globalBeginRun(edm::Run const& iRun, edm::EventSetup const&) const override { + edm::Handle lheInfo; + + bool lheDebug = debugRun_.exchange( + false); // make sure only the first thread dumps out this (even if may still be mixed up with other output, but nevermind) + auto weightChoice = std::make_shared(); + + // getByToken throws since we're not in the endRun (see https://github.com/cms-sw/cmssw/pull/18499) + //if (iRun.getByToken(lheRunTag_, lheInfo)) { + for (const auto& lheLabel : lheLabel_) { + iRun.getByLabel(lheLabel, lheInfo); + if (lheInfo.isValid()) { + break; + } + } + if (lheInfo.isValid()) { + std::vector scaleVariationIDs; + std::vector pdfSetWeightIDs; + std::vector lheReweighingIDs; + bool isFirstGroup = true; + + std::regex weightgroupmg26x(""); + std::regex weightgroup(""); + std::regex weightgroupRwgt(""); + std::regex endweightgroup(""); + std::regex scalewmg26x( + ""); + std::regex scalewmg26xNew( + ""); + + // MUR=2.0 + std::regex scalew( + "\\s*(?:lhapdf=\\d+|dyn=\\s*-?\\d+)?\\s*((?:mu[rR]|renscfact)" + "=(\\S+)\\s+(?:mu[Ff]|facscfact)=(\\S+)(\\s+.*)?)"); + std::regex pdfw( + "\\s*(?:PDF set|lhapdf|PDF|pdfset)\\s*=\\s*(\\d+)\\s*(?:\\s.*)?"); + std::regex pdfwOld("\\s*Member \\s*(\\d+)\\s*(?:.*)"); + std::regex pdfwmg26x( + "\\s*(?:PDF=(\\d+)\\s*MemberID=(\\d+))?\\s*(?:\\s.*)?"); + // + + // PDF=325300 MemberID=0 + std::regex pdfwmg26xNew( + "" + "\\s*(?:PDF=(\\d+)\\s*MemberID=(\\d+))?\\s*(?:\\s.*)?"); + + std::regex rwgt("(.+)?()?"); + std::smatch groups; + for (auto iter = lheInfo->headers_begin(), end = lheInfo->headers_end(); iter != end; ++iter) { + if (iter->tag() != "initrwgt") { + if (lheDebug) + std::cout << "Skipping LHE header with tag" << iter->tag() << std::endl; + continue; + } + if (lheDebug) + std::cout << "Found LHE header with tag" << iter->tag() << std::endl; + std::vector lines = iter->lines(); + bool missed_weightgroup = + false; //Needed because in some of the samples ( produced with MG26X ) a small part of the header info is ordered incorrectly + bool ismg26x = false; + bool ismg26xNew = false; + for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines; + ++iLine) { //First start looping through the lines to see which weightgroup pattern is matched + boost::replace_all(lines[iLine], "<", "<"); + boost::replace_all(lines[iLine], ">", ">"); + if (std::regex_search(lines[iLine], groups, weightgroupmg26x)) { + ismg26x = true; + } else if (std::regex_search(lines[iLine], groups, scalewmg26xNew) || + std::regex_search(lines[iLine], groups, pdfwmg26xNew)) { + ismg26xNew = true; + } + } + 
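+        // Second pass over the header lines: identify each weightgroup block and collect its scale-variation, PDF-set, and LHE-reweighting weight IDs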
for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines; ++iLine) { + if (lheDebug) + std::cout << lines[iLine]; + if (std::regex_search(lines[iLine], groups, ismg26x ? weightgroupmg26x : weightgroup)) { + std::string groupname = groups.str(2); + if (ismg26x) + groupname = groups.str(1); + if (lheDebug) + std::cout << ">>> Looks like the beginning of a weight group for '" << groupname << "'" << std::endl; + if (groupname.find("scale_variation") == 0 || groupname == "Central scale variation" || isFirstGroup) { + if (lheDebug && groupname.find("scale_variation") != 0 && groupname != "Central scale variation") + std::cout << ">>> First weight is not scale variation, but assuming is the Central Weight" << std::endl; + else if (lheDebug) + std::cout << ">>> Looks like scale variation for theory uncertainties" << std::endl; + isFirstGroup = false; + for (++iLine; iLine < nLines; ++iLine) { + if (lheDebug) { + std::cout << " " << lines[iLine]; + } + if (std::regex_search( + lines[iLine], groups, ismg26x ? scalewmg26x : (ismg26xNew ? scalewmg26xNew : scalew))) { + if (lheDebug) + std::cout << " >>> Scale weight " << groups[1].str() << " for " << groups[3].str() << " , " + << groups[4].str() << " , " << groups[5].str() << std::endl; + if (ismg26xNew) { + scaleVariationIDs.emplace_back(groups.str(4), groups.str(1), groups.str(3), groups.str(2)); + } else { + scaleVariationIDs.emplace_back(groups.str(1), groups.str(2), groups.str(3), groups.str(4)); + } + } else if (std::regex_search(lines[iLine], endweightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the end of a weight group" << std::endl; + if (!missed_weightgroup) { + break; + } else + missed_weightgroup = false; + } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " + "of the group." + << std::endl; + if (ismg26x || ismg26xNew) + missed_weightgroup = true; + --iLine; // rewind by one, and go back to the outer loop + break; + } + } + } else if (groupname == "PDF_variation" || groupname.find("PDF_variation ") == 0) { + if (lheDebug) + std::cout << ">>> Looks like a new-style block of PDF weights for one or more pdfs" << std::endl; + for (++iLine; iLine < nLines; ++iLine) { + if (lheDebug) + std::cout << " " << lines[iLine]; + if (std::regex_search(lines[iLine], groups, pdfw)) { + unsigned int lhaID = std::stoi(groups.str(2)); + if (lheDebug) + std::cout << " >>> PDF weight " << groups.str(1) << " for " << groups.str(2) << " = " << lhaID + << std::endl; + if (pdfSetWeightIDs.empty() || !pdfSetWeightIDs.back().maybe_add(groups.str(1), lhaID)) { + pdfSetWeightIDs.emplace_back(groups.str(1), lhaID); + } + } else if (std::regex_search(lines[iLine], endweightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the end of a weight group" << std::endl; + if (!missed_weightgroup) { + break; + } else + missed_weightgroup = false; + } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " + "of the group." 
+ << std::endl; + if (ismg26x || ismg26xNew) + missed_weightgroup = true; + --iLine; // rewind by one, and go back to the outer loop + break; + } + } + } else if (groupname == "PDF_variation1" || groupname == "PDF_variation2") { + if (lheDebug) + std::cout << ">>> Looks like a new-style block of PDF weights for multiple pdfs" << std::endl; + unsigned int lastid = 0; + for (++iLine; iLine < nLines; ++iLine) { + if (lheDebug) + std::cout << " " << lines[iLine]; + if (std::regex_search(lines[iLine], groups, pdfw)) { + unsigned int id = std::stoi(groups.str(1)); + unsigned int lhaID = std::stoi(groups.str(2)); + if (lheDebug) + std::cout << " >>> PDF weight " << groups.str(1) << " for " << groups.str(2) << " = " << lhaID + << std::endl; + if (id != (lastid + 1) || pdfSetWeightIDs.empty()) { + pdfSetWeightIDs.emplace_back(groups.str(1), lhaID); + } else { + pdfSetWeightIDs.back().add(groups.str(1), lhaID); + } + lastid = id; + } else if (std::regex_search(lines[iLine], endweightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the end of a weight group" << std::endl; + if (!missed_weightgroup) { + break; + } else + missed_weightgroup = false; + } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " + "of the group." + << std::endl; + if (ismg26x || ismg26xNew) + missed_weightgroup = true; + --iLine; // rewind by one, and go back to the outer loop + break; + } + } + } else if (lhaNameToID_.find(groupname) != lhaNameToID_.end()) { + if (lheDebug) + std::cout << ">>> Looks like an old-style PDF weight for an individual pdf" << std::endl; + unsigned int firstLhaID = lhaNameToID_.find(groupname)->second; + bool first = true; + for (++iLine; iLine < nLines; ++iLine) { + if (lheDebug) + std::cout << " " << lines[iLine]; + if (std::regex_search( + lines[iLine], groups, ismg26x ? pdfwmg26x : (ismg26xNew ? pdfwmg26xNew : pdfwOld))) { + unsigned int member = 0; + if (!ismg26x && !ismg26xNew) { + member = std::stoi(groups.str(2)); + } else if (ismg26xNew) { + if (!groups.str(3).empty()) { + member = std::stoi(groups.str(3)); + } + } else { + if (!groups.str(4).empty()) { + member = std::stoi(groups.str(4)); + } + } + unsigned int lhaID = member + firstLhaID; + if (lheDebug) + std::cout << " >>> PDF weight " << groups.str(1) << " for " << member << " = " << lhaID + << std::endl; + //if (member == 0) continue; // let's keep also the central value for now + if (first) { + pdfSetWeightIDs.emplace_back(groups.str(1), lhaID); + first = false; + } else { + pdfSetWeightIDs.back().add(groups.str(1), lhaID); + } + } else if (std::regex_search(lines[iLine], endweightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the end of a weight group" << std::endl; + if (!missed_weightgroup) { + break; + } else + missed_weightgroup = false; + } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " + "of the group." 
+ << std::endl; + if (ismg26x || ismg26xNew) + missed_weightgroup = true; + --iLine; // rewind by one, and go back to the outer loop + break; + } + } + } else if (groupname == "mass_variation" || groupname == "sthw2_variation" || + groupname == "width_variation") { + if (lheDebug) + std::cout << ">>> Looks like an EW parameter weight" << std::endl; + for (++iLine; iLine < nLines; ++iLine) { + if (lheDebug) + std::cout << " " << lines[iLine]; + if (std::regex_search(lines[iLine], groups, rwgt)) { + std::string rwgtID = groups.str(1); + if (lheDebug) + std::cout << " >>> LHE reweighting weight: " << rwgtID << std::endl; + if (std::find(lheReweighingIDs.begin(), lheReweighingIDs.end(), rwgtID) == lheReweighingIDs.end()) { + // we're only interested in the beggining of the block + lheReweighingIDs.emplace_back(rwgtID); + } + } else if (std::regex_search(lines[iLine], endweightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the end of a weight group" << std::endl; + } + } + } else { + for (++iLine; iLine < nLines; ++iLine) { + if (lheDebug) + std::cout << " " << lines[iLine]; + if (std::regex_search(lines[iLine], groups, endweightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the end of a weight group" << std::endl; + if (!missed_weightgroup) { + break; + } else + missed_weightgroup = false; + } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " + "of the group." + << std::endl; + if (ismg26x || ismg26xNew) + missed_weightgroup = true; + --iLine; // rewind by one, and go back to the outer loop + break; + } + } + } + } else if (std::regex_search(lines[iLine], groups, weightgroupRwgt)) { + std::string groupname = groups.str(1); + if (groupname.find("mg_reweighting") != std::string::npos) { + if (lheDebug) + std::cout << ">>> Looks like a LHE weights for reweighting" << std::endl; + for (++iLine; iLine < nLines; ++iLine) { + if (lheDebug) + std::cout << " " << lines[iLine]; + if (std::regex_search(lines[iLine], groups, rwgt)) { + std::string rwgtID = groups.str(1); + if (lheDebug) + std::cout << " >>> LHE reweighting weight: " << rwgtID << std::endl; + if (std::find(lheReweighingIDs.begin(), lheReweighingIDs.end(), rwgtID) == lheReweighingIDs.end()) { + // we're only interested in the beggining of the block + lheReweighingIDs.emplace_back(rwgtID); + } + } else if (std::regex_search(lines[iLine], endweightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the end of a weight group" << std::endl; + if (!missed_weightgroup) { + break; + } else + missed_weightgroup = false; + } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { + if (lheDebug) + std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " + "of the group." 
+ << std::endl; + if (ismg26x) + missed_weightgroup = true; + --iLine; // rewind by one, and go back to the outer loop + break; + } + } + } + } + } + //std::cout << "============= END [ " << iter->tag() << " ] ============ \n\n" << std::endl; + + // ----- SCALE VARIATIONS ----- + std::sort(scaleVariationIDs.begin(), scaleVariationIDs.end()); + if (lheDebug) + std::cout << "Found " << scaleVariationIDs.size() << " scale variations: " << std::endl; + std::stringstream scaleDoc; + scaleDoc << "LHE scale variation weights (w_var / w_nominal); "; + for (unsigned int isw = 0, nsw = scaleVariationIDs.size(); isw < nsw; ++isw) { + const auto& sw = scaleVariationIDs[isw]; + if (isw) + scaleDoc << "; "; + scaleDoc << "[" << isw << "] is " << sw.label; + weightChoice->scaleWeightIDs.push_back(sw.wid); + if (lheDebug) + printf(" id %s: scales ren = % .2f fact = % .2f text = %s\n", + sw.wid.c_str(), + sw.scales.first, + sw.scales.second, + sw.label.c_str()); + } + if (!scaleVariationIDs.empty()) + weightChoice->scaleWeightsDoc = scaleDoc.str(); + + // ------ PDF VARIATIONS (take the preferred one) ----- + if (lheDebug) { + std::cout << "Found " << pdfSetWeightIDs.size() << " PDF set errors: " << std::endl; + for (const auto& pw : pdfSetWeightIDs) + printf("lhaIDs %6d - %6d (%3lu weights: %s, ... )\n", + pw.lhaIDs.first, + pw.lhaIDs.second, + pw.wids.size(), + pw.wids.front().c_str()); + } + + // ------ LHE REWEIGHTING ------- + if (lheDebug) { + std::cout << "Found " << lheReweighingIDs.size() << " reweighting weights" << std::endl; + } + std::copy(lheReweighingIDs.begin(), lheReweighingIDs.end(), std::back_inserter(weightChoice->rwgtIDs)); + + std::stringstream pdfDoc; + pdfDoc << "LHE pdf variation weights (w_var / w_nominal) for LHA IDs "; + bool found = false; + for (const auto& pw : pdfSetWeightIDs) { + for (uint32_t lhaid : preferredPDFLHAIDs_) { + if (pw.lhaIDs.first != lhaid && pw.lhaIDs.first != (lhaid + 1)) + continue; // sometimes the first weight is not saved if that PDF is the nominal one for the sample + if (pw.wids.size() == 1) + continue; // only consider error sets + pdfDoc << pw.lhaIDs.first << " - " << pw.lhaIDs.second; + weightChoice->pdfWeightIDs = pw.wids; + if (maxPdfWeights_ < pw.wids.size()) { + weightChoice->pdfWeightIDs.resize(maxPdfWeights_); // drop some replicas + pdfDoc << ", truncated to the first " << maxPdfWeights_ << " replicas"; + } + weightChoice->pdfWeightsDoc = pdfDoc.str(); + found = true; + break; + } + if (found) + break; + } + } + } + return weightChoice; + } + + // create an empty counter + std::unique_ptr beginStream(edm::StreamID) const override { + return std::make_unique(); + } + // inizialize to zero at begin run + void streamBeginRun(edm::StreamID id, edm::Run const&, edm::EventSetup const&) const override { + streamCache(id)->clear(); + } + + std::shared_ptr globalBeginLuminosityBlock(edm::LuminosityBlock const& lumiBlock, + edm::EventSetup const&) const override { + auto dynamicWeightChoiceGenInfo = std::make_shared(); + + edm::Handle genLumiInfoHead; + lumiBlock.getByToken(genLumiInfoHeadTag_, genLumiInfoHead); + if (!genLumiInfoHead.isValid()) + edm::LogWarning("LHETablesProducer") + << "No GenLumiInfoHeader product found, will not fill generator model string.\n"; + + if (genLumiInfoHead.isValid()) { + auto weightChoice = dynamicWeightChoiceGenInfo.get(); + + std::vector scaleVariationIDs; + std::vector pdfSetWeightIDs; + weightChoice->psWeightIDs.clear(); + + std::regex scalew("LHE,\\s+id\\s+=\\s+(\\d+),\\s+(.+)\\,\\s+mur=(\\S+)\\smuf=(\\S+)"); + 
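+      // PDF members appear in the GenLumiInfoHeader weight names as "LHE, id = <n>, <group>, Member <m> of sets <pdfset>", matched by the pdfw pattern below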
std::regex pdfw("LHE,\\s+id\\s+=\\s+(\\d+),\\s+(.+),\\s+Member\\s+(\\d+)\\s+of\\ssets\\s+(\\w+\\b)"); + std::regex mainPSw("sr(Def|:murfac=)(Hi|Lo|_dn|_up|0.5|2.0)"); + std::smatch groups; + auto weightNames = genLumiInfoHead->weightNames(); + std::unordered_map knownPDFSetsFromGenInfo_; + unsigned int weightIter = 0; + for (const auto& line : weightNames) { + if (std::regex_search(line, groups, scalew)) { // scale variation + auto id = groups.str(1); + auto group = groups.str(2); + auto mur = groups.str(3); + auto muf = groups.str(4); + if (group.find("Central scale variation") != std::string::npos) + scaleVariationIDs.emplace_back(groups.str(1), groups.str(2), groups.str(3), groups.str(4)); + } else if (std::regex_search(line, groups, pdfw)) { // PDF variation + auto id = groups.str(1); + auto group = groups.str(2); + auto memberid = groups.str(3); + auto pdfset = groups.str(4); + if (group.find(pdfset) != std::string::npos) { + if (knownPDFSetsFromGenInfo_.find(pdfset) == knownPDFSetsFromGenInfo_.end()) { + knownPDFSetsFromGenInfo_[pdfset] = std::atoi(id.c_str()); + pdfSetWeightIDs.emplace_back(id, std::atoi(id.c_str())); + } else + pdfSetWeightIDs.back().add(id, std::atoi(id.c_str())); + } + } else if (line == "Baseline") { + weightChoice->psBaselineID = weightIter; + } else if (line.find("isr") != std::string::npos || line.find("fsr") != std::string::npos) { + weightChoice->matchPS_alt = line.find("sr:") != std::string::npos; // (f/i)sr: for new weights + if (keepAllPSWeights_) { + weightChoice->psWeightIDs.push_back(weightIter); // PS variations + } else if (std::regex_search(line, groups, mainPSw)) { + if (weightChoice->psWeightIDs.empty()) + weightChoice->psWeightIDs = std::vector(4, -1); + int psIdx = (line.find("fsr") != std::string::npos) ? 1 : 0; + psIdx += (groups.str(2) == "Hi" || groups.str(2) == "_up" || groups.str(2) == "2.0") ? 
0 : 2; + weightChoice->psWeightIDs[psIdx] = weightIter; + } + } + weightIter++; + } + if (keepAllPSWeights_) { + weightChoice->psWeightsDoc = "All PS weights (w_var / w_nominal)"; + } else if (weightChoice->psWeightIDs.size() == 4) { + weightChoice->psWeightsDoc = + "PS weights (w_var / w_nominal); [0] is ISR=2 FSR=1; [1] is ISR=1 FSR=2" + "[2] is ISR=0.5 FSR=1; [3] is ISR=1 FSR=0.5;"; + for (int i = 0; i < 4; i++) { + if (static_cast(weightChoice->psWeightIDs[i]) == -1) + weightChoice->setMissingWeight(i); + } + } else { + weightChoice->psWeightsDoc = "dummy PS weight (1.0) "; + } + + weightChoice->scaleWeightIDs.clear(); + weightChoice->pdfWeightIDs.clear(); + + std::sort(scaleVariationIDs.begin(), scaleVariationIDs.end()); + std::stringstream scaleDoc; + scaleDoc << "LHE scale variation weights (w_var / w_nominal); "; + for (unsigned int isw = 0, nsw = scaleVariationIDs.size(); isw < nsw; ++isw) { + const auto& sw = scaleVariationIDs[isw]; + if (isw) + scaleDoc << "; "; + scaleDoc << "[" << isw << "] is " << sw.label; + weightChoice->scaleWeightIDs.push_back(std::atoi(sw.wid.c_str())); + } + if (!scaleVariationIDs.empty()) + weightChoice->scaleWeightsDoc = scaleDoc.str(); + std::stringstream pdfDoc; + pdfDoc << "LHE pdf variation weights (w_var / w_nominal) for LHA names "; + bool found = false; + for (const auto& pw : pdfSetWeightIDs) { + if (pw.wids.size() == 1) + continue; // only consider error sets + for (const auto& wantedpdf : lhaNameToID_) { + auto pdfname = wantedpdf.first; + if (knownPDFSetsFromGenInfo_.find(pdfname) == knownPDFSetsFromGenInfo_.end()) + continue; + uint32_t lhaid = knownPDFSetsFromGenInfo_.at(pdfname); + if (pw.lhaIDs.first != lhaid) + continue; + pdfDoc << pdfname; + for (const auto& x : pw.wids) + weightChoice->pdfWeightIDs.push_back(std::atoi(x.c_str())); + if (maxPdfWeights_ < pw.wids.size()) { + weightChoice->pdfWeightIDs.resize(maxPdfWeights_); // drop some replicas + pdfDoc << ", truncated to the first " << maxPdfWeights_ << " replicas"; + } + weightChoice->pdfWeightsDoc = pdfDoc.str(); + found = true; + break; + } + if (found) + break; + } + } + return dynamicWeightChoiceGenInfo; + } + + void globalEndLuminosityBlock(edm::LuminosityBlock const&, edm::EventSetup const&) const override {} + + void streamBeginLuminosityBlock(edm::StreamID id, + edm::LuminosityBlock const& lumiBlock, + edm::EventSetup const&) const override { + auto counterMap = &(streamCache(id)->countermap); + edm::Handle genLumiInfoHead; + lumiBlock.getByToken(genLumiInfoHeadTag_, genLumiInfoHead); + + std::string label; + if (genLumiInfoHead.isValid()) { + label = genLumiInfoHead->configDescription(); + boost::replace_all(label, "-", "_"); + boost::replace_all(label, "/", "_"); + } + counterMap->setLabel(label); + } + // create an empty counter + std::shared_ptr globalBeginRunSummary(edm::Run const&, edm::EventSetup const&) const override { + return std::make_shared(); + } + // add this stream to the summary + void streamEndRunSummary(edm::StreamID id, + edm::Run const&, + edm::EventSetup const&, + CounterMap* runCounterMap) const override { + runCounterMap->merge(streamCache(id)->countermap); + } + // nothing to do per se + void globalEndRunSummary(edm::Run const&, edm::EventSetup const&, CounterMap* runCounterMap) const override {} + // write the total to the run + void globalEndRunProduce(edm::Run& iRun, edm::EventSetup const&, CounterMap const* runCounterMap) const override { + + for (const auto& x : runCounterMap->countermap) { + auto runCounter = &(x.second); + std::string label 
= (!x.first.empty()) ? (std::string("_") + x.first) : ""; + std::string doclabel = (!x.first.empty()) ? (std::string(", for model label ") + x.first) : ""; + + // out->addInt("genEventCount" + label, "event count" + doclabel, runCounter->num); + // out->addFloat("genEventSumw" + label, "sum of gen weights" + doclabel, runCounter->sumw); + // out->addFloat("genEventSumw2" + label, "sum of gen (weight^2)" + doclabel, runCounter->sumw2); + + double norm = runCounter->sumw ? 1.0 / runCounter->sumw : 1; + auto sumScales = runCounter->sumScale; + for (auto& val : sumScales) + val *= norm; + // out->addVFloatWithNorm("LHEScaleSumw" + label, + // "Sum of genEventWeight * LHEScaleWeight[i], divided by genEventSumw" + doclabel, + // sumScales, + // runCounter->sumw); + auto sumPDFs = runCounter->sumPDF; + for (auto& val : sumPDFs) + val *= norm; + // out->addVFloatWithNorm("LHEPdfSumw" + label, + // "Sum of genEventWeight * LHEPdfWeight[i], divided by genEventSumw" + doclabel, + // sumPDFs, + // runCounter->sumw); + auto sumPS = runCounter->sumPS; + for (auto& val : sumPS) + val *= norm; + // out->addVFloatWithNorm("PSSumw" + label, + // "Sum of genEventWeight * PSWeight[i], divided by genEventSumw" + doclabel, + // sumPS, + // runCounter->sumw); + if (!runCounter->sumRwgt.empty()) { + auto sumRwgts = runCounter->sumRwgt; + for (auto& val : sumRwgts) + val *= norm; + // out->addVFloatWithNorm("LHEReweightingSumw" + label, + // "Sum of genEventWeight * LHEReweightingWeight[i], divided by genEventSumw" + doclabel, + // sumRwgts, + // runCounter->sumw); + } + if (!runCounter->sumNamed.empty()) { // it could be empty if there's no LHE info in the sample + for (unsigned int i = 0, n = namedWeightLabels_.size(); i < n; ++i) { + // out->addFloatWithNorm( + // "LHESumw_" + namedWeightLabels_[i] + label, + // "Sum of genEventWeight * LHEWeight_" + namedWeightLabels_[i] + ", divided by genEventSumw" + doclabel, + // runCounter->sumNamed[i] * norm, + // runCounter->sumw); + } + } + } + } + // nothing to do here + void globalEndRun(edm::Run const&, edm::EventSetup const&) const override {} + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("genEvent", edm::InputTag("generator")) + ->setComment("tag for the GenEventInfoProduct, to get the main weight"); + desc.add("genLumiInfoHeader", edm::InputTag("generator")) + ->setComment("tag for the GenLumiInfoProduct, to get the model string"); + desc.add>("lheInfo", std::vector{{"externalLHEProducer"}, {"source"}}) + ->setComment("tag(s) for the LHE information (LHEEventProduct and LHERunInfoProduct)"); + + edm::ParameterSetDescription prefpdf; + prefpdf.add("name"); + prefpdf.add("lhaid"); + desc.addVPSet("preferredPDFs", prefpdf, std::vector()) + ->setComment( + "LHA PDF Ids of the preferred PDF sets, in order of preference (the first matching one will be used)"); + desc.add>("namedWeightIDs")->setComment("set of LHA weight IDs for named LHE weights"); + desc.add>("namedWeightLabels") + ->setComment("output names for the namedWeightIDs (in the same order)"); + desc.add("lheWeightPrecision")->setComment("Number of bits in the mantissa for LHE weights"); + desc.add("maxPdfWeights")->setComment("Maximum number of PDF weights to save (to crop NN replicas)"); + desc.add("keepAllPSWeights")->setComment("Store all PS weights found"); + desc.addOptionalUntracked("debug")->setComment("dump out all LHE information for one event"); + descriptions.add("genWeightsTable", desc); + } + +protected: + const 
edm::EDGetTokenT genTag_; + const std::vector lheLabel_; + const std::vector> lheTag_; + const std::vector> lheRunTag_; + const edm::EDGetTokenT genLumiInfoHeadTag_; + + std::vector preferredPDFLHAIDs_; + std::unordered_map lhaNameToID_; + std::vector namedWeightIDs_; + std::vector namedWeightLabels_; + int lheWeightPrecision_; + unsigned int maxPdfWeights_; + bool keepAllPSWeights_; + + mutable std::atomic debug_, debugRun_, hasIssuedWarning_, psWeightWarning_; + + edm::Service fs_; + std::map oneDHists_; +}; + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(GenWeightsTableProducer); \ No newline at end of file diff --git a/SignalSystematics/test/analyzeGenWeights.py b/SignalSystematics/test/analyzeGenWeights.py new file mode 100644 index 00000000..f39dab16 --- /dev/null +++ b/SignalSystematics/test/analyzeGenWeights.py @@ -0,0 +1,64 @@ +import FWCore.ParameterSet.Config as cms +import sys +from os.path import exists + +########################################################### +##### Set up process ##### +########################################################### + +process = cms.Process ('LHESC') +process.load ('FWCore.MessageService.MessageLogger_cfi') +process.MessageLogger.cerr.FwkReport.reportEvery = 10000 + +#output file name when running interactively +process.TFileService = cms.Service ('TFileService', + fileName = cms.string ( + 'file:LHEScalesPlot.root', + ) +) +process.maxEvents = cms.untracked.PSet ( + input = cms.untracked.int32 (-1) +) + +files = [] +for i in range(650): + if exists("/data/users/borzari/condor/SignalMC/Run3/2022/step4/100cm/700GeV/hist_"+str(i)+".root"): files.append("file:/data/users/borzari/condor/SignalMC/Run3/2022/step4/100cm/700GeV/hist_"+str(i)+".root") + +process.source = cms.Source ("PoolSource", + fileNames = cms.untracked.vstring ( + files + ) +) + +########################################################### +##### Set up the analyzer ##### +########################################################### + +process.genWeightsTable = cms.EDProducer("GenWeightsTableProducer", + genEvent = cms.InputTag("generator"), + genLumiInfoHeader = cms.InputTag("generator"), + lheInfo = cms.VInputTag(cms.InputTag("externalLHEProducer"), cms.InputTag("source")), + preferredPDFs = cms.VPSet( # see https://lhapdf.hepforge.org/pdfsets.html + cms.PSet( name = cms.string("NNPDF31_nnlo_hessian_pdfas"), lhaid = cms.uint32(306000) ), + cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_hessian"), lhaid = cms.uint32(304400) ), + cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_mc_hessian_pdfas"), lhaid = cms.uint32(325300) ), + cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_mc"), lhaid = cms.uint32(316200) ), + cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_nf_4_mc_hessian"), lhaid = cms.uint32(325500) ), + cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_nf_4"), lhaid = cms.uint32(320900) ), + cms.PSet( name = cms.string("NNPDF30_nlo_as_0118"), lhaid = cms.uint32(260000) ), # for some 92X samples. Note that the nominal weight, 260000, is not included in the LHE ... + cms.PSet( name = cms.string("NNPDF30_lo_as_0130"), lhaid = cms.uint32(262000) ), # some MLM 80X samples have only this (e.g. /store/mc/RunIISummer16MiniAODv2/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v2/120000/02A210D6-F5C3-E611-B570-008CFA197BD4.root ) + cms.PSet( name = cms.string("NNPDF30_nlo_nf_4_pdfas"), lhaid = cms.uint32(292000) ), # some FXFX 80X samples have only this (e.g. 
WWTo1L1Nu2Q, WWTo4Q) + cms.PSet( name = cms.string("NNPDF30_nlo_nf_5_pdfas"), lhaid = cms.uint32(292200) ), # some FXFX 80X samples have only this (e.g. DYJetsToLL_Pt, WJetsToLNu_Pt, DYJetsToNuNu_Pt) + cms.PSet( name = cms.string("PDF4LHC15_nnlo_30_pdfas"), lhaid = cms.uint32(91400) ), + cms.PSet( name = cms.string("PDF4LHC15_nlo_30_pdfas"), lhaid = cms.uint32(90400) ), + cms.PSet( name = cms.string("PDF4LHC15_nlo_30"), lhaid = cms.uint32(90900) ), + ), + namedWeightIDs = cms.vstring(), + namedWeightLabels = cms.vstring(), + lheWeightPrecision = cms.int32(14), + maxPdfWeights = cms.uint32(150), + keepAllPSWeights = cms.bool(False), + debug = cms.untracked.bool(False), +) + +process.myPath = cms.Path(process.genWeightsTable) diff --git a/SignalSystematics/test/config_2022_cfg.py b/SignalSystematics/test/config_2022_cfg.py new file mode 100644 index 00000000..bd519662 --- /dev/null +++ b/SignalSystematics/test/config_2022_cfg.py @@ -0,0 +1,13 @@ +from DisappTrks.SignalSystematics.config_cfg import * +from DisappTrks.StandardAnalysis.customize import * + +if not os.environ["CMSSW_VERSION"].startswith ("CMSSW_12_4_") and not os.environ["CMSSW_VERSION"].startswith ("CMSSW_13_0_"): + print("Please use a CMSSW_12_4_X or CMSSW_13_0_X release...") + sys.exit (0) + +process = customize (process, "2022", applyPUReweighting = True, applyTriggerReweighting = True, applyMissingHitsCorrections = True, runMETFilters = False) + +# When using these channels for calculating a new weight in MC, use the following customization instead: +# ZtoMuMuISRStudy, ZtoMuMuISRStudyJet30 +# hitsSystematicsCtrlSelection, muonCtrlSelection +# process = customize (process, "2018", applyPUReweighting = True, applyISRReweighting = False, applyTriggerReweighting = False, applyMissingHitsCorrections = False, runMETFilters = False) diff --git a/SignalSystematics/test/createGenWgtUncertainties.C b/SignalSystematics/test/createGenWgtUncertainties.C new file mode 100644 index 00000000..d2e4cad1 --- /dev/null +++ b/SignalSystematics/test/createGenWgtUncertainties.C @@ -0,0 +1,92 @@ +void createUncertainties() +{ + TFile *f = TFile::Open("LHEScalesPlot.root"); + TDirectory *dir = (TDirectory*)f->Get("genWeightsTable"); + dir->cd(); + TH1 *den1; + TH1 *den2; + TH1 *den3; + dir->GetObject("wPSNW", den1); + dir->GetObject("wScaleNW", den2); + dir->GetObject("wPDFNW", den3); + TH1 *num1; + TH1 *num2; + TH1 *num3; + dir->GetObject("wPS", num1); + dir->GetObject("wScale", num2); + dir->GetObject("wPDF", num3); + + std::cout << num1->TH1::Integral() << std::endl; + std::cout << den1->TH1::Integral() << std::endl; + std::cout << num2->TH1::Integral() << std::endl; + std::cout << den2->TH1::Integral() << std::endl; + std::cout << num3->TH1::Integral() << std::endl; + std::cout << den3->TH1::Integral() << std::endl; + + + int marker = 20; + const char *ylabel = "Trigger Efficiency"; + const char *xlabelnomu = "PF E_{T}^{miss, no mu}"; + + TCanvas *c1 = new TCanvas("c1", "c1", 700, 700); + TCanvas *c2 = new TCanvas("c2", "c2", 700, 700); + TCanvas *c3 = new TCanvas("c3", "c3", 700, 700); + + TFile *file = new TFile("uncertainties.root", "RECREATE"); + TH1D *pEff1 = (TH1D*)num1->Clone("pEff1"); + pEff1->Divide(den1); + pEff1->SetTitle(""); + pEff1->SetMarkerStyle(marker); + pEff1->SetMinimum(0.0); + pEff1->SetMaximum(1.5); + pEff1->GetYaxis()->SetTitle(ylabel); + pEff1->GetXaxis()->SetTitle(xlabelnomu); + pEff1->GetYaxis()->SetTitleSize(0.04); + pEff1->GetXaxis()->SetTitleSize(0.04); + pEff1->GetYaxis()->SetLabelSize(0.04); + 
pEff1->GetXaxis()->SetLabelSize(0.04); + pEff1->GetXaxis()->SetLimits(-0.5, 3.5); + c1->cd(); + pEff1->Draw("ap"); + c1->Write(); + c1->Print("uncPS.png"); + + TH1D *pEff2 = (TH1D*)num2->Clone("pEff2"); + pEff2->Divide(den2); + pEff2->SetTitle(""); + pEff2->SetMarkerStyle(marker); + pEff2->SetMinimum(0.0); + pEff2->SetMaximum(1.5); + pEff2->GetYaxis()->SetTitle(ylabel); + pEff2->GetXaxis()->SetTitle(xlabelnomu); + pEff2->GetYaxis()->SetTitleSize(0.04); + pEff2->GetXaxis()->SetTitleSize(0.04); + pEff2->GetYaxis()->SetLabelSize(0.04); + pEff2->GetXaxis()->SetLabelSize(0.04); + pEff2->GetXaxis()->SetLimits(-0.5, 8.5); + c2->cd(); + pEff2->Draw("ap"); + c2->Write(); + c2->Print("uncScale.png"); + + TH1D *pEff3 = (TH1D*)num3->Clone("pEff3"); + pEff3->Divide(den3); + pEff3->SetTitle(""); + pEff3->SetMarkerStyle(marker); + pEff3->SetMinimum(0.0); + pEff3->SetMaximum(1.5); + pEff3->GetYaxis()->SetTitle(ylabel); + pEff3->GetXaxis()->SetTitle(xlabelnomu); + pEff3->GetYaxis()->SetTitleSize(0.04); + pEff3->GetXaxis()->SetTitleSize(0.04); + pEff3->GetYaxis()->SetLabelSize(0.04); + pEff3->GetXaxis()->SetLabelSize(0.04); + pEff3->GetXaxis()->SetLimits(-0.5, 100.5); + c3->cd(); + pEff3->Draw("ap"); + c3->Write(); + c3->Print("uncPDF.png"); + + f->Close(); + file->Close(); +} \ No newline at end of file diff --git a/SignalSystematics/test/localConfig_2022.py b/SignalSystematics/test/localConfig_2022.py new file mode 100644 index 00000000..9891c71a --- /dev/null +++ b/SignalSystematics/test/localConfig_2022.py @@ -0,0 +1,36 @@ +from DisappTrks.StandardAnalysis.localConfig import * + +config_file = "config_2022_cfg.py" + +intLumi = lumi["MET_2022F"] + +datasetsData = [ + 'MET_2018A', + 'MET_2018B', + 'MET_2018C', + 'MET_2018D', +] + +# datasetsSig = ["AMSB_chargino_700GeV_10000cm_124X"] +# datasetsSig = ["AMSB_chargino_700GeV_1000cm_124X"] +# datasetsSig = ["AMSB_chargino_700GeV_100cm_124X"] +# datasetsSig = ["AMSB_chargino_700GeV_10cm_124X"] +# datasetsSig = ["AMSB_chargino_400GeV_10000cm_124X"] +# datasetsSig = ["AMSB_chargino_400GeV_1000cm_124X"] +# datasetsSig = ["AMSB_chargino_400GeV_100cm_124X"] +# datasetsSig = ["AMSB_chargino_400GeV_10cm_124X"] +# datasetsSig = ["AMSB_chargino_100GeV_10000cm_124X"] +# datasetsSig = ["AMSB_chargino_100GeV_1000cm_124X"] +# datasetsSig = ["AMSB_chargino_100GeV_100cm_124X"] +datasetsSig = ["AMSB_chargino_100GeV_10cm_124X"] +# datasetsSig = ["AMSB_chargino_1100GeV_10000cm_124X"] +# datasetsSig = ["AMSB_chargino_1100GeV_1000cm_124X"] +# datasetsSig = ["AMSB_chargino_1100GeV_100cm_124X"] +# datasetsSig = ["AMSB_chargino_1100GeV_10cm_124X"] + +# datasets = datasetsBkgd + datasetsData + datasetsSig +datasets = datasetsSig + +#setNJobs (datasets, composite_dataset_definitions, nJobs, 500) +#setDatasetType (datasets, composite_dataset_definitions, types, "bgMC") +#InputCondorArguments["hold"] = "True" diff --git a/StandardAnalysis/python/Cuts.py b/StandardAnalysis/python/Cuts.py index eab7e02a..a6dbde93 100644 --- a/StandardAnalysis/python/Cuts.py +++ b/StandardAnalysis/python/Cuts.py @@ -378,6 +378,11 @@ cutString = cms.string("pt > 30"), numberRequired = cms.string(">= 1"), ) +cutTrkPt25 = cms.PSet( # LOWER PT CUT FOR SKIMMING + inputCollection = cms.vstring("tracks"), + cutString = cms.string("pt > 25"), + numberRequired = cms.string(">= 1"), +) cutTrkPt20 = cms.PSet( inputCollection = cms.vstring("tracks"), cutString = cms.string("pt > 20"), @@ -450,6 +455,11 @@ cutString = cms.string("isFiducialECALTrack"), numberRequired = cms.string(">= 1"), ) +cutTrkIsHighPurity = 
cms.PSet( # to be used in signal MC trigger efficiency + inputCollection = cms.vstring("tracks"), + cutString = cms.string("isHighPurityTrack"), + numberRequired = cms.string(">= 1"), +) # cutTrkEtaEcalCrackVeto = cms.PSet( # TRACK ETA: NOT IN ECAL CRACKS: UPDATE CRACK BOUNDARIES # inputCollection = cms.vstring("tracks"), # cutString = cms.string("fabs ( eta ) "), diff --git a/StandardAnalysis/python/EventSelections.py b/StandardAnalysis/python/EventSelections.py index 63389233..1646f5ba 100644 --- a/StandardAnalysis/python/EventSelections.py +++ b/StandardAnalysis/python/EventSelections.py @@ -23,6 +23,19 @@ def createHitsVariations (ch, chName): ) ) +########################################################################## +##### Skimming ##### +########################################################################## + +skimming = cms.PSet( + name = cms.string("Skimming"), + triggers = triggersAllSkimming, + cuts = cms.VPSet( + cutTrkPt25, + cutTrkEta, + ) +) + ########################################################################## ##### Preselection ##### ########################################################################## diff --git a/StandardAnalysis/python/Triggers.py b/StandardAnalysis/python/Triggers.py index 75413c27..b3a658f8 100644 --- a/StandardAnalysis/python/Triggers.py +++ b/StandardAnalysis/python/Triggers.py @@ -291,8 +291,7 @@ 'HLT_PFMETTypeOne130_PFMHT130_IDTight_v', ) - - - - - +# Although the EXO_DisappTrks selection uses many more triggers, since this is just for BG MC samples +# which will be subjected to one of the single lepton triggers at some point, the choice to only use +# these is to select/save fewer events to keep the files size manageable +triggersAllSkimming = triggersSingleMu + triggersSingleEle + triggersSingleTau diff --git a/StandardAnalysis/python/config_cfg.py b/StandardAnalysis/python/config_cfg.py index a4a8f5e4..45f263c3 100644 --- a/StandardAnalysis/python/config_cfg.py +++ b/StandardAnalysis/python/config_cfg.py @@ -6,6 +6,12 @@ # add_channels (process, [NoCuts], histSets, weights, [], collMap, variableProducers, False) ################################################################################ +################################################################################ +# Skimming channels +################################################################################ +# add_channels (process, [skimming], histSets, weights, [], collMap, variableProducers, True, isCRAB=True) +################################################################################ + ################################################################################ # MET channels ################################################################################ diff --git a/StandardAnalysis/test/crab_StdAnaSelections.py b/StandardAnalysis/test/crab_StdAnaSelections.py new file mode 100644 index 00000000..b1b2f47f --- /dev/null +++ b/StandardAnalysis/test/crab_StdAnaSelections.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +import os +from CRABClient.UserUtilities import config +config = config() + +config.General.requestName = '' +config.General.workArea = 'crab' +config.General.transferOutputs = True +config.General.transferLogs = True + +config.JobType.pluginName = 'Analysis' +config.JobType.psetName = 'config_2022_cfg.py' +config.JobType.allowUndistributedCMSSW = True + +# always use MINIAOD as inputDataset and AOD as secondaryInputDataset + +# config.Data.inputDataset = 
'/ZZ_TuneCP5_13p6TeV_pythia8/Run3Summer22EEMiniAODv3-124X_mcRun3_2022_realistic_postEE_v1-v3/MINIAODSIM'
+# config.Data.secondaryInputDataset = '/ZZ_TuneCP5_13p6TeV_pythia8/Run3Summer22EEDRPremix-124X_mcRun3_2022_realistic_postEE_v1-v3/AODSIM'
+
+# config.Data.inputDataset = '/WtoLNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer22EEMiniAODv3-124X_mcRun3_2022_realistic_postEE_v1-v2/MINIAODSIM'
+# config.Data.secondaryInputDataset = '/WtoLNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer22EEDRPremix-124X_mcRun3_2022_realistic_postEE_v1-v2/AODSIM'
+
+config.Data.inputDataset = '/DYJetsToLL_M-50_TuneCP5_13p6TeV-madgraphMLM-pythia8/Run3Summer22EEMiniAODv3-forPOG_124X_mcRun3_2022_realistic_postEE_v1-v3/MINIAODSIM'
+config.Data.secondaryInputDataset = '/DYJetsToLL_M-50_TuneCP5_13p6TeV-madgraphMLM-pythia8/Run3Summer22EEDRPremix-forPOG_124X_mcRun3_2022_realistic_postEE_v1-v3/AODSIM'
+
+config.Data.inputDBS = 'global'
+config.Data.splitting = 'FileBased'
+config.Data.unitsPerJob = 1 # leave this at 1 to avoid excessive wall time and failing jobs
+
+config.Data.publication = True
+config.Data.outputDatasetTag = 'Skimming' # this is just an example; it will be part of the name of the output dataset
+
+# Uncomment one of the following pairs
+
+#config.Data.outLFNDirBase = '/store/group/lpclonglived/DisappTrks/'
+#config.Site.storageSite = 'T3_US_FNALLPC'
+
+#config.Data.outLFNDirBase = '/store/user/%s/' % (user_name)
+#config.Site.storageSite = 'T2_US_Purdue'
+
+#config.Data.outLFNDirBase = '/store/group/phys_exotica/disappearingTracks/'
+#config.Site.storageSite = 'T2_CH_CERN'
+
+#config.Data.outLFNDirBase = '/store/user/borzari/'
+#config.Site.storageSite = 'T2_BR_SPRACE'
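
Usage note (not part of the patch): a minimal sketch of how the gen-weight histograms written by GenWeightsProducer.cc might be reduced to an envelope-style scale uncertainty, in the spirit of createGenWgtUncertainties.C. The output file name and the genWeightsTable/wScale and genWeightsTable/wScaleNW histogram paths are taken from analyzeGenWeights.py and the macro; the bin-to-(muR, muF) assignment assumed below (sorted pairs, nominal in bin 4, anti-correlated variations in bins 2 and 6) follows the sorting done in the producer and should be checked against the sample's LHE header before relying on the numbers.

import ROOT

f = ROOT.TFile.Open("LHEScalesPlot.root")
num = f.Get("genWeightsTable/wScale")    # per-bin sum of w_var / w_nominal
den = f.Get("genWeightsTable/wScaleNW")  # per-bin number of events

# average relative weight for each of the nine muR/muF combinations
avg = [num.GetBinContent(i + 1) / den.GetBinContent(i + 1) for i in range(9)]

# envelope over the six correlated variations: skip the nominal (bin 4) and the
# anti-correlated pairs (bins 2 and 6), assumed to be (muR=0.5, muF=2) and (muR=2, muF=0.5)
kept = [a for i, a in enumerate(avg) if i not in (2, 4, 6)]
print("scale envelope: %+.3f / %+.3f relative to nominal" % (max(kept) - avg[4], min(kept) - avg[4]))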