-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
First PyTorch tests for TorchScript inference CPU/CUDA #43475
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<use name="FWCore/Framework"/> | ||
<use name="FWCore/MessageLogger"/> | ||
<use name="FWCore/Utilities"/> | ||
<use name="FWCore/ServiceRegistry"/> | ||
<use name="pytorch"/> | ||
<export> | ||
<lib name="1"/> | ||
</export> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<use name="python_tools"/> | ||
|
||
<bin name="testTorchSimpleDnn" file="testRunner.cc,testTorchSimpleDnn.cc"> | ||
<use name="pytorch"/> | ||
<use name="boost_filesystem"/> | ||
<use name="cppunit"/> | ||
</bin> | ||
|
||
<iftool name="cuda"> | ||
<bin name="testTorchSimpleDnnCUDA" file="testRunner.cc,testTorchSimpleDnnCUDA.cc"> | ||
<use name="boost_filesystem"/> | ||
<use name="catch2"/> | ||
<use name="cppunit"/> | ||
<use name="cuda"/> | ||
<use name="pytorch"/> | ||
<use name="FWCore/ParameterSet"/> | ||
<use name="FWCore/ParameterSetReader"/> | ||
<use name="FWCore/PluginManager"/> | ||
<use name="FWCore/ServiceRegistry"/> | ||
<use name="FWCore/Utilities"/> | ||
<use name="HeterogeneousCore/CUDAServices"/> | ||
</bin> | ||
</iftool> | ||
|
||
|
||
<bin name="testTorch" file="testTorch.cc"> | ||
<use name="pytorch"/> | ||
</bin> | ||
|
||
<bin name="testTorchTimeSeries" file="time_serie_prediction.cpp"> | ||
<use name="pytorch"/> | ||
<use name="pytorch-cuda"/> | ||
</bin> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import sys | ||
import os | ||
import torch | ||
|
||
# prepare the datadir | ||
if len(sys.argv) >= 2: | ||
datadir = sys.argv[1] | ||
else: | ||
thisdir = os.path.dirname(os.path.abspath(__file__)) | ||
datadir = os.path.join(os.path.dirname(thisdir), "bin", "data") | ||
|
||
os.makedirs(datadir, exist_ok=True) | ||
|
||
class MyModule(torch.nn.Module): | ||
def __init__(self, N, M): | ||
super(MyModule, self).__init__() | ||
self.weight = torch.nn.Parameter(torch.ones(N, M)) | ||
self.bias = torch.nn.Parameter(torch.ones(N)) | ||
|
||
def forward(self, input): | ||
return torch.sum(torch.nn.functional.elu(self.weight.mv(input) + self.bias)) | ||
|
||
|
||
module = MyModule(10, 10) | ||
x = torch.ones(10) | ||
|
||
tm = torch.jit.trace(module.eval(), x) | ||
|
||
tm.save(f"{datadir}/simple_dnn.pt") | ||
|
||
print("simple_dnn.pt created successfully!") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* Base class for tests. | ||
* | ||
*/ | ||
|
||
#ifndef PHYSICSTOOLS_PYTORCH_TEST_TESTBASE_H | ||
#define PHYSICSTOOLS_PYTORCH_TEST_TESTBASE_H | ||
|
||
#include <boost/filesystem.hpp> | ||
#include <filesystem> | ||
#include <cppunit/extensions/HelperMacros.h> | ||
#include <stdexcept> | ||
|
||
class testBasePyTorch : public CppUnit::TestFixture { | ||
public: | ||
std::string dataPath_; | ||
|
||
void setUp(); | ||
void tearDown(); | ||
std::string cmsswPath(std::string path); | ||
|
||
virtual void test() = 0; | ||
|
||
virtual std::string pyScript() const = 0; | ||
}; | ||
|
||
void testBasePyTorch::setUp() { | ||
dataPath_ = | ||
cmsswPath("/test/" + std::string(std::getenv("SCRAM_ARCH")) + "/" + boost::filesystem::unique_path().string()); | ||
valsdav marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// create the graph using apptainer | ||
std::string testPath = cmsswPath("/src/PhysicsTools/PyTorch/test"); | ||
std::string cmd = "apptainer exec -B " + cmsswPath("") + | ||
" /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cmsml/cmsml:3.11 python " + testPath + "/" + | ||
pyScript() + " " + dataPath_; | ||
std::cout << "cmd: " << cmd << std::endl; | ||
std::array<char, 128> buffer; | ||
std::string result; | ||
std::shared_ptr<FILE> pipe(popen(cmd.c_str(), "r"), pclose); | ||
if (!pipe) { | ||
throw std::runtime_error("Failed to run apptainer to prepare the PyTorch test model: " + cmd); | ||
} | ||
while (!feof(pipe.get())) { | ||
if (fgets(buffer.data(), 128, pipe.get()) != NULL) { | ||
result += buffer.data(); | ||
} | ||
} | ||
std::cout << std::endl << result << std::endl; | ||
} | ||
|
||
void testBasePyTorch::tearDown() { | ||
if (std::filesystem::exists(dataPath_)) { | ||
std::filesystem::remove_all(dataPath_); | ||
} | ||
} | ||
|
||
std::string testBasePyTorch::cmsswPath(std::string path) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be feasible to use |
||
if (path.size() > 0 && path.substr(0, 1) != "/") { | ||
path = "/" + path; | ||
} | ||
|
||
std::string base = std::string(std::getenv("CMSSW_BASE")); | ||
std::string releaseBase = std::string(std::getenv("CMSSW_RELEASE_BASE")); | ||
|
||
return (std::filesystem::exists(base.c_str()) ? base : releaseBase) + path; | ||
} | ||
|
||
#endif // PHYSICSTOOLS_PYTORCH_TEST_TESTBASE_H |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Base class for tests. | ||
* | ||
*/ | ||
|
||
#ifndef PHYSICSTOOLS_PYTORCH_TEST_TESTBASECUDA_H | ||
#define PHYSICSTOOLS_PYTORCH_TEST_TESTBASECUDA_H | ||
|
||
#include <boost/filesystem.hpp> | ||
#include <filesystem> | ||
#include <cppunit/extensions/HelperMacros.h> | ||
#include <stdexcept> | ||
|
||
#include "FWCore/ParameterSet/interface/ParameterSet.h" | ||
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" | ||
#include "FWCore/ParameterSetReader/interface/ParameterSetReader.h" | ||
#include "FWCore/PluginManager/interface/PluginManager.h" | ||
#include "FWCore/PluginManager/interface/standard.h" | ||
#include "FWCore/ServiceRegistry/interface/Service.h" | ||
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h" | ||
#include "FWCore/ServiceRegistry/interface/ServiceToken.h" | ||
#include "FWCore/Utilities/interface/Exception.h" | ||
#include "FWCore/Utilities/interface/ResourceInformation.h" | ||
|
||
class testBasePyTorchCUDA : public CppUnit::TestFixture { | ||
public: | ||
std::string dataPath_; | ||
|
||
void setUp(); | ||
void tearDown(); | ||
std::string cmsswPath(std::string path); | ||
|
||
virtual std::string pyScript() const = 0; | ||
|
||
virtual void test() = 0; | ||
}; | ||
|
||
void testBasePyTorchCUDA::setUp() { | ||
dataPath_ = | ||
cmsswPath("/test/" + std::string(std::getenv("SCRAM_ARCH")) + "/" + boost::filesystem::unique_path().string()); | ||
Comment on lines
+39
to
+40
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This member variable seems unused. |
||
|
||
// create the graph using apptainer | ||
std::string testPath = cmsswPath("/src/PhysicsTools/PyTorch/test"); | ||
std::string cmd = "apptainer exec -B " + cmsswPath("") + | ||
" /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cmsml/cmsml:3.11 python " + testPath + "/" + | ||
pyScript() + " " + dataPath_; | ||
std::cout << "cmd: " << cmd << std::endl; | ||
std::array<char, 128> buffer; | ||
std::string result; | ||
std::shared_ptr<FILE> pipe(popen(cmd.c_str(), "r"), pclose); | ||
if (!pipe) { | ||
throw std::runtime_error("Failed to run apptainer to prepare the PyTorch test model: " + cmd); | ||
} | ||
while (!feof(pipe.get())) { | ||
if (fgets(buffer.data(), 128, pipe.get()) != NULL) { | ||
result += buffer.data(); | ||
} | ||
} | ||
std::cout << std::endl << result << std::endl; | ||
} | ||
void testBasePyTorchCUDA::tearDown() { | ||
if (std::filesystem::exists(dataPath_)) { | ||
std::filesystem::remove_all(dataPath_); | ||
} | ||
} | ||
|
||
std::string testBasePyTorchCUDA::cmsswPath(std::string path) { | ||
if (path.size() > 0 && path.substr(0, 1) != "/") { | ||
path = "/" + path; | ||
} | ||
|
||
std::string base = std::string(std::getenv("CMSSW_BASE")); | ||
std::string releaseBase = std::string(std::getenv("CMSSW_RELEASE_BASE")); | ||
|
||
return (std::filesystem::exists(base.c_str()) ? base : releaseBase) + path; | ||
} | ||
|
||
#endif // PHYSICSTOOLS_PYTORCH_TEST_TESTBASECUDA_H |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
#include <Utilities/Testing/interface/CppUnit_testdriver.icpp> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#include <torch/script.h> | ||
#include "testBase.h" | ||
#include <iostream> | ||
#include <memory> | ||
#include <vector> | ||
|
||
class testSimpleDNN : public testBasePyTorch { | ||
CPPUNIT_TEST_SUITE(testSimpleDNN); | ||
CPPUNIT_TEST(test); | ||
CPPUNIT_TEST_SUITE_END(); | ||
|
||
public: | ||
std::string pyScript() const override; | ||
void test() override; | ||
}; | ||
|
||
CPPUNIT_TEST_SUITE_REGISTRATION(testSimpleDNN); | ||
|
||
std::string testSimpleDNN::pyScript() const { return "create_simple_dnn.py"; } | ||
|
||
void testSimpleDNN::test() { | ||
std::string model_path = dataPath_ + "/simple_dnn.pt"; | ||
torch::Device device(torch::kCPU); | ||
torch::jit::script::Module module; | ||
try { | ||
// Deserialize the ScriptModule from a file using torch::jit::load(). | ||
module = torch::jit::load(model_path); | ||
module.to(device); | ||
} catch (const c10::Error& e) { | ||
std::cerr << "error loading the model\n" << e.what() << std::endl; | ||
valsdav marked this conversation as resolved.
Show resolved
Hide resolved
|
||
CPPUNIT_ASSERT(false); | ||
} | ||
// Create a vector of inputs. | ||
std::vector<torch::jit::IValue> inputs; | ||
inputs.push_back(torch::ones(10, device)); | ||
|
||
// Execute the model and turn its output into a tensor. | ||
at::Tensor output = module.forward(inputs).toTensor(); | ||
std::cout << "output: " << output << '\n'; | ||
CPPUNIT_ASSERT(output.item<float_t>() == 110.); | ||
std::cout << "ok\n"; | ||
} |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,64 @@ | ||||
#include <torch/script.h> | ||||
#include "testBaseCUDA.h" | ||||
#include <iostream> | ||||
#include <memory> | ||||
#include <vector> | ||||
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" | ||||
|
||||
class testSimpleDNNCUDA : public testBasePyTorchCUDA { | ||||
CPPUNIT_TEST_SUITE(testSimpleDNNCUDA); | ||||
CPPUNIT_TEST(test); | ||||
CPPUNIT_TEST_SUITE_END(); | ||||
|
||||
public: | ||||
std::string pyScript() const override; | ||||
void test() override; | ||||
}; | ||||
|
||||
CPPUNIT_TEST_SUITE_REGISTRATION(testSimpleDNNCUDA); | ||||
|
||||
std::string testSimpleDNNCUDA::pyScript() const { return "create_simple_dnn.py"; } | ||||
|
||||
void testSimpleDNNCUDA::test() { | ||||
std::vector<edm::ParameterSet> psets; | ||||
edm::ServiceToken serviceToken = edm::ServiceRegistry::createSet(psets); | ||||
edm::ServiceRegistry::Operate operate(serviceToken); | ||||
|
||||
// Setup the CUDA Service | ||||
edmplugin::PluginManager::configure(edmplugin::standard::config()); | ||||
|
||||
std::string const config = R"_(import FWCore.ParameterSet.Config as cms | ||||
process = cms.Process('Test') | ||||
process.add_(cms.Service('ResourceInformationService')) | ||||
process.add_(cms.Service('CUDAService')) | ||||
)_"; | ||||
std::unique_ptr<edm::ParameterSet> params; | ||||
edm::makeParameterSets(config, params); | ||||
edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params))); | ||||
edm::ServiceRegistry::Operate operate2(tempToken); | ||||
edm::Service<CUDAInterface> cuda; | ||||
std::cout << "CUDA service enabled: " << cuda->enabled() << std::endl; | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Setting up the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I replicated the structure used in the TensorFlow tests: I think this was to make sure that GPUs are available and used for the CUDA tests There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reminding myself,
and for that to be filled correctly the CUDAService is needed there.
If you foresee similar structure here, I'm fine with keeping this (presently unnecessary) Service complexity in this test. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed I foresee to add a similar structure in the PyTorch interface to correctly setup the backend and check for the accelerator presence. |
||||
|
||||
std::cout << "Testing CUDA backend" << std::endl; | ||||
|
||||
std::string model_path = dataPath_ + "/simple_dnn.pt"; | ||||
torch::Device device(torch::kCUDA); | ||||
torch::jit::script::Module module; | ||||
try { | ||||
// Deserialize the ScriptModule from a file using torch::jit::load(). | ||||
module = torch::jit::load(model_path); | ||||
module.to(device); | ||||
} catch (const c10::Error& e) { | ||||
std::cerr << "error loading the model\n" << e.what() << std::endl; | ||||
valsdav marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
CPPUNIT_ASSERT(false); | ||||
} | ||||
// Create a vector of inputs. | ||||
std::vector<torch::jit::IValue> inputs; | ||||
inputs.push_back(torch::ones(10, device)); | ||||
|
||||
// Execute the model and turn its output into a tensor. | ||||
at::Tensor output = module.forward(inputs).toTensor(); | ||||
std::cout << "output: " << output << '\n'; | ||||
CPPUNIT_ASSERT(output.item<float_t>() == 110.); | ||||
std::cout << "ok\n"; | ||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This header file seems to be very tied to the
testTorchSimpleDnn.cc
. Would it be feasible to just include the contents of the header in the source file? Or is the header expected to be used by multiple source files in the future?Same question for
testBaseCUDA.h
andtestTorchSimpleDnnCUDA.cc
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would like to follow the same approach as in the TensorFlow tests https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/TensorFlow/test/testBaseCUDA.h and I'm planning to add more tests with a similar structure