fastmachinelearning · dsrankin · Nov 16, 2020 · Nov 16, 2020 · Nov 21, 2020 · Nov 23, 2020
diff --git a/HeterogeneousCore/SonicTriton/BuildFile.xml b/HeterogeneousCore/SonicTriton/BuildFile.xml
@@ -1,6 +1,7 @@
 <use name="FWCore/Utilities"/>
 <use name="FWCore/ParameterSet"/>
 <use name="FWCore/MessageLogger"/>
+<use name="DataFormats/Common"/>
 <use name="HeterogeneousCore/SonicCore"/>
 <use name="triton-inference-server"/>
 <use name="protobuf"/>

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h b/HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h
@@ -0,0 +1,43 @@
+#ifndef HeterogeneousCore_SonicTriton_TritonConverterBase
+#define HeterogeneousCore_SonicTriton_TritonConverterBase
+
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "DataFormats/Common/interface/Handle.h"
+
+#include <string>
+
+template <typename DT>
+class TritonConverterBase {
+  //Abstract converter between user data of type DT and the raw bytes exchanged with the server. The class is templated because the convert functions require the data type but must also be virtual, and virtual member function templates are not allowed in C++.
+public:
+  //the per-element byte size defaults to sizeof(DT); the two-argument form overrides it
+  explicit TritonConverterBase(const std::string& convName) : TritonConverterBase(convName, sizeof(DT)) {}
+  TritonConverterBase(const std::string& convName, size_t byteSize)
+      : converterName_(convName), byteSize_(static_cast<int64_t>(byteSize)) {}
+  TritonConverterBase(const TritonConverterBase&) = delete;
+  virtual ~TritonConverterBase() = default;
+  TritonConverterBase& operator=(const TritonConverterBase&) = delete;
+  //convertIn: user data -> raw bytes for the server; convertOut: raw server bytes -> user data
+  virtual const uint8_t* convertIn(const DT* in) const = 0;
+  virtual const DT* convertOut(const uint8_t* in) const = 0;
+
+  int64_t byteSize() const { return byteSize_; }
+  const std::string& name() const { return converterName_; }
+
+  //releases whatever a converter keeps between calls; nothing to release by default
+  virtual void clear() const {}
+
+private:
+  const std::string converterName_;
+  const int64_t byteSize_;
+};
+
+#include "FWCore/PluginManager/interface/PluginFactory.h"
+
+template <typename DT>
+using TritonConverterFactory = edmplugin::PluginFactory<TritonConverterBase<DT>*()>;
+//plugin registration: with an explicit plugin name, or (SIMPLE variant) with the stringified type as the name
+#define DEFINE_TRITON_CONVERTER(input, type, name) DEFINE_EDM_PLUGIN(TritonConverterFactory<input>, type, name)
+#define DEFINE_TRITON_CONVERTER_SIMPLE(input, type) DEFINE_EDM_PLUGIN(TritonConverterFactory<input>, type, #type)
+
+#endif
diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -4,6 +4,9 @@
 #include "FWCore/Utilities/interface/Exception.h"
 #include "FWCore/Utilities/interface/Span.h"
 
+#include "FWCore/PluginManager/interface/PluginFactory.h"
+#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
+
 #include <vector>
 #include <string>
 #include <unordered_map>
@@ -58,6 +61,17 @@ class TritonData {
   //default to dims if shape isn't filled
   int64_t sizeShape() const { return variableDims_ ? dimProduct(shape_) : sizeDims(); }
 
+  //name of the default converter for this data type: Int64/Float + "StandardConverter", or an empty string otherwise
+  std::string defaultConverter() const {
+    const std::string suffix = "StandardConverter";
+    if (dname_ == "INT64")
+      return "Int64" + suffix;
+    if (dname_ == "FP32")
+      return "Float" + suffix;
+    //no default exists for any other data type name
+    return "";
+  }
+
 private:
   friend class TritonClient;
 
@@ -69,6 +83,22 @@ class TritonData {
   void setResult(std::shared_ptr<Result> result) { result_ = result; }
   IO* data() { return data_.get(); }
 
+  //stores the plugin name that createConverter() later hands to the converter factory
+  //(taken by const reference so the string is copied once, into the member, and not twice)
+  void setConverterParams(const std::string& convName) { converterName_ = convName; }
+
+  //returns the converter cached for DT (any_cast on a pointer yields nullptr, not an exception, on a miss) or creates it through the plugin factory
+  template <typename DT>
+  std::shared_ptr<TritonConverterBase<DT>> createConverter() const {
+    using ConverterType = std::shared_ptr<TritonConverterBase<DT>>;
+    if (const auto* cached = std::any_cast<ConverterType>(&converter_))
+      return *cached;
+    ConverterType created(TritonConverterFactory<DT>::get()->create(converterName_));
+    converter_ = created;
+    converter_clear_ = [created]() { created->clear(); };
+    return created;
+  }
+
   //helpers
   bool anyNeg(const ShapeView& vec) const {
     return std::any_of(vec.begin(), vec.end(), [](int64_t i) { return i < 0; });
@@ -93,6 +123,9 @@ class TritonData {
   int64_t byteSize_;
   std::any holder_;
   std::shared_ptr<Result> result_;
+  mutable std::any converter_;  //holds the std::shared_ptr<TritonConverterBase<DT>> cached by createConverter()
+  std::string converterName_;  //plugin name given to the converter factory, set via setConverterParams()
+  mutable std::function<void()> converter_clear_;  //bound to the converter's clear() in createConverter(); called from reset()
 };
 
 using TritonInputData = TritonData<nvidia::inferenceserver::client::InferInput>;

diff --git a/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml b/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml
@@ -0,0 +1,5 @@
+<library name="HeterogeneousCoreSonicTritonPlugins_converters" file="converters/*.cc">
+  <use name="HeterogeneousCore/SonicTriton"/>
+  <use name="hls"/>
+  <flags EDM_PLUGIN="1"/>
+</library>
diff --git a/HeterogeneousCore/SonicTriton/plugins/converters/FloatApFixed16Converter.cc b/HeterogeneousCore/SonicTriton/plugins/converters/FloatApFixed16Converter.cc
@@ -0,0 +1,44 @@
+#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
+
+#include <string>
+#include "ap_fixed.h"
+
+//Converter between float and ap_fixed<16, I>, declared to the base class with 2 bytes per element. Converted buffers are kept alive until clear().
+template <int I>
+class FloatApFixed16Converter : public TritonConverterBase<float> {
+public:
+  FloatApFixed16Converter() : TritonConverterBase<float>("FloatApFixed16F" + std::to_string(I) + "Converter", 2) {}
+  //copies the input into a new ap_fixed buffer stored in inputHolder_ and returns that buffer as bytes
+  const uint8_t* convertIn(const float* in) const override {
+    auto temp_vec = std::make_shared<std::vector<ap_fixed<16, I>>>(makeVecIn(in));
+    inputHolder_.push_back(temp_vec);
+    return reinterpret_cast<const uint8_t*>(temp_vec->data());
+  }
+  //copies the ap_fixed input into a new float buffer stored in outputHolder_ and returns that buffer
+  const float* convertOut(const uint8_t* in) const override {
+    auto temp_vec = std::make_shared<std::vector<float>>(makeVecOut(reinterpret_cast<const ap_fixed<16, I>*>(in)));
+    outputHolder_.push_back(temp_vec);
+    return temp_vec->data();
+  }
+  //drops all stored buffers, so pointers returned by earlier convertIn/convertOut calls must no longer be used
+  void clear() const override {
+    inputHolder_.clear();
+    outputHolder_.clear();
+  }
+
+private:
+  //FIXME: `in` is a pointer, so sizeof(in) is the pointer size and nfeat does not depend on the real number of elements; the count has to be supplied by the caller
+  std::vector<ap_fixed<16, I>> makeVecIn(const float* in) const {
+    const unsigned int nfeat = sizeof(in) / sizeof(float);
+    return std::vector<ap_fixed<16, I>>(in, in + nfeat);
+  }
+  //FIXME: same problem as makeVecIn, sizeof(in) is the size of a pointer and not of the data it points to
+  std::vector<float> makeVecOut(const ap_fixed<16, I>* in) const {
+    const unsigned int nfeat = sizeof(in) / sizeof(ap_fixed<16, I>);
+    return std::vector<float>(in, in + nfeat);
+  }
+  mutable std::vector<std::shared_ptr<std::vector<ap_fixed<16, I>>>> inputHolder_;
+  mutable std::vector<std::shared_ptr<std::vector<float>>> outputHolder_;
+};
+
+DEFINE_TRITON_CONVERTER(float, FloatApFixed16Converter<6>, "FloatApFixed16F6Converter");
diff --git a/HeterogeneousCore/SonicTriton/plugins/converters/FloatStandardConverter.cc b/HeterogeneousCore/SonicTriton/plugins/converters/FloatStandardConverter.cc
@@ -0,0 +1,11 @@
+#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
+
+//Pass-through converter for float data: the caller's buffer is viewed as raw bytes (and back) without copying.
+class FloatStandardConverter : public TritonConverterBase<float> {
+public:
+  FloatStandardConverter() : TritonConverterBase<float>("FloatStandardConverter") {}
+  const uint8_t* convertIn(const float* values) const override { return static_cast<const uint8_t*>(static_cast<const void*>(values)); }
+  const float* convertOut(const uint8_t* bytes) const override { return static_cast<const float*>(static_cast<const void*>(bytes)); }
+};
+
+DEFINE_TRITON_CONVERTER_SIMPLE(float, FloatStandardConverter);
diff --git a/HeterogeneousCore/SonicTriton/plugins/converters/Int64StandardConverter.cc b/HeterogeneousCore/SonicTriton/plugins/converters/Int64StandardConverter.cc
@@ -0,0 +1,11 @@
+#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
+
+//Pass-through converter for int64_t data: the caller's buffer is viewed as raw bytes (and back) without copying.
+class Int64StandardConverter : public TritonConverterBase<int64_t> {
+public:
+  Int64StandardConverter() : TritonConverterBase<int64_t>("Int64StandardConverter") {}
+  const uint8_t* convertIn(const int64_t* values) const override { return static_cast<const uint8_t*>(static_cast<const void*>(values)); }
+  const int64_t* convertOut(const uint8_t* bytes) const override { return static_cast<const int64_t*>(static_cast<const void*>(bytes)); }
+};
+
+DEFINE_TRITON_CONVERTER_SIMPLE(int64_t, Int64StandardConverter);
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -79,6 +79,12 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
   if (!msg_str.empty())
     throw cms::Exception("ModelErrors") << msg_str;
 
+  //translate the "inputConverters" VPSet into a map: input name -> converter plugin name
+  const std::vector<edm::ParameterSet>& inputConverterDefs = params.getParameterSetVector("inputConverters");
+  std::unordered_map<std::string, std::string> inConvMap;
+  for (const auto& converterDef : inputConverterDefs)  //by reference: no ParameterSet copy per entry
+    inConvMap[converterDef.getParameter<std::string>("inputName")] = converterDef.getParameter<std::string>("converterName");
+
   //setup input map
   std::stringstream io_msg;
   if (verbose_)
@@ -90,6 +96,11 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
     auto [curr_itr, success] = input_.emplace(
         std::piecewise_construct, std::forward_as_tuple(iname), std::forward_as_tuple(iname, nicInput, noBatch_));
     auto& curr_input = curr_itr->second;
+    //use the converter configured for this input if there is one, otherwise the default for its data type
+    if (const auto conv = inConvMap.find(iname); conv != inConvMap.end())
+      curr_input.setConverterParams(conv->second);
+    else
+      curr_input.setConverterParams(curr_input.defaultConverter());
     inputsTriton_.push_back(curr_input.data());
     if (verbose_) {
       io_msg << "  " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
@@ -101,6 +112,12 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
   const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
   std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());
 
+  //translate the "outputConverters" VPSet into a map: output name -> converter plugin name
+  const std::vector<edm::ParameterSet>& outputConverterDefs = params.getParameterSetVector("outputConverters");
+  std::unordered_map<std::string, std::string> outConvMap;
+  for (const auto& converterDef : outputConverterDefs)  //by reference: no ParameterSet copy per entry
+    outConvMap[converterDef.getParameter<std::string>("outputName")] = converterDef.getParameter<std::string>("converterName");
+
   //setup output map
   if (verbose_)
     io_msg << "Model outputs: "
@@ -113,6 +130,11 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
     auto [curr_itr, success] = output_.emplace(
         std::piecewise_construct, std::forward_as_tuple(oname), std::forward_as_tuple(oname, nicOutput, noBatch_));
     auto& curr_output = curr_itr->second;
+    //use the converter configured for this output if there is one, otherwise the default for its data type
+    if (const auto conv = outConvMap.find(oname); conv != outConvMap.end())
+      curr_output.setConverterParams(conv->second);
+    else
+      curr_output.setConverterParams(curr_output.defaultConverter());
     outputsTriton_.push_back(curr_output.data());
     if (verbose_) {
       io_msg << "  " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
@@ -336,10 +358,19 @@ inference::ModelStatistics TritonClient::getServerSideStatus() const {
 
 //for fillDescriptions
 void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) {
+  edm::ParameterSetDescription descInConverter;
+  descInConverter.add<std::string>("converterName");
+  descInConverter.add<std::string>("inputName");
+  edm::ParameterSetDescription descOutConverter;
+  descOutConverter.add<std::string>("converterName");
+  descOutConverter.add<std::string>("outputName");
+  std::vector<edm::ParameterSet> blankVPSet;
   edm::ParameterSetDescription descClient;
   fillBasePSetDescription(descClient);
   descClient.add<std::string>("modelName");
   descClient.add<std::string>("modelVersion", "");
+  descClient.addVPSet("inputConverters", descInConverter, blankVPSet);
+  descClient.addVPSet("outputConverters", descOutConverter, blankVPSet);
   //server parameters should not affect the physics results
   descClient.addUntracked<unsigned>("batchSize");
   descClient.addUntracked<std::string>("address");

diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -1,5 +1,6 @@
 #include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
 #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
+#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
 #include "FWCore/MessageLogger/interface/MessageLogger.h"
 
 #include "model_config.pb.h"
@@ -116,14 +117,16 @@ void TritonInputData::toServer(std::shared_ptr<TritonInput<DT>> ptr) {
   //shape must be specified for variable dims or if batch size changes
   data_->SetShape(fullShape_);
 
-  if (byteSize_ != sizeof(DT))
-    throw cms::Exception("TritonDataError") << name_ << " input(): inconsistent byte size " << sizeof(DT)
+  auto converter = createConverter<DT>();
+
+  if (byteSize_ != converter->byteSize())
+    throw cms::Exception("TritonDataError") << name_ << " input(): inconsistent byte size " << converter->byteSize()
                                             << " (should be " << byteSize_ << " for " << dname_ << ")";
 
   int64_t nInput = sizeShape();
   for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
     const DT* arr = data_in[i0].data();
-    triton_utils::throwIfError(data_->AppendRaw(reinterpret_cast<const uint8_t*>(arr), nInput * byteSize_),
+    triton_utils::throwIfError(data_->AppendRaw(converter->convertIn(arr), nInput * byteSize_),
                                name_ + " input(): unable to set data for batch entry " + std::to_string(i0));
   }
 
@@ -138,7 +141,9 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
     throw cms::Exception("TritonDataError") << name_ << " output(): missing result";
   }
 
-  if (byteSize_ != sizeof(DT)) {
+  auto converter = createConverter<DT>();  //the converter, not sizeof(DT), gives the expected element byte size
+
+  if (byteSize_ != converter->byteSize()) {
-    throw cms::Exception("TritonDataError") << name_ << " output(): inconsistent byte size " << sizeof(DT)
+    throw cms::Exception("TritonDataError") << name_ << " output(): inconsistent byte size " << converter->byteSize()
                                             << " (should be " << byteSize_ << " for " << dname_ << ")";
   }
@@ -147,14 +152,14 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
   TritonOutput<DT> dataOut;
   const uint8_t* r0;
   size_t contentByteSize;
-  size_t expectedContentByteSize = nOutput * byteSize_ * batchSize_;
+  size_t expectedContentByteSize = nOutput * converter->byteSize() * batchSize_;
   triton_utils::throwIfError(result_->RawData(name_, &r0, &contentByteSize), "output(): unable to get raw");
   if (contentByteSize != expectedContentByteSize) {
     throw cms::Exception("TritonDataError") << name_ << " output(): unexpected content byte size " << contentByteSize
                                             << " (expected " << expectedContentByteSize << ")";
   }
 
-  const DT* r1 = reinterpret_cast<const DT*>(r0);
+  const DT* r1 = converter->convertOut(r0);
   dataOut.reserve(batchSize_);
   for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
     auto offset = i0 * nOutput;
@@ -168,11 +173,13 @@ template <>
 void TritonInputData::reset() {
   data_->Reset();
   holder_.reset();
+  if (converter_clear_) converter_clear_();  //empty until a converter has been created; calling it empty would throw
 }
 
 template <>
 void TritonOutputData::reset() {
   result_.reset();
+  if (converter_clear_) converter_clear_();  //empty until a converter has been created; calling it empty would throw
 }
 
 //explicit template instantiation declarations

diff --git a/HeterogeneousCore/SonicTriton/src/pluginFactories.cc b/HeterogeneousCore/SonicTriton/src/pluginFactories.cc
@@ -0,0 +1,4 @@
+#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
+
+EDM_REGISTER_PLUGINFACTORY(TritonConverterFactory<float>, "TritonConverterFloatFactory");
+EDM_REGISTER_PLUGINFACTORY(TritonConverterFactory<int64_t>, "TritonConverterInt64Factory");
diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
@@ -29,6 +29,40 @@
   "TritonGraphProducer": "gat_test",
 }
 
+inConvs = {  # producer name -> VPSet of (inputName, converterName) entries, passed below as inputConverters
+  "TritonImageProducer": cms.VPSet(
+                           cms.PSet(
+                             converterName = cms.string("FloatStandardConverter"),
+                             inputName = cms.string("gpu_0/data"),
+                           ),
+                         ),
+  "TritonGraphProducer": cms.VPSet(
+                           cms.PSet(
+                             converterName = cms.string("FloatStandardConverter"),
+                             inputName = cms.string("x__0"),
+                           ),
+                           cms.PSet(
+                             converterName = cms.string("Int64StandardConverter"),
+                             inputName = cms.string("edgeindex__1"),
+                           ),
+                         ),
+}
+
+outConvs = {  # producer name -> VPSet of (outputName, converterName) entries, passed below as outputConverters
+  "TritonImageProducer": cms.VPSet(
+                           cms.PSet(
+                             converterName = cms.string("FloatStandardConverter"),
+                             outputName = cms.string("gpu_0/softmax"),
+                           ),
+                         ),
+  "TritonGraphProducer": cms.VPSet(
+                           cms.PSet(
+                             converterName = cms.string("FloatStandardConverter"),
+                             outputName = cms.string("logits__0"),
+                           ),
+                         ),
+}
+
 if options.producer not in models:
     raise ValueError("Unknown producer: "+options.producer)
 
@@ -49,6 +83,8 @@
         modelVersion = cms.string(""),
         verbose = cms.untracked.bool(options.verbose),
         allowedTries = cms.untracked.uint32(0),
+        inputConverters = inConvs[options.producer],
+        outputConverters = outConvs[options.producer],
     )
 )
 if options.producer=="TritonImageProducer":