NVIDIA-AI-IOT · cagdasbas · Jan 20, 2021 · Jan 20, 2021
diff --git a/Makefile b/Makefile
@@ -18,7 +18,7 @@ ifeq ($(TARGET_DEVICE),aarch64)
   CFLAGS:= -DPLATFORM_TEGRA
 endif
 
-SRCS:= deepstream_pose_estimation_app.cpp
+SRCS:= ../../apps-common/src/deepstream_perf.c deepstream_pose_estimation_app.cpp
 
 INCS:= $(wildcard *.h)
 

diff --git a/deepstream_pose_estimation_app.cpp b/deepstream_pose_estimation_app.cpp
@@ -8,6 +8,7 @@
 #include <stdio.h>
 
 #include "gstnvdsmeta.h"
+#include "deepstream_perf.h"
 #include "nvdsgstutils.h"
 #include "nvbufsurface.h"
 
@@ -31,6 +32,27 @@
  * based on the fastest source's framerate. */
 #define MUXER_BATCH_TIMEOUT_USEC 4000000
 
+#define MAX_STREAMS 64
+
+typedef struct {  /* User context handed to perf_cb through its gpointer argument. */
+    /** identifies the stream ID */
+    guint32 stream_index;
+    gdouble fps[MAX_STREAMS];      /* per-stream FPS copied in by perf_cb */
+    gdouble fps_avg[MAX_STREAMS];  /* per-stream average FPS copied in by perf_cb */
+    guint32 num_instances;         /* NOTE(review): presumably the stream count; not written in the visible code */
+    guint header_print_cnt;        /* NOTE(review): not used in the visible code - confirm it is needed */
+    GMutex fps_lock;               /* held by perf_cb while it updates and prints fps / fps_avg */
+    gpointer context;              /* perf_cb sets this to point back at the PerfCtx itself */
+
+    /** Test specific info */
+    guint32 set_batch_size;
+} PerfCtx;
+
+typedef struct {  /* User data for the latency_measurement_buf_prob pad probe. */
+    GMutex *lock;     /* locked by the probe around each latency measurement and printout */
+    int num_sources;  /* number of NvDsFrameLatencyInfo entries the probe allocates per buffer */
+} LatencyCtx;
+
 template <class T>
 using Vec1D = std::vector<T>;
 
@@ -42,6 +64,62 @@ using Vec3D = std::vector<Vec2D<T>>;
 
 gint frame_number = 0;
 
+/**
+ * Callback that caches and prints the current and average FPS of each stream.
+ * At most MAX_STREAMS entries are handled because the fps/fps_avg arrays in
+ * PerfCtx hold no more than that.
+ */
+static void
+perf_cb(gpointer context, NvDsAppPerfStruct *str) {
+  PerfCtx *thCtx = (PerfCtx *) context;
+
+  g_mutex_lock(&thCtx->fps_lock);
+  /* Clamp so a larger num_instances cannot overrun fps[] / fps_avg[]. */
+  guint32 numf = MIN(str->num_instances, (guint32) MAX_STREAMS);
+  guint32 i;
+
+  thCtx->context = thCtx;
+  g_print("**PERF: ");
+  for (i = 0; i < numf; i++) {
+    thCtx->fps[i] = str->fps[i];
+    thCtx->fps_avg[i] = str->fps_avg[i];
+    g_print("%.2f (%.2f)\t", thCtx->fps[i], thCtx->fps_avg[i]);
+  }
+  g_print("\n");
+  g_mutex_unlock(&thCtx->fps_lock);
+}
+
+/**
+ * Buffer pad probe: when nvds_enable_latency_measurement is set, prints the
+ * latency of every source in the batch. Always returns GST_PAD_PROBE_OK.
+ */
+static GstPadProbeReturn
+latency_measurement_buf_prob(GstPad *pad, GstPadProbeInfo *info, gpointer u_data) {
+  LatencyCtx *ctx = (LatencyCtx *) u_data;
+  static int batch_num = 0;
+  guint i = 0, num_sources_in_batch = 0;
+  if (nvds_enable_latency_measurement) {
+    GstBuffer *buf = (GstBuffer *) info->data;
+    g_mutex_lock(ctx->lock);
+    NvDsFrameLatencyInfo *latency_info = (NvDsFrameLatencyInfo *)
+        calloc(ctx->num_sources, sizeof(NvDsFrameLatencyInfo));
+    if (latency_info) {
+      g_print("\n************BATCH-NUM = %d**************\n", batch_num);
+      num_sources_in_batch = nvds_measure_buffer_latency(buf, latency_info);
+      for (i = 0; i < num_sources_in_batch; i++) {
+        g_print("Source id = %d Frame_num = %d Frame latency = %lf (ms) \n",
+                latency_info[i].source_id,
+                latency_info[i].frame_num,
+                latency_info[i].latency);
+      }
+      free(latency_info);  /* scratch array is allocated per call; release it */
+    }
+    batch_num++;  /* kept inside the lock: batch_num is shared across calls */
+    g_mutex_unlock(ctx->lock);
+  }
+  return GST_PAD_PROBE_OK;
+}
+
 /*Method to parse information returned from the model*/
 std::tuple<Vec2D<int>, Vec3D<float>>
 parse_objects_from_tensor_meta(NvDsInferTensorMeta *tensor_meta)
@@ -386,6 +464,28 @@ int main(int argc, char *argv[])
 
   /* Use convertor to convert from NV12 to RGBA as required by nvosd */
   nvvidconv = gst_element_factory_make("nvvideoconvert", "nvvideo-converter");
+  GstPad *sink_pad = gst_element_get_static_pad(nvvidconv, "src");
+  if (!sink_pad)
+    g_print("Unable to get sink pad\n");
+  else {
+    LatencyCtx *ctx = (LatencyCtx *) g_malloc0(sizeof(LatencyCtx));
+    ctx->lock = (GMutex *) g_malloc0(sizeof(GMutex));
+    ctx->num_sources = 1;
+    gst_pad_add_probe(sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
+                      latency_measurement_buf_prob, ctx, NULL);
+  }
+  gst_object_unref(sink_pad);
+  GstPad *conv_pad = gst_element_get_static_pad(nvvidconv, "sink");
+  if (!conv_pad)
+    g_print("Unable to get conv_pad pad\n");
+  else {
+    NvDsAppPerfStructInt *str = (NvDsAppPerfStructInt *) g_malloc0(sizeof(NvDsAppPerfStructInt));
+    PerfCtx *perf_ctx = (PerfCtx *) g_malloc0(sizeof(PerfCtx));
+    g_mutex_init(&perf_ctx->fps_lock);
+    str->context = perf_ctx;
+    enable_perf_measurement(str, conv_pad, 1, 1, 0, perf_cb);
+  }
+  gst_object_unref(conv_pad);
 
   queue = gst_element_factory_make("queue", "queue");
   filesink = gst_element_factory_make("filesink", "filesink");