Merge pull request #165 from adamjw24/develop_limit_delay

Scale and limit parse delay, and cleanups
fraunhoferhhi · Jan 24, 2024 · daf0426 · daf0426
2 parents 3228387 + 8b0e527
commit daf0426
Show file tree

Hide file tree

Showing 9 changed files with 27 additions and 84 deletions.
diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp
@@ -132,7 +132,7 @@ Distortion RdCost::xGetSAD8( const DistParam& rcDtParam )
   }
 
   uiSum <<= iSubShift;
-  return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth));
+  return uiSum;
 }
 
 Distortion RdCost::xGetSAD16( const DistParam& rcDtParam )
@@ -171,7 +171,7 @@ Distortion RdCost::xGetSAD16( const DistParam& rcDtParam )
   }
 
   uiSum <<= iSubShift;
-  return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth));
+  return uiSum;
 }
 
 void RdCost::xGetSAD8X5(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos) {

diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
@@ -203,35 +203,9 @@ const uint16_t g_log2SbbSize[MAX_LOG2_TU_SIZE_PLUS_ONE][MAX_LOG2_TU_SIZE_PLUS_ON
 // initialize ROM variables
 void initROM()
 {
-#if RExt__HIGH_BIT_DEPTH_SUPPORT || !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) )
+#if !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) )
   int c;
 
-#endif
-#if RExt__HIGH_BIT_DEPTH_SUPPORT
-  {
-    c = 64;
-    const double s = sqrt((double)c) * (64 << COM16_C806_TRANS_PREC);
-
-
-    for (int k = 0; k < c; k++)
-    {
-      for (int n = 0; n < c; n++)
-      {
-        double w0, v;
-        const double PI = 3.14159265358979323846;
-
-        // DCT-II
-        w0 = k == 0 ? sqrt(0.5) : 1;
-        v = cos(PI*(n + 0.5)*k / c) * w0 * sqrt(2.0 / c);
-        short sv = (short)(s * v + (v > 0 ? 0.5 : -0.5));
-        if (g_aiT64[0][0][c*c + k*c + n] != sv)
-        {
-          msg(WARNING, "trap");
-        }
-      }
-    }
-  }
-
 #endif
 #if !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) )
   // g_aucConvertToBit[ x ]: log2(x/4), if x=4 -> 0, x=8 -> 1, x=16 -> 2, ...

diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
@@ -787,8 +787,6 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   m_colFromL0Flag        = pSrc->m_colFromL0Flag;
   m_colRefIdx            = pSrc->m_colRefIdx;
 
-  if( cpyAlmostAll ) setLambdas(pSrc->getLambdas());
-
   m_uiTLayer                      = pSrc->m_uiTLayer;
   m_bTLayerSwitchingFlag          = pSrc->m_bTLayerSwitchingFlag;
 
@@ -2213,11 +2211,6 @@ ProfileLevelTierFeatures::extractPTLInformation(const SPS &sps)
   }
 }
 
-double ProfileLevelTierFeatures::getMinCr() const
-{
-  return (m_pLevelTier!=0 && m_pProfile!=0) ? (m_pProfile->minCrScaleFactorx100 * m_pLevelTier->minCrBase[m_tier?1:0])/100.0 : 0.0 ;
-}
-
 uint64_t ProfileLevelTierFeatures::getCpbSizeInBits() const
 {
   return (m_pLevelTier!=0 && m_pProfile!=0) ? uint64_t(m_pProfile->cpbVclFactor) * m_pLevelTier->maxCpb[m_tier?1:0] : uint64_t(0);

diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
@@ -2584,7 +2584,6 @@ class Slice
   bool                       m_colFromL0Flag                 = true;   // collocated picture from List0 flag
 
   uint32_t                   m_colRefIdx                     = 0;
-  double                     m_lambdas[MAX_NUM_COMPONENT]    = { 0.0, 0.0, 0.0 };
   uint32_t                   m_maxNumIBCMergeCand            = 0;
   bool                       m_disBdofDmvrFlag               = false;
 
@@ -2752,9 +2751,6 @@ class Slice
   bool                        isInterB() const                                       { return m_eSliceType == B_SLICE;                               }
   bool                        isInterP() const                                       { return m_eSliceType == P_SLICE;                               }
 
-  void                        setLambdas( const double lambdas[MAX_NUM_COMPONENT] )  { for (int component = 0; component < MAX_NUM_COMPONENT; component++) m_lambdas[component] = lambdas[component]; }
-  const double*               getLambdas() const                                     { return m_lambdas;                                             }
-
 
   uint32_t                    getCuQpDeltaSubdiv() const                             { return this->isIntra() ? m_pcPicHeader->getCuQpDeltaSubdivIntra() : m_pcPicHeader->getCuQpDeltaSubdivInter(); }
   uint32_t                    getCuChromaQpOffsetSubdiv() const                      { return this->isIntra() ? m_pcPicHeader->getCuChromaQpOffsetSubdivIntra() : m_pcPicHeader->getCuChromaQpOffsetSubdivInter(); }
@@ -2984,7 +2980,6 @@ class ProfileLevelTierFeatures
     const LevelTierFeatures   *getLevelTierFeatures() const { return m_pLevelTier; }
     Tier                       getTier()              const { return m_tier; }
     uint64_t                   getCpbSizeInBits()     const;
-    double                     getMinCr()             const;
     uint32_t                   getMaxDpbSize( uint32_t picSizeMaxInSamplesY ) const;
 };
 

diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
@@ -68,7 +68,13 @@ namespace vvdec
 #define RECO_WHILE_PARSE                                  1
 #define ALLOW_MIDER_LF_DURING_PICEXT                      1
 
-#define MAX_OUT_OF_ORDER_PICS                             3 // maximum number of pictures, that are reconstructed out of order
+#define MAX_OUT_OF_ORDER_PICS                             3  // maximum number of pictures that are reconstructed out of order
+#if INTPTR_MAX == INT64_MAX
+#define DEFAULT_PARSE_DELAY_FACTOR                        24 // factor to derive the default parse delay from the number of threads (fixed point with 4 fractional bits): 24/16 equals a slope of 1.5
+#else
+#define DEFAULT_PARSE_DELAY_FACTOR                        16 // factor to derive the default parse delay from the number of threads (fixed point with 4 fractional bits): 16/16 equals a slope of 1.0
+#endif
+#define DEFAULT_PARSE_DELAY_MAX                           48 // maximum parse delay derived from thread count, when not set explicitly
 
 #define JVET_O1170_CHECK_BV_AT_DECODER                    0 // For decoder to check if a BV is valid or not
 
@@ -133,19 +139,13 @@ namespace vvdec
 // Tool Switches
 // ====================================================================================================================
 
-
-// This can be enabled by the makefile
-#ifndef RExt__HIGH_BIT_DEPTH_SUPPORT
-#define RExt__HIGH_BIT_DEPTH_SUPPORT                      0 ///< 0 (default) use data type definitions for 8-10 bit video, 1 = use larger data types to allow for up to 16-bit video (originally developed as part of N0188)
-#endif
-
 // SIMD optimizations
 #define SIMD_ENABLE                                       1
-#define ENABLE_SIMD_OPT                                 ( SIMD_ENABLE && !RExt__HIGH_BIT_DEPTH_SUPPORT )    ///< SIMD optimizations, no impact on RD performance
-#define ENABLE_SIMD_OPT_MCIF                            ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the interpolation filter, no impact on RD performance
-#define ENABLE_SIMD_OPT_BUFFER                          ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the buffer operations, no impact on RD performance
-#define ENABLE_SIMD_OPT_DIST                            ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance
-#define ENABLE_SIMD_OPT_ALF                             ( 1 && ENABLE_SIMD_OPT /*&& !ALF_FIX*/ )                            ///< SIMD optimization for ALF
+#define ENABLE_SIMD_OPT                                 ( SIMD_ENABLE )                                     ///< SIMD optimizations
+#define ENABLE_SIMD_OPT_MCIF                            ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the interpolation filter
+#define ENABLE_SIMD_OPT_BUFFER                          ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the buffer operations
+#define ENABLE_SIMD_OPT_DIST                            ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the distortion calculations (SAD)
+#define ENABLE_SIMD_OPT_ALF                             ( 1 && ENABLE_SIMD_OPT /*&& !ALF_FIX*/ )            ///< SIMD optimization for ALF
 #define ENABLE_SIMD_OPT_INTRAPRED                       ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for Intra Prediction
 #define ENABLE_SIMD_OPT_QUANT                           ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for Quant/Dequant
 #if ENABLE_SIMD_OPT_BUFFER
@@ -166,23 +166,6 @@ namespace vvdec
 
 #define LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET         1 /// JVET-L0414 (CE11.2.2) with explicit signalling of num interval, threshold and qpOffset
 
-// ====================================================================================================================
-// Derived macros
-// ====================================================================================================================
-
-#if RExt__HIGH_BIT_DEPTH_SUPPORT
-#define FULL_NBIT                                         1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion
-#else
-#define FULL_NBIT                                         1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion
-#endif
-
-#if FULL_NBIT
-#define DISTORTION_PRECISION_ADJUSTMENT(x)                0
-#else
-#define DISTORTION_ESTIMATION_BITS                        8
-#define DISTORTION_PRECISION_ADJUSTMENT(x)                ((x>DISTORTION_ESTIMATION_BITS)? ((x)-DISTORTION_ESTIMATION_BITS) : 0)
-#endif
-
 // ====================================================================================================================
 // Error checks
 // ====================================================================================================================

diff --git a/source/Lib/CommonLib/arm/RdCostARM.h b/source/Lib/CommonLib/arm/RdCostARM.h
@@ -148,7 +148,7 @@ Distortion xGetSAD_MxN_SIMD( const DistParam& rcDtParam )
 
   uiSum = vaddlvq_s16( vsum16 );
   uiSum <<= iSubShift;
-  return uiSum >> DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth );
+  return uiSum;
 }
 
 template<ARM_VEXT vext, bool isCalCentrePos>
@@ -217,9 +217,9 @@ void xGetSADX5_16xN_SIMDImp( const DistParam& rcDtParam, Distortion* cost )
   if( isCalCentrePos )
     sumTwo = vshlq_s32( sumTwo, vdupq_n_s32( iSubShift ) );
 
-  sum = vshrq_n_s32( sum, ( 1 + ( DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth ) ) ) );
+  sum = vshrq_n_s32( sum, 1 );
   if( isCalCentrePos )
-    sumTwo = vshrq_n_s32( sumTwo, ( 1 + ( DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth ) ) ) );
+    sumTwo = vshrq_n_s32( sumTwo, 1 );
 
   vst1q_lane_u64( (uint64_t*) &cost[ 0 ], (uint64x2_t) sum, 0 );
   if( isCalCentrePos )

diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h
@@ -196,7 +196,7 @@ Distortion xGetSAD_MxN_SIMD( const DistParam &rcDtParam )
   }
 
   uiSum <<= iSubShift;
-  return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
+  return uiSum;
 }
 
 template <X86_VEXT vext, bool isCalCentrePos>
@@ -276,8 +276,8 @@ void xGetSADX5_8xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {
   sum0 = _mm_slli_epi32(sum0, iSubShift);
   if (isCalCentrePos) sum2 = _mm_slli_epi32(sum2, iSubShift);
 
-  sum0 = _mm_srli_epi32(sum0, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
-  if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
+  sum0 = _mm_srli_epi32(sum0, 1);
+  if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, 1);
 
   _mm_storeu_si64( ( __m128i* ) &cost[0], sum0 );
   if (isCalCentrePos) cost[2] = (_mm_cvtsi128_si32(sum2));
@@ -502,13 +502,13 @@ void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {
 
     sum0134 = _mm_slli_epi32(sum0134, iSubShift);
 
-    sum0134 = _mm_srli_epi32(sum0134, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
+    sum0134 = _mm_srli_epi32(sum0134, 1);
 
     _mm_storeu_si64( ( __m128i* ) &cost[0], sum0134 );
     if (isCalCentrePos) {
       int tmp = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum2)) + _mm256_extract_epi32(sum2, 4);
       tmp <<= iSubShift;
-      tmp >>= (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)));
+      tmp >>= 1;
       cost[2] = tmp;
     }
     _mm_storeu_si64( ( __m128i* ) &cost[3], _mm_unpackhi_epi64( sum0134, sum0134 ) );
@@ -586,8 +586,8 @@ void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {
     sum0 = _mm_slli_epi32(sum0, iSubShift);
     if (isCalCentrePos) sum2 = _mm_slli_epi32(sum2, iSubShift);
 
-    sum0 = _mm_srli_epi32(sum0, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
-    if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
+    sum0 = _mm_srli_epi32(sum0, 1);
+    if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, 1);
 
     _mm_storeu_si64( ( __m128i* ) &cost[0], sum0 );
     if (isCalCentrePos) cost[2] = (_mm_cvtsi128_si32(sum2));

diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
@@ -131,7 +131,7 @@ void DecLib::create( int numDecThreads, int parserFrameDelay, const UserAllocato
   if( parserFrameDelay < 0 )
   {
     CHECK( numDecThreads < 0, "invalid number of threads" );
-    parserFrameDelay = numDecThreads;
+    parserFrameDelay = std::min<int>( ( numDecThreads * DEFAULT_PARSE_DELAY_FACTOR ) >> 4, DEFAULT_PARSE_DELAY_MAX );
   }
   m_parseFrameDelay = parserFrameDelay;
 

diff --git a/source/Lib/DecoderLib/DecLibParser.cpp b/source/Lib/DecoderLib/DecLibParser.cpp
@@ -1077,12 +1077,10 @@ void DecLibParser::xActivateParameterSets( const int layerId )
 
     xParsePrefixSEImessages();
 
-#if RExt__HIGH_BIT_DEPTH_SUPPORT == 0
-    if( /* sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ||*/ sps->getBitDepth( CHANNEL_TYPE_LUMA ) > 12 || sps->getBitDepth( CHANNEL_TYPE_CHROMA ) > 12 )
+    if( sps->getBitDepth( CHANNEL_TYPE_LUMA ) > 12 || sps->getBitDepth( CHANNEL_TYPE_CHROMA ) > 12 )
     {
       THROW( "High bit depth support must be enabled at compile-time in order to decode this bitstream\n" );
     }
-#endif
 
     applyReferencePictureListBasedMarking( m_apcSlicePilot, layerId, *pps );