Skip to content

Commit

Permalink
Merge pull request #165 from adamjw24/develop_limit_delay
Browse files Browse the repository at this point in the history
Scale and limit parse delay, and cleanups
  • Loading branch information
K-os authored Jan 24, 2024
2 parents 3228387 + 8b0e527 commit daf0426
Show file tree
Hide file tree
Showing 9 changed files with 27 additions and 84 deletions.
4 changes: 2 additions & 2 deletions source/Lib/CommonLib/RdCost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ Distortion RdCost::xGetSAD8( const DistParam& rcDtParam )
}

uiSum <<= iSubShift;
return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth));
return uiSum;
}

Distortion RdCost::xGetSAD16( const DistParam& rcDtParam )
Expand Down Expand Up @@ -171,7 +171,7 @@ Distortion RdCost::xGetSAD16( const DistParam& rcDtParam )
}

uiSum <<= iSubShift;
return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth));
return uiSum;
}

void RdCost::xGetSAD8X5(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos) {
Expand Down
28 changes: 1 addition & 27 deletions source/Lib/CommonLib/Rom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,35 +203,9 @@ const uint16_t g_log2SbbSize[MAX_LOG2_TU_SIZE_PLUS_ONE][MAX_LOG2_TU_SIZE_PLUS_ON
// initialize ROM variables
void initROM()
{
#if RExt__HIGH_BIT_DEPTH_SUPPORT || !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) )
#if !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) )
int c;

#endif
#if RExt__HIGH_BIT_DEPTH_SUPPORT
{
c = 64;
const double s = sqrt((double)c) * (64 << COM16_C806_TRANS_PREC);


for (int k = 0; k < c; k++)
{
for (int n = 0; n < c; n++)
{
double w0, v;
const double PI = 3.14159265358979323846;

// DCT-II
w0 = k == 0 ? sqrt(0.5) : 1;
v = cos(PI*(n + 0.5)*k / c) * w0 * sqrt(2.0 / c);
short sv = (short)(s * v + (v > 0 ? 0.5 : -0.5));
if (g_aiT64[0][0][c*c + k*c + n] != sv)
{
msg(WARNING, "trap");
}
}
}
}

#endif
#if !( ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 ) )
// g_aucConvertToBit[ x ]: log2(x/4), if x=4 -> 0, x=8 -> 1, x=16 -> 2, ...
Expand Down
7 changes: 0 additions & 7 deletions source/Lib/CommonLib/Slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -787,8 +787,6 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
m_colFromL0Flag = pSrc->m_colFromL0Flag;
m_colRefIdx = pSrc->m_colRefIdx;

if( cpyAlmostAll ) setLambdas(pSrc->getLambdas());

m_uiTLayer = pSrc->m_uiTLayer;
m_bTLayerSwitchingFlag = pSrc->m_bTLayerSwitchingFlag;

Expand Down Expand Up @@ -2213,11 +2211,6 @@ ProfileLevelTierFeatures::extractPTLInformation(const SPS &sps)
}
}

// Returns m_pProfile->minCrScaleFactorx100 multiplied by the tier-selected entry of
// m_pLevelTier->minCrBase, divided by 100.0. Returns 0.0 when either the profile
// pointer or the level/tier pointer is null.
// NOTE(review): presumably this is the MinCr (minimum compression ratio) limit of the
// active profile/tier/level — confirm against the PTL tables.
double ProfileLevelTierFeatures::getMinCr() const
{
// minCrBase is indexed with 1 when m_tier is non-zero, otherwise with 0.
return (m_pLevelTier!=0 && m_pProfile!=0) ? (m_pProfile->minCrScaleFactorx100 * m_pLevelTier->minCrBase[m_tier?1:0])/100.0 : 0.0 ;
}

uint64_t ProfileLevelTierFeatures::getCpbSizeInBits() const
{
return (m_pLevelTier!=0 && m_pProfile!=0) ? uint64_t(m_pProfile->cpbVclFactor) * m_pLevelTier->maxCpb[m_tier?1:0] : uint64_t(0);
Expand Down
5 changes: 0 additions & 5 deletions source/Lib/CommonLib/Slice.h
Original file line number Diff line number Diff line change
Expand Up @@ -2584,7 +2584,6 @@ class Slice
bool m_colFromL0Flag = true; // collocated picture from List0 flag

uint32_t m_colRefIdx = 0;
double m_lambdas[MAX_NUM_COMPONENT] = { 0.0, 0.0, 0.0 };
uint32_t m_maxNumIBCMergeCand = 0;
bool m_disBdofDmvrFlag = false;

Expand Down Expand Up @@ -2752,9 +2751,6 @@ class Slice
// True when this slice's type (m_eSliceType) is B_SLICE.
bool isInterB() const { return m_eSliceType == B_SLICE; }
// True when this slice's type (m_eSliceType) is P_SLICE.
bool isInterP() const { return m_eSliceType == P_SLICE; }

// Copies MAX_NUM_COMPONENT values from `lambdas` into m_lambdas, one per component index.
// The caller must supply at least MAX_NUM_COMPONENT readable entries.
void setLambdas( const double lambdas[MAX_NUM_COMPONENT] ) { for (int component = 0; component < MAX_NUM_COMPONENT; component++) m_lambdas[component] = lambdas[component]; }
// Returns a read-only pointer to the first element of the m_lambdas array.
const double* getLambdas() const { return m_lambdas; }


// Returns the picture header's CU QP delta subdiv value: the intra variant when
// isIntra() is true, otherwise the inter variant. Dereferences m_pcPicHeader unchecked.
uint32_t getCuQpDeltaSubdiv() const { return this->isIntra() ? m_pcPicHeader->getCuQpDeltaSubdivIntra() : m_pcPicHeader->getCuQpDeltaSubdivInter(); }
// Returns the picture header's CU chroma QP offset subdiv value: the intra variant when
// isIntra() is true, otherwise the inter variant. Dereferences m_pcPicHeader unchecked.
uint32_t getCuChromaQpOffsetSubdiv() const { return this->isIntra() ? m_pcPicHeader->getCuChromaQpOffsetSubdivIntra() : m_pcPicHeader->getCuChromaQpOffsetSubdivInter(); }
Expand Down Expand Up @@ -2984,7 +2980,6 @@ class ProfileLevelTierFeatures
// Returns the stored m_pLevelTier pointer; other members of this class compare it
// against 0 before use, so callers should expect that it may be null.
const LevelTierFeatures *getLevelTierFeatures() const { return m_pLevelTier; }
// Returns the stored tier value (m_tier).
Tier getTier() const { return m_tier; }
uint64_t getCpbSizeInBits() const;
double getMinCr() const;
uint32_t getMaxDpbSize( uint32_t picSizeMaxInSamplesY ) const;
};

Expand Down
41 changes: 12 additions & 29 deletions source/Lib/CommonLib/TypeDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ namespace vvdec
#define RECO_WHILE_PARSE 1
#define ALLOW_MIDER_LF_DURING_PICEXT 1

#define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order
#define MAX_OUT_OF_ORDER_PICS 3 // maximum number of pictures, that are reconstructed out of order
// Default parse-delay tuning, applied when the caller passes a negative parse delay:
// the delay becomes ( number of decoder threads * DEFAULT_PARSE_DELAY_FACTOR ) >> 4,
// capped at DEFAULT_PARSE_DELAY_MAX.
// The larger factor is selected when intptr_t is 64 bits wide (INTPTR_MAX == INT64_MAX).
#if INTPTR_MAX == INT64_MAX
#define DEFAULT_PARSE_DELAY_FACTOR 24 // factor to set default parse delay based on number of threads (4-bit fixed point), equals a 1.5 slope
#else
#define DEFAULT_PARSE_DELAY_FACTOR 16 // factor to set default parse delay based on number of threads (4-bit fixed point), equals a 1.0 slope
#endif
#define DEFAULT_PARSE_DELAY_MAX 48 // maximum parse delay derived from thread count, when not set explicitly

#define JVET_O1170_CHECK_BV_AT_DECODER 0 // For decoder to check if a BV is valid or not

Expand Down Expand Up @@ -133,19 +139,13 @@ namespace vvdec
// Tool Switches
// ====================================================================================================================


// This can be enabled by the makefile
#ifndef RExt__HIGH_BIT_DEPTH_SUPPORT
#define RExt__HIGH_BIT_DEPTH_SUPPORT 0 ///< 0 (default) use data type definitions for 8-10 bit video, 1 = use larger data types to allow for up to 16-bit video (originally developed as part of N0188)
#endif

// SIMD optimizations
#define SIMD_ENABLE 1
#define ENABLE_SIMD_OPT ( SIMD_ENABLE && !RExt__HIGH_BIT_DEPTH_SUPPORT ) ///< SIMD optimizations, no impact on RD performance
#define ENABLE_SIMD_OPT_MCIF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the interpolation filter, no impact on RD performance
#define ENABLE_SIMD_OPT_BUFFER ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the buffer operations, no impact on RD performance
#define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance
#define ENABLE_SIMD_OPT_ALF ( 1 && ENABLE_SIMD_OPT /*&& !ALF_FIX*/ ) ///< SIMD optimization for ALF
#define ENABLE_SIMD_OPT ( SIMD_ENABLE ) ///< SIMD optimizations
#define ENABLE_SIMD_OPT_MCIF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the interpolation filter
#define ENABLE_SIMD_OPT_BUFFER ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the buffer operations
#define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations (SAD)
#define ENABLE_SIMD_OPT_ALF ( 1 && ENABLE_SIMD_OPT /*&& !ALF_FIX*/ ) ///< SIMD optimization for ALF
#define ENABLE_SIMD_OPT_INTRAPRED ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for Intra Prediction
#define ENABLE_SIMD_OPT_QUANT ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for Quant/Dequant
#if ENABLE_SIMD_OPT_BUFFER
Expand All @@ -166,23 +166,6 @@ namespace vvdec

#define LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET 1 /// JVET-L0414 (CE11.2.2) with explicit signalling of num interval, threshold and qpOffset

// ====================================================================================================================
// Derived macros
// ====================================================================================================================

#if RExt__HIGH_BIT_DEPTH_SUPPORT
#define FULL_NBIT 1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion
#else
#define FULL_NBIT 1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion
#endif

#if FULL_NBIT
#define DISTORTION_PRECISION_ADJUSTMENT(x) 0
#else
#define DISTORTION_ESTIMATION_BITS 8
#define DISTORTION_PRECISION_ADJUSTMENT(x) ((x>DISTORTION_ESTIMATION_BITS)? ((x)-DISTORTION_ESTIMATION_BITS) : 0)
#endif

// ====================================================================================================================
// Error checks
// ====================================================================================================================
Expand Down
6 changes: 3 additions & 3 deletions source/Lib/CommonLib/arm/RdCostARM.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ Distortion xGetSAD_MxN_SIMD( const DistParam& rcDtParam )

uiSum = vaddlvq_s16( vsum16 );
uiSum <<= iSubShift;
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth );
return uiSum;
}

template<ARM_VEXT vext, bool isCalCentrePos>
Expand Down Expand Up @@ -217,9 +217,9 @@ void xGetSADX5_16xN_SIMDImp( const DistParam& rcDtParam, Distortion* cost )
if( isCalCentrePos )
sumTwo = vshlq_s32( sumTwo, vdupq_n_s32( iSubShift ) );

sum = vshrq_n_s32( sum, ( 1 + ( DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth ) ) ) );
sum = vshrq_n_s32( sum, 1 );
if( isCalCentrePos )
sumTwo = vshrq_n_s32( sumTwo, ( 1 + ( DISTORTION_PRECISION_ADJUSTMENT( rcDtParam.bitDepth ) ) ) );
sumTwo = vshrq_n_s32( sumTwo, 1 );

vst1q_lane_u64( (uint64_t*) &cost[ 0 ], (uint64x2_t) sum, 0 );
if( isCalCentrePos )
Expand Down
14 changes: 7 additions & 7 deletions source/Lib/CommonLib/x86/RdCostX86.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ Distortion xGetSAD_MxN_SIMD( const DistParam &rcDtParam )
}

uiSum <<= iSubShift;
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
return uiSum;
}

template <X86_VEXT vext, bool isCalCentrePos>
Expand Down Expand Up @@ -276,8 +276,8 @@ void xGetSADX5_8xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {
sum0 = _mm_slli_epi32(sum0, iSubShift);
if (isCalCentrePos) sum2 = _mm_slli_epi32(sum2, iSubShift);

sum0 = _mm_srli_epi32(sum0, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
sum0 = _mm_srli_epi32(sum0, 1);
if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, 1);

_mm_storeu_si64( ( __m128i* ) &cost[0], sum0 );
if (isCalCentrePos) cost[2] = (_mm_cvtsi128_si32(sum2));
Expand Down Expand Up @@ -502,13 +502,13 @@ void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {

sum0134 = _mm_slli_epi32(sum0134, iSubShift);

sum0134 = _mm_srli_epi32(sum0134, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
sum0134 = _mm_srli_epi32(sum0134, 1);

_mm_storeu_si64( ( __m128i* ) &cost[0], sum0134 );
if (isCalCentrePos) {
int tmp = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum2)) + _mm256_extract_epi32(sum2, 4);
tmp <<= iSubShift;
tmp >>= (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)));
tmp >>= 1;
cost[2] = tmp;
}
_mm_storeu_si64( ( __m128i* ) &cost[3], _mm_unpackhi_epi64( sum0134, sum0134 ) );
Expand Down Expand Up @@ -586,8 +586,8 @@ void xGetSADX5_16xN_SIMDImp(const DistParam& rcDtParam, Distortion* cost) {
sum0 = _mm_slli_epi32(sum0, iSubShift);
if (isCalCentrePos) sum2 = _mm_slli_epi32(sum2, iSubShift);

sum0 = _mm_srli_epi32(sum0, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, (1 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth))));
sum0 = _mm_srli_epi32(sum0, 1);
if (isCalCentrePos) sum2 = _mm_srli_epi32(sum2, 1);

_mm_storeu_si64( ( __m128i* ) &cost[0], sum0 );
if (isCalCentrePos) cost[2] = (_mm_cvtsi128_si32(sum2));
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/DecoderLib/DecLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ void DecLib::create( int numDecThreads, int parserFrameDelay, const UserAllocato
if( parserFrameDelay < 0 )
{
CHECK( numDecThreads < 0, "invalid number of threads" );
parserFrameDelay = numDecThreads;
parserFrameDelay = std::min<int>( ( numDecThreads * DEFAULT_PARSE_DELAY_FACTOR ) >> 4, DEFAULT_PARSE_DELAY_MAX );
}
m_parseFrameDelay = parserFrameDelay;

Expand Down
4 changes: 1 addition & 3 deletions source/Lib/DecoderLib/DecLibParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1077,12 +1077,10 @@ void DecLibParser::xActivateParameterSets( const int layerId )

xParsePrefixSEImessages();

#if RExt__HIGH_BIT_DEPTH_SUPPORT == 0
if( /* sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ||*/ sps->getBitDepth( CHANNEL_TYPE_LUMA ) > 12 || sps->getBitDepth( CHANNEL_TYPE_CHROMA ) > 12 )
if( sps->getBitDepth( CHANNEL_TYPE_LUMA ) > 12 || sps->getBitDepth( CHANNEL_TYPE_CHROMA ) > 12 )
{
THROW( "High bit depth support must be enabled at compile-time in order to decode this bitstream\n" );
}
#endif

applyReferencePictureListBasedMarking( m_apcSlicePilot, layerId, *pps );

Expand Down

0 comments on commit daf0426

Please sign in to comment.