From 486fc43b8e4fe6835c35e822252892b1e88f2547 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Thu, 10 Aug 2023 16:48:54 +0200 Subject: [PATCH] Prepare for v2.1.1 - bugfixes (skipped RASLs, releasing internal Pictures) - clean RPR & simdFilter - improve makefile --- CMakeLists.txt | 4 +- Makefile | 49 +++--- include/vvdec/vvdec.h | 2 +- source/Lib/CommonLib/InterPrediction.cpp | 135 +++++--------- .../CommonLib/x86/InterpolationFilterX86.h | 164 ++++++------------ source/Lib/DecoderLib/DecLibParser.cpp | 26 +-- source/Lib/vvdec/vvdecimpl.cpp | 146 +++++----------- source/Lib/vvdec/vvdecimpl.h | 54 +++--- 8 files changed, 203 insertions(+), 377 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c22b0b6..8dd77c47 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ cmake_minimum_required( VERSION 3.12.0 FATAL_ERROR ) cmake_policy( VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} ) # project name -project( vvdec VERSION 2.1.0 ) +project( vvdec VERSION 2.1.1 ) # set alternative version numbering for release candidates #set( PROJECT_VERSION_RC rc1 ) if( PROJECT_VERSION_RC ) @@ -64,7 +64,7 @@ if( VVDEC_ENABLE_X86_SIMD OR VVDEC_ENABLE_ARM_SIMD ) if( "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "armv7|arm.*eabi" OR "${CMAKE_CXX_COMPILER}" MATCHES "armv7|arm.*eabi" OR "${CMAKE_OSX_ARCHITECTURES}" MATCHES "armv7" ) - message( WARNING "You are building for armv7 which is knnow to be broken. As a workaround, disable SIMD at runtime or build with VVDEC_ENABLE_X86_SIMD=0 and VVDEC_ENABLE_ARM_SIMD=0." ) + message( WARNING "You are building for armv7 which is known to be broken. As a workaround, disable SIMD at runtime or build with VVDEC_ENABLE_X86_SIMD=0 and VVDEC_ENABLE_ARM_SIMD=0." ) endif() endif() diff --git a/Makefile b/Makefile index da58cfe2..e7e1124f 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ # make -cp => clean + build variant=relwithdebinfo # -TARGETS := vvdec vvdecapp +TARGETS := vvdec vvdecapp ifneq ($(g),) @@ -172,22 +172,22 @@ DEFAULT_BUILD_TARGETS_SHARED := $(foreach t,$(DEFAULT_BUILD_TARGETS_STATIC),$(t) DEFAULT_BUILD_TARGETS := $(DEFAULT_BUILD_TARGETS_STATIC) $(DEFAULT_BUILD_TARGETS_SHARED) -release: $(BUILD_DIR-release) +release: $(BUILD_DIR-release)/CMakeCache.txt cmake $(BUILD_OPTIONS-$@) $(BUILD_JOBS) $(BUILD_TOOL_OPTIONS) -debug: $(BUILD_DIR-debug) +debug: $(BUILD_DIR-debug)/CMakeCache.txt cmake $(BUILD_OPTIONS-$@) $(BUILD_JOBS) $(BUILD_TOOL_OPTIONS) -relwithdebinfo: $(BUILD_DIR-relwithdebinfo) +relwithdebinfo: $(BUILD_DIR-relwithdebinfo)/CMakeCache.txt cmake $(BUILD_OPTIONS-$@) $(BUILD_JOBS) $(BUILD_TOOL_OPTIONS) -release-shared: $(BUILD_DIR-release-shared) +release-shared: $(BUILD_DIR-release-shared)/CMakeCache.txt cmake $(BUILD_OPTIONS-$@) $(BUILD_JOBS) $(BUILD_TOOL_OPTIONS) -debug-shared: $(BUILD_DIR-debug-shared) +debug-shared: $(BUILD_DIR-debug-shared)/CMakeCache.txt cmake $(BUILD_OPTIONS-$@) $(BUILD_JOBS) $(BUILD_TOOL_OPTIONS) -relwithdebinfo-shared: $(BUILD_DIR-relwithdebinfo-shared) +relwithdebinfo-shared: $(BUILD_DIR-relwithdebinfo-shared)/CMakeCache.txt cmake $(BUILD_OPTIONS-$@) $(BUILD_JOBS) $(BUILD_TOOL_OPTIONS) $(foreach t,$(DEFAULT_BUILD_TARGETS),clean-$(t)): @@ -213,37 +213,37 @@ install-relwithdebinfo-shared: relwithdebinfo-shared ifeq ($(CMAKE_MCONFIG),) -$(BUILD_DIR-release) configure-release: +$(BUILD_DIR-release)/CMakeCache.txt configure-release: cmake -S . -B $(BUILD_DIR-release) $(CONFIG_OPTIONS) -DCMAKE_BUILD_TYPE=Release -$(BUILD_DIR-debug) configure-debug: +$(BUILD_DIR-debug)/CMakeCache.txt configure-debug: cmake -S . -B $(BUILD_DIR-debug) $(CONFIG_OPTIONS) -DCMAKE_BUILD_TYPE=Debug -$(BUILD_DIR-relwithdebinfo) configure-relwithdebinfo: +$(BUILD_DIR-relwithdebinfo)/CMakeCache.txt configure-relwithdebinfo: cmake -S . -B $(BUILD_DIR-relwithdebinfo) $(CONFIG_OPTIONS) -DCMAKE_BUILD_TYPE=RelWithDebInfo -$(BUILD_DIR-release-shared) configure-release-shared: +$(BUILD_DIR-release-shared)/CMakeCache.txt configure-release-shared: cmake -S . -B $(BUILD_DIR-release-shared) $(CONFIG_OPTIONS) -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=1 -$(BUILD_DIR-debug-shared) configure-debug-shared: +$(BUILD_DIR-debug-shared)/CMakeCache.txt configure-debug-shared: cmake -S . -B $(BUILD_DIR-debug-shared) $(CONFIG_OPTIONS) -DCMAKE_BUILD_TYPE=Debug -DBUILD_SHARED_LIBS=1 -$(BUILD_DIR-relwithdebinfo-shared) configure-relwithdebinfo-shared: +$(BUILD_DIR-relwithdebinfo-shared)/CMakeCache.txt configure-relwithdebinfo-shared: cmake -S . -B $(BUILD_DIR-relwithdebinfo-shared) $(CONFIG_OPTIONS) -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=1 configure-static: $(foreach t,$(DEFAULT_BUILD_TARGETS_STATIC),configure-$(t)) configure-shared: $(foreach t,$(DEFAULT_BUILD_TARGETS_SHARED),configure-$(t)) else -$(BUILD_DIR_STATIC) configure-static $(foreach t,$(DEFAULT_BUILD_TARGETS_STATIC),configure-$(t)): +$(BUILD_DIR_STATIC)/CMakeCache.txt configure-static $(foreach t,$(DEFAULT_BUILD_TARGETS_STATIC),configure-$(t)): cmake -S . -B $(BUILD_DIR_STATIC) $(CONFIG_OPTIONS) -$(BUILD_DIR_SHARED) configure-shared $(foreach t,$(DEFAULT_BUILD_TARGETS_SHARED),configure-$(t)): +$(BUILD_DIR_SHARED)/CMakeCache.txt configure-shared $(foreach t,$(DEFAULT_BUILD_TARGETS_SHARED),configure-$(t)): cmake -S . -B $(BUILD_DIR_SHARED) $(CONFIG_OPTIONS) -DBUILD_SHARED_LIBS=1 endif static: $(DEFAULT_BUILD_TARGETS_STATIC) -shared: $(DEFAULT_BUILD_TARGETS_SHARED) +shared: $(DEFAULT_BUILD_TARGETS_SHARED) all: static shared @@ -310,27 +310,24 @@ TARGETS_RELEASE_CLEAN_FIRST := $(foreach t,$(TARGETS),$(t)-cr) TARGETS_DEBUG_CLEAN_FIRST := $(foreach t,$(TARGETS),$(t)-cd) TARGETS_RELWITHDEBINFO_CLEAN_FIRST := $(foreach t,$(TARGETS),$(t)-cp) -$(TARGETS_RELEASE): $(BUILD_DIR-release) +$(TARGETS_RELEASE): $(BUILD_DIR-release)/CMakeCache.txt cmake $(BUILD_OPTIONS-release) $(BUILD_JOBS) --target $(patsubst %-r,%,$@) $(BUILD_TOOL_OPTIONS) -$(TARGETS_RELEASE_CLEAN_FIRST): $(BUILD_DIR-release) +$(TARGETS_RELEASE_CLEAN_FIRST): $(BUILD_DIR-release)/CMakeCache.txt cmake $(BUILD_OPTIONS-release) $(BUILD_JOBS) --clean-first --target $(patsubst %-cr,%,$@) $(BUILD_TOOL_OPTIONS) -$(TARGETS_DEBUG): $(BUILD_DIR-debug) +$(TARGETS_DEBUG): $(BUILD_DIR-debug)/CMakeCache.txt cmake $(BUILD_OPTIONS-debug) $(BUILD_JOBS) --target $(patsubst %-d,%,$@) $(BUILD_TOOL_OPTIONS) -$(TARGETS_DEBUG_CLEAN_FIRST): $(BUILD_DIR-debug) +$(TARGETS_DEBUG_CLEAN_FIRST): $(BUILD_DIR-debug)/CMakeCache.txt cmake $(BUILD_OPTIONS-debug) $(BUILD_JOBS) --clean-first --target $(patsubst %-cd,%,$@) $(BUILD_TOOL_OPTIONS) -$(TARGETS_RELWITHDEBINFO): $(BUILD_DIR-relwithdebinfo) +$(TARGETS_RELWITHDEBINFO): $(BUILD_DIR-relwithdebinfo)/CMakeCache.txt cmake $(BUILD_OPTIONS-relwithdebinfo) $(BUILD_JOBS) --target $(patsubst %-p,%,$@) $(BUILD_TOOL_OPTIONS) -$(TARGETS_RELWITHDEBINFO_CLEAN_FIRST): $(BUILD_DIR-relwithdebinfo) +$(TARGETS_RELWITHDEBINFO_CLEAN_FIRST): $(BUILD_DIR-relwithdebinfo)/CMakeCache.txt cmake $(BUILD_OPTIONS-relwithdebinfo) $(BUILD_JOBS) --clean-first --target $(patsubst %-cp,%,$@) $(BUILD_TOOL_OPTIONS) -.PHONY: install +.PHONY: install clean realclean distclean -ifeq ($(OS),Windows_NT) .NOTPARALLEL: -endif - diff --git a/include/vvdec/vvdec.h b/include/vvdec/vvdec.h index 3827ef62..d59f252c 100644 --- a/include/vvdec/vvdec.h +++ b/include/vvdec/vvdec.h @@ -391,7 +391,7 @@ typedef struct vvdecPicAttributes vvdecSliceType sliceType; // slice type (I/P/B) */ bool isRefPic; // reference picture uint32_t temporalLayer; // temporal layer - uint64_t poc; // picture order count + int64_t poc; // picture order count uint32_t bits; // bits of the compr. image packet vvdecVui *vui; // if available, pointer to VUI (Video Usability Information) vvdecHrd *hrd; // if available, pointer to HRD (Hypothetical Reference Decoder) diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index c0c42e9a..12a76068 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -2065,119 +2065,74 @@ void InterPrediction::xPredInterBlkRPR( const std::pair& scalingRatio, { const bool rndRes = !bi; - int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleX( compID, chFmt ); - int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleY( compID, chFmt ); + const int csx = getComponentScaleX( compID, chFmt ); + const int csy = getComponentScaleY( compID, chFmt ); - int width = dstWidth; - int height = dstHeight; - CPelBuf refBuf; + int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + csx; + int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + csy; + + int width = dstWidth; + int height = dstHeight; + + CPelBuf refBuf; const Pel* refPtr; ptrdiff_t refStride; int row, col; - int refPicWidth = refPic->lwidth(); + int refPicWidth = refPic->lwidth(); int refPicHeight = refPic->lheight(); int xFilter = filterIndex; int yFilter = filterIndex; - const int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4; - const int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4; - if( filterIndex == 0 ) - { - if( scalingRatio.first > rprThreshold2 ) - { - xFilter = 4; - } - else if( scalingRatio.first > rprThreshold1 ) - { - xFilter = 3; - } - if( scalingRatio.second > rprThreshold2 ) - { - yFilter = 4; - } - else if( scalingRatio.second > rprThreshold1 ) - { - yFilter = 3; - } - } - if (filterIndex == 2) - { - if (isLuma(compID)) - { - if (scalingRatio.first > rprThreshold2) - { - xFilter = 6; - } - else if (scalingRatio.first > rprThreshold1) - { - xFilter = 5; - } + static constexpr int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4; + static constexpr int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4; - if (scalingRatio.second > rprThreshold2) - { - yFilter = 6; - } - else if (scalingRatio.second > rprThreshold1) - { - yFilter = 5; - } - } - else - { - if (scalingRatio.first > rprThreshold2) - { - xFilter = 4; - } - else if (scalingRatio.first > rprThreshold1) - { - xFilter = 3; - } + if ( scalingRatio.first > rprThreshold2 ) xFilter = 4; + else if( scalingRatio.first > rprThreshold1 ) xFilter = 3; - if (scalingRatio.second > rprThreshold2) - { - yFilter = 4; - } - else if (scalingRatio.second > rprThreshold1) - { - yFilter = 3; - } - } + if ( scalingRatio.second > rprThreshold2 ) yFilter = 4; + else if( scalingRatio.second > rprThreshold1 ) yFilter = 3; + + if( isLuma( compID ) && filterIndex == 2 ) + { + if( scalingRatio.first > rprThreshold1 ) xFilter += 2; + if( scalingRatio.second > rprThreshold1 ) yFilter += 2; } const int posShift = SCALE_RATIO_BITS - 4; - int stepX = ( scalingRatio.first + 8 ) >> 4; - int stepY = ( scalingRatio.second + 8 ) >> 4; + const int stepX = ( scalingRatio.first + 8 ) >> 4; + const int stepY = ( scalingRatio.second + 8 ) >> 4; + const int offX = 1 << ( posShift - shiftHor - 1 ); + const int offY = 1 << ( posShift - shiftVer - 1 ); + int64_t x0Int; int64_t y0Int; - int offX = 1 << ( posShift - shiftHor - 1 ); - int offY = 1 << ( posShift - shiftVer - 1 ); - const int64_t posX = ( ( blkPos.x << getComponentScaleX( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) ) >> getComponentScaleX( compID, chFmt ); - const int64_t posY = ( ( blkPos.y << getComponentScaleY( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) ) >> getComponentScaleY( compID, chFmt ); + const int64_t posX = ( ( blkPos.x << csx ) - ( pps.getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) ) >> csx; + const int64_t posY = ( ( blkPos.y << csy ) - ( pps.getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) ) >> csy; - int addX = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getHorCollocatedChromaFlag() ) * 8 * ( scalingRatio.first - SCALE_1X.first ); - int addY = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getVerCollocatedChromaFlag() ) * 8 * ( scalingRatio.second - SCALE_1X.second ); + const int addX = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getHorCollocatedChromaFlag() ) * 8 * ( scalingRatio.first - SCALE_1X.first ); + const int addY = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getVerCollocatedChromaFlag() ) * 8 * ( scalingRatio.second - SCALE_1X.second ); - x0Int = ( ( posX << ( 4 + getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() ) * (int64_t)scalingRatio.first + addX; - x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + getComponentScaleX( compID, chFmt ) ) ) + ( ( refPic->slices[0]->getPPS()->getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) << ( ( posShift - getComponentScaleX( compID, chFmt ) ) ) ); + x0Int = ( ( posX << ( 4 + csx ) ) + mv.getHor() ) * ( int64_t ) scalingRatio.first + addX; + x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( ( long long ) 1 << ( 7 + csx ) ) ) >> ( 8 + csx ) ) + ( ( refPic->slices[ 0 ]->getPPS()->getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) << ( ( posShift - csx ) ) ); - y0Int = ( ( posY << ( 4 + getComponentScaleY( compID, chFmt ) ) ) + mv.getVer() ) * (int64_t)scalingRatio.second + addY; - y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + getComponentScaleY( compID, chFmt ) ) ) + ( ( refPic->slices[0]->getPPS()->getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) << ( ( posShift - getComponentScaleY( compID, chFmt ) ) ) ); + y0Int = ( ( posY << ( 4 + csy ) ) + mv.getVer() ) * ( int64_t ) scalingRatio.second + addY; + y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( ( long long ) 1 << ( 7 + csy ) ) ) >> ( 8 + csy ) ) + ( ( refPic->slices[ 0 ]->getPPS()->getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) << ( ( posShift - csy ) ) ); const int extSize = isLuma( compID ) ? 1 : 2; int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA; - int yInt0 = ( (int32_t)y0Int + offY ) >> posShift; - yInt0 = std::min( std::max( -(NTAPS_LUMA / 2), yInt0 ), ( refPicHeight >> getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + int yInt0 = ( ( int32_t ) y0Int + offY ) >> posShift; + yInt0 = Clip3( -( NTAPS_LUMA / 2 ), ( refPicHeight >> csy ) + ( NTAPS_LUMA / 2 ), yInt0 ); - int xInt0 = ( (int32_t)x0Int + offX ) >> posShift; - xInt0 = std::min( std::max( -(NTAPS_LUMA / 2), xInt0 ), ( refPicWidth >> getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + int xInt0 = ( ( int32_t ) x0Int + offX ) >> posShift; + xInt0 = Clip3( -( NTAPS_LUMA / 2 ), ( refPicWidth >> csx ) + ( NTAPS_LUMA / 2 ), xInt0 ); - int refHeight = ((((int32_t)y0Int + (height-1) * stepY) + offY ) >> posShift) - ((((int32_t)y0Int + 0 * stepY) + offY ) >> posShift) + 1; + int refHeight = ( ( ( ( int32_t ) y0Int + ( height - 1 ) * stepY ) + offY ) >> posShift ) - ( ( ( ( int32_t ) y0Int + 0 * stepY ) + offY ) >> posShift ) + 1; refHeight = std::max( 1, refHeight ); CHECK_RECOVERABLE( MAX_CU_SIZE * MAX_SCALING_RATIO < refHeight + vFilterSize - 1 + extSize, "Buffer size is not enough, increase MAX_SCALING_RATIO" ); @@ -2191,9 +2146,9 @@ void InterPrediction::xPredInterBlkRPR( const std::pair& scalingRatio, for( col = 0; col < width; col++ ) { - int posX = (int32_t)x0Int + col * stepX; - xInt = ( posX + offX ) >> posShift; - xInt = std::min( std::max( -(NTAPS_LUMA / 2), xInt ), ( refPicWidth >> getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + int posX = ( int32_t ) x0Int + col * stepX; + xInt = ( posX + offX ) >> posShift; + xInt = Clip3( -( NTAPS_LUMA / 2 ), ( refPicWidth >> csx ) + ( NTAPS_LUMA / 2 ), xInt ); int xFrac = ( ( posX + offX ) >> ( posShift - shiftHor ) ) & ( ( 1 << shiftHor ) - 1 ); CHECK_RECOVERABLE( xInt0 > xInt, "Wrong horizontal starting point" ); @@ -2206,9 +2161,9 @@ void InterPrediction::xPredInterBlkRPR( const std::pair& scalingRatio, for( row = 0; row < height; row++ ) { - int posY = (int32_t)y0Int + row * stepY; - yInt = ( posY + offY ) >> posShift; - yInt = std::min( std::max( -(NTAPS_LUMA / 2), yInt ), ( refPicHeight >> getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + int posY = ( int32_t ) y0Int + row * stepY; + yInt = ( posY + offY ) >> posShift; + yInt = Clip3( -( NTAPS_LUMA / 2 ), ( refPicHeight >> csy ) + ( NTAPS_LUMA / 2 ), yInt ); int yFrac = ( ( posY + offY ) >> ( posShift - shiftVer ) ) & ( ( 1 << shiftVer ) - 1 ); CHECK_RECOVERABLE( yInt0 > yInt, "Wrong vertical starting point" ); diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h index db1cc404..ec32212e 100644 --- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h +++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h @@ -681,11 +681,8 @@ static void simdInterpolateHorM8_AVX2( const int16_t* src, ptrdiff_t srcStride, __m256i vshuf1 = _mm256_set_epi8( 0xd, 0xc, 0xb, 0xa, 0xb, 0xa, 0x9, 0x8, 0x9, 0x8, 0x7, 0x6, 0x7, 0x6, 0x5, 0x4, 0xd, 0xc, 0xb, 0xa, 0xb, 0xa, 0x9, 0x8, 0x9, 0x8, 0x7, 0x6, 0x7, 0x6, 0x5, 0x4 ); -#if __INTEL_COMPILER __m256i vcoeff[4]; -#else - __m256i vcoeff[N/2]; -#endif + for( int i=0; i( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c ); + return; + } + + if( isLast ) + { + shift += ( isFirst ) ? 0 : headRoom; + offset = 1 << ( shift - 1 ); + offset += ( isFirst ) ? 0 : IF_INTERNAL_OFFS << IF_FILTER_PREC; } else { - if( isLast ) - { - shift += ( isFirst ) ? 0 : headRoom; - offset = 1 << ( shift - 1 ); - offset += ( isFirst ) ? 0 : IF_INTERNAL_OFFS << IF_FILTER_PREC; - } - else - { - shift -= ( isFirst ) ? headRoom : 0; - offset = ( isFirst ) ? -IF_INTERNAL_OFFS *(1<< shift) : 0; - } + shift -= ( isFirst ) ? headRoom : 0; + offset = ( isFirst ) ? -IF_INTERNAL_OFFS *(1<< shift) : 0; } + if( !isVertical ) { - if( N == 8 && !( width & 0x07 ) ) + if( ( width & 7 ) == 0 ) { - if( !isVertical ) - { - if( vext >= AVX2 ) + if( vext >= AVX2 ) #if USE_M16_AVX2_IF - if( !( width & 15 ) ) - simdInterpolateHorM16_AVX2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else -#endif - simdInterpolateHorM8_AVX2 ( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + if( !( width & 15 ) ) + simdInterpolateHorM16_AVX2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); else - simdInterpolateHorM8( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - } - else - { - if( vext>= AVX2 ) -#if USE_M16_AVX2_IF - if( !( width & 15 ) ) - simdInterpolateVerM16_AVX2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else #endif - simdInterpolateVerM8_AVX2 ( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else - simdInterpolateVerM8( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - } - return; - } - else if( N == 8 && !( width & 0x03 ) ) - { - if( !isVertical ) - { - simdInterpolateHorM4( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - } + simdInterpolateHorM8_AVX2 ( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); else - simdInterpolateVerM4( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - return; + simdInterpolateHorM8( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); } - else if( N == 4 && ( width % 2 ) == 0 ) + else if( ( width & 3 ) == 0 ) + simdInterpolateHorM4( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + else if( ( width & 1 ) == 0 ) + simdInterpolateHorM2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + else + simdInterpolateHorM1( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + } + else + { + if( ( width & 7 ) == 0 ) { - CHECKD( ( width & 1 ), "Blocks of width 1 are not allowed!" ); - - if( !isVertical ) - { - if( ( width % 8 ) == 0 ) - { - if( vext>= AVX2 ) + if( vext >= AVX2 ) #if USE_M16_AVX2_IF - if( !( width & 15 ) ) - simdInterpolateHorM16_AVX2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else -#endif - simdInterpolateHorM8_AVX2 ( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else - simdInterpolateHorM8( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - } - else if( ( width % 4 ) == 0 ) - simdInterpolateHorM4( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + if( !( width & 15 ) ) + simdInterpolateVerM16_AVX2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); else - simdInterpolateHorM2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - - return; - } - else - { - if( ( width % 8 ) == 0 ) - { - if( vext >= AVX2 ) -#if USE_M16_AVX2_IF - if( !( width & 15 ) ) - simdInterpolateVerM16_AVX2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else #endif - simdInterpolateVerM8_AVX2 ( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else - simdInterpolateVerM8( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - } - else if( ( width % 4 ) == 0 ) - simdInterpolateVerM4( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - else - simdInterpolateVerM2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - - return; - } - } - else if( width == 1 ) - { - CHECK( isVertical, "Should never happen!" ); - simdInterpolateHorM1( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); - return; - } - else if( N == 2 ) - { - CHECKD( clpRng.bd > 10, "Bit depths over 10 not supported!" ); - simdInterpolateN2( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c ); - return; + simdInterpolateVerM8_AVX2 ( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + else + simdInterpolateVerM8( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); } + else if( ( width & 3 ) == 0 ) + simdInterpolateVerM4( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + else if( ( width & 1 ) == 0 ) + simdInterpolateVerM2( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c ); + else + goto scalar_if; } + return; + +scalar_if: for( row = 0; row < height; row++ ) { for( col = 0; col < width; col++ ) diff --git a/source/Lib/DecoderLib/DecLibParser.cpp b/source/Lib/DecoderLib/DecLibParser.cpp index 0b648eb0..86ca8496 100644 --- a/source/Lib/DecoderLib/DecLibParser.cpp +++ b/source/Lib/DecoderLib/DecLibParser.cpp @@ -1403,6 +1403,11 @@ void DecLibParser::prepareUnavailablePicture( bool isLost, const PPS* pps, int i #endif cFillPic->fillGrey( m_parameterSetManager.getFirstSPS() ); + + if( m_pocRandomAccess == MAX_INT ) + { + m_pocRandomAccess = iUnavailablePoc; + } } #if 0 @@ -1666,34 +1671,33 @@ void DecLibParser::checkNoOutputPriorPics() */ bool DecLibParser::isRandomAccessSkipPicture() { - if (m_pocRandomAccess == MAX_INT) // start of random access point, m_pocRandomAccess has not been set yet. + if( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) + { + m_pocRandomAccess = -MAX_INT; // no need to skip the reordered pictures in IDR, they are decodable. + } + else if( m_pocRandomAccess == MAX_INT ) // start of random access point, m_pocRandomAccess has not been set yet. { #if GDR_ADJ - if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR ) + if( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR ) #else - if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) + if( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) #endif { // set the POC random access since we need to skip the reordered pictures in the case of CRA/CRANT/BLA/BLANT. m_pocRandomAccess = m_apcSlicePilot->getPOC(); } - else if ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) - { - m_pocRandomAccess = -MAX_INT; // no need to skip the reordered pictures in IDR, they are decodable. - } else { - if(!m_warningMessageSkipPicture) + if( !m_warningMessageSkipPicture ) { - msg( WARNING, "Warning: this is not a valid random access point and the data is discarded until the first CRA picture\n"); + msg( WARNING, "Warning: this is not a valid random access point and the data is discarded until the first CRA picture\n" ); m_warningMessageSkipPicture = true; } return true; } } // skip the reordered pictures, if necessary - else if (m_apcSlicePilot->getPOC() < m_pocRandomAccess && (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL)) + else if( m_apcSlicePilot->getPOC() < m_pocRandomAccess && ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL ) ) { return true; } diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 47ffe695..5639d311 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -56,16 +56,6 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvdec { -VVDecImpl::VVDecImpl() -{ - -} - -VVDecImpl::~VVDecImpl() -{ - -} - int VVDecImpl::init( const vvdecParams& params, vvdecCreateBufferCallback createBufCallback, vvdecUnrefBufferCallback unrefBufCallback ) { if( m_bInitialized ){ return VVDEC_ERR_INITIALIZE; } @@ -205,21 +195,17 @@ int VVDecImpl::reset() } }; - for( auto& frame : m_rcFrameList ) + for( auto& entry: m_rcFrameList ) { - vvdec_frame_reset( &frame ); + vvdec_frame_reset( &std::get( entry ) ); + if( std::get( entry ) ) + { + m_cDecLib->releasePicture( std::get( entry ) ); + } } m_rcFrameList.clear(); m_pcFrameNext = m_rcFrameList.end(); - - for( auto& pic : m_pcLibPictureList ) - { - m_cDecLib->releasePicture( pic ); - } - m_pcLibPictureList.clear(); - - for( auto& storage : m_cFrameStorageMap ) { if( storage.second.isAllocated()) @@ -436,7 +422,7 @@ int VVDecImpl::decode( vvdecAccessUnit& rcAccessUnit, vvdecFrame** ppcFrame ) } else { - *ppcFrame = &( *m_pcFrameNext ); + *ppcFrame = &std::get( *m_pcFrameNext ); m_uiSeqNumOutput = (*ppcFrame)->sequenceNumber; ++m_pcFrameNext; } @@ -549,9 +535,9 @@ int VVDecImpl::flush( vvdecFrame** ppframe ) } else { - *ppframe = &( *m_pcFrameNext ); - m_uiSeqNumOutput = (*ppframe)->sequenceNumber; - ++m_pcFrameNext; + *ppframe = &std::get( *m_pcFrameNext ); + m_uiSeqNumOutput = ( *ppframe )->sequenceNumber; + ++m_pcFrameNext; } } else @@ -620,27 +606,16 @@ vvdecSEI* VVDecImpl::findFrameSei( vvdecSEIPayloadType payloadType, vvdecFrame * } Picture* picture = nullptr; - for ( auto& pic : m_pcLibPictureList ) + for( auto& entry: m_rcFrameList ) { - if( frame->picAttributes != NULL ) - { - if( frame->picAttributes->poc == (uint64_t)pic->poc ) - { - picture = pic; - break; - } - } - else + if( frame == &std::get( entry ) ) { - if( frame->ctsValid && frame ->cts == pic->cts ) - { - picture = pic; - break; - } + picture = std::get( entry ); + break; } } - if( picture == nullptr) + if( picture == nullptr ) { msg(VERBOSE, "findFrameSei: cannot find pictue in internal list.\n"); return nullptr; @@ -669,44 +644,22 @@ int VVDecImpl::objectUnref( vvdecFrame* pcFrame ) return VVDEC_ERR_UNSPECIFIED; } - bool bPicFound = false; - for( auto& pic : m_rcFrameList ) - { - if( &pic == pcFrame ) - { - bPicFound = true; - vvdec_frame_reset( &pic ); - break; - } - } - - if( bPicFound ) + for( auto it = m_rcFrameList.begin(); it != m_rcFrameList.end(); ++it ) { - std::list::iterator itFrame = m_rcFrameList.end(); - for( std::list::iterator it = m_rcFrameList.begin(); it != m_rcFrameList.end(); it++ ) - { - if( &*it == pcFrame ) - { - itFrame = it; - break; - } - } - if( itFrame != m_rcFrameList.end()) - { - m_rcFrameList.erase(itFrame); - } - else + vvdecFrame* frame = &std::get( *it ); + if( frame == pcFrame ) { - m_cErrorString = "objectUnref() cannot find picture in picture list"; - return VVDEC_ERR_UNSPECIFIED; + vvdec_frame_reset( frame ); + if( std::get( *it ) ) + { + m_cDecLib->releasePicture( std::get( *it ) ); + } + m_rcFrameList.erase( it ); + return VVDEC_OK; } } - else - { - return VVDEC_ERR_UNSPECIFIED; - } - return VVDEC_OK; + return VVDEC_ERR_UNSPECIFIED; } int VVDecImpl::getNumberOfErrorsPictureHashSEI() @@ -826,7 +779,7 @@ vvdecNalType VVDecImpl::getNalUnitType ( vvdecAccessUnit& rcAccessUnit ) if( found ) { unsigned char uc = pcBuf[iOffset]; - int nalUnitType = ((uc >> 3) & 0x1F ); + int nalUnitType = ((uc >> 3) & 0x1F ); eNalType = (vvdecNalType)nalUnitType; } @@ -992,12 +945,12 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) { PelStorage upscaledPic; upscaledPic.create( cPicBuf.chromaFormat, Size( orgWidth, orgHeight ), 0, 0, 0, true, &m_cUserAllocator ); - + int xScale = ( ( uiWidth << SCALE_RATIO_BITS ) + ( orgWidth >> 1 ) ) / orgWidth; int yScale = ( ( uiHeight << SCALE_RATIO_BITS ) + ( orgHeight >> 1 ) ) / orgHeight; upscaledPic.rescaleBuf( cPicBuf, std::pair( xScale, yScale ), conf, defDisp, bitDepths, pcPic->cs->sps->getHorCollocatedChromaFlag(), pcPic->cs->sps->getVerCollocatedChromaFlag() ); - + // copy picture into target memory for( int comp=0; comp < maxComponent; comp++ ) { @@ -1055,7 +1008,6 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) cFrame.planes[comp].ptr = (unsigned char*)(planeOrigin + planeOffset); } - m_pcLibPictureList.push_back( pcPic ); } // set picture attributes @@ -1132,7 +1084,7 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) cFrame.picAttributes->hrd->hrdCpbCnt = hrd->getHrdCpbCntMinus1()+1; } } - + if( pcPic->slices.front()->getSPS()->getOlsHrdParameters() ) { const OlsHrdParams* ols = pcPic->slices.front()->getSPS()->getOlsHrdParameters(); @@ -1163,7 +1115,7 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) } } - m_rcFrameList.push_back( cFrame ); + m_rcFrameList.push_back( { cFrame, bCreateStorage ? nullptr : pcPic } ); if( m_pcFrameNext == m_rcFrameList.end() ) { @@ -1173,9 +1125,9 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) } else { - for( std::list::iterator it = m_rcFrameList.begin(); it != m_rcFrameList.end(); it++ ) + for( auto it = m_rcFrameList.begin(); it != m_rcFrameList.end(); it++ ) { - if( (*it).sequenceNumber > m_uiSeqNumOutput ) + if( std::get( *it ).sequenceNumber > m_uiSeqNumOutput ) { m_pcFrameNext = it; break; @@ -1186,7 +1138,7 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) m_uiSeqNumber++; - if ( bCreateStorage ) + if( bCreateStorage ) { // release library picture storage, because picture has been copied into new storage class m_cDecLib->releasePicture( pcPic ); @@ -1303,7 +1255,7 @@ int VVDecImpl::xCreateFrame( vvdecFrame& rcFrame, const CPelUnitBuf& rcPicBuf, u if( nBufSize == 0 ){ return VVDEC_ERR_ALLOCATE; } FrameStorage frameStorage; - + if( m_cUserAllocator.enabled ) { frameStorage.setExternAllocator(); @@ -1326,7 +1278,7 @@ int VVDecImpl::xCreateFrame( vvdecFrame& rcFrame, const CPelUnitBuf& rcPicBuf, u { frameStorage.allocateStorage( nBufSize ); rcFrame.planes[VVDEC_CT_Y].ptr = frameStorage.getStorage(); - + switch( rcPicBuf.chromaFormat ) { case CHROMA_400: @@ -1340,7 +1292,7 @@ int VVDecImpl::xCreateFrame( vvdecFrame& rcFrame, const CPelUnitBuf& rcPicBuf, u default: break; } } - m_cFrameStorageMap.insert( frameStorageMapType( rcFrame.sequenceNumber, frameStorage)); + m_cFrameStorageMap.insert( FrameStorageMapType( rcFrame.sequenceNumber, std::move( frameStorage ) ) ); } return 0; @@ -1498,7 +1450,7 @@ int VVDecImpl::xHandleOutput( Picture* pcPic ) */ bool VVDecImpl::isFrameConverted( vvdecFrame* frame ) { - frameStorageMap::iterator storageIter = m_cFrameStorageMap.find( frame->sequenceNumber ); + FrameStorageMap::iterator storageIter = m_cFrameStorageMap.find( frame->sequenceNumber ); if( storageIter != m_cFrameStorageMap.end() ) { if( storageIter->second.isAllocated() || storageIter->second.isExternAllocator() ) @@ -1550,17 +1502,15 @@ void VVDecImpl::vvdec_plane_default(vvdecPlane *plane) plane->allocator = nullptr; ///< opaque pointer to memory allocator (only valid, when memory is maintained by caller) } -void VVDecImpl::vvdec_frame_reset(vvdecFrame *frame ) +void VVDecImpl::vvdec_frame_reset(vvdecFrame *frame) { - bool bIsInternalLibStorage = true; - bool bIsExternAllocator = false; - frameStorageMap::iterator storageIter = m_cFrameStorageMap.find( frame->sequenceNumber ); + bool bIsExternAllocator = false; + FrameStorageMap::iterator storageIter = m_cFrameStorageMap.find( frame->sequenceNumber ); if( storageIter != m_cFrameStorageMap.end() ) { if( storageIter->second.isAllocated() ) { storageIter->second.freeStorage(); - bIsInternalLibStorage = false; } else if( storageIter->second.isExternAllocator() ) { @@ -1570,20 +1520,6 @@ void VVDecImpl::vvdec_frame_reset(vvdecFrame *frame ) m_cFrameStorageMap.erase (storageIter); } - if( bIsInternalLibStorage ) - { - // release internal picture memory - for( std::list::iterator itLibPic = m_pcLibPictureList.begin(); itLibPic != m_pcLibPictureList.end(); itLibPic++ ) - { - if( (*itLibPic)->cts == frame->cts ) - { - m_cDecLib->releasePicture( *itLibPic ); - m_pcLibPictureList.erase( itLibPic ); - break; - } - } - } - if( frame->picAttributes ) { if( frame->picAttributes->vui ) diff --git a/source/Lib/vvdec/vvdecimpl.h b/source/Lib/vvdec/vvdecimpl.h index b8428734..c520c133 100644 --- a/source/Lib/vvdec/vvdecimpl.h +++ b/source/Lib/vvdec/vvdecimpl.h @@ -99,50 +99,43 @@ class VVDecImpl public: /// Constructor - VVDecImpl(); + VVDecImpl() = default; /// Destructor - virtual ~VVDecImpl(); + ~VVDecImpl() = default; class FrameStorage { public: - FrameStorage() = default; - ~FrameStorage() = default; - int allocateStorage( size_t size ) { if( size == 0 ){ return VVDEC_ERR_ALLOCATE; } - m_ptr = new unsigned char [ size ]; + m_ptr.reset( new unsigned char[size] ); m_size = size; - m_isAllocated = true; return 0; } int freeStorage() { - if( !m_isAllocated) { return VVDEC_ERR_ALLOCATE; } - delete [] m_ptr; + if( !m_ptr ) { return VVDEC_ERR_ALLOCATE; } + m_ptr.reset(); m_size = 0; - m_isAllocated = false; return 0; } unsigned char * getStorage() { - if( !m_isAllocated) { return nullptr; } - return m_ptr; + return m_ptr.get(); } - bool isAllocated(){ return m_isAllocated; } - bool isExternAllocator(){ return m_isExternAllocator; } - void setExternAllocator(){ m_isExternAllocator = true; } - + bool isAllocated() { return !!m_ptr; } + bool isExternAllocator() { return m_isExternAllocator; } + void setExternAllocator() { m_isExternAllocator = true; } + private: - bool m_isAllocated = false; - unsigned char *m_ptr = nullptr; // pointer to plane buffer - size_t m_size = 0; - bool m_isExternAllocator = false; + std::unique_ptr m_ptr = nullptr; // pointer to plane buffer + size_t m_size = 0; + bool m_isExternAllocator = false; }; public: @@ -176,6 +169,9 @@ class VVDecImpl static const char* getNalUnitTypeAsString( vvdecNalType t ); static bool isNalUnitSlice ( vvdecNalType t ); + std::string m_cErrorString; + std::string m_cAdditionalErrorString; + private: int xAddPicture ( Picture* pcPic ); int xCreateFrame ( vvdecFrame& frame, const CPelUnitBuf& rcPicBuf, uint32_t uiWidth, uint32_t uiHeight, const BitDepths& rcBitDepths, bool bCreateStorage ); @@ -195,9 +191,10 @@ class VVDecImpl void vvdec_frame_reset(vvdecFrame *frame ); private: - typedef std::map frameStorageMap; - typedef frameStorageMap::value_type frameStorageMapType; -public: + typedef std::tuple FrameListEntry; + typedef std::map FrameStorageMap; + typedef FrameStorageMap::value_type FrameStorageMapType; + bool m_bInitialized = false; bool m_bRemovePadding = false; // copy picture before output to remove padding VVDecInternalState m_eState = INTERNAL_STATE_UNINITIALIZED; @@ -205,22 +202,17 @@ class VVDecImpl std::unique_ptr m_cDecLib; - std::list m_rcFrameList; - std::list::iterator m_pcFrameNext = m_rcFrameList.begin(); + std::list m_rcFrameList; + std::list::iterator m_pcFrameNext = m_rcFrameList.begin(); - std::list m_pcLibPictureList; // internal picture list - frameStorageMap m_cFrameStorageMap; // map of frame storage class( converted frames) + FrameStorageMap m_cFrameStorageMap; // map of frame storage class( converted frames) UserAllocator m_cUserAllocator; // user allocator object, valid if buffers are managed external std::string m_sDecoderInfo; std::string m_sDecoderCapabilities; - std::string m_cErrorString; - std::string m_cAdditionalErrorString; - uint64_t m_uiSeqNumber = 0; uint64_t m_uiSeqNumOutput = 0; - uint64_t m_uiPicCount = 0; };