From 2a08a205c35f74c249952504b7eabb29a3d242bf Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Thu, 23 May 2024 15:49:41 +0200 Subject: [PATCH] Update the math functions documentation - Function should be code in the rst table - Fix single and double precision intrinsic functions - OCML_BASIC_ROUNDED_OPERATIONS define macro is undefined by default - Update integer intrinsic math functions - PR findings - Remove function duplication and rename math function variables - Added missing math functions and updated description - Minor fixes --- docs/reference/kernel_language.rst | 1181 +++++++++++++++------------- 1 file changed, 653 insertions(+), 528 deletions(-) diff --git a/docs/reference/kernel_language.rst b/docs/reference/kernel_language.rst index 328d517042..ae87e9e11e 100644 --- a/docs/reference/kernel_language.rst +++ b/docs/reference/kernel_language.rst @@ -310,7 +310,6 @@ Synchronization functions The ``__syncthreads()`` built-in function is supported in HIP. The ``__syncthreads_count(int)``, ``__syncthreads_and(int)``, and ``__syncthreads_or(int)`` functions are under development. - Math functions ==================================================== @@ -319,6 +318,7 @@ These are described in the following sections. Single precision mathematical functions -------------------------------------------------------------------------------------------- + Following is the list of supported single precision mathematical functions. .. list-table:: Single precision mathematical functions @@ -327,979 +327,1104 @@ Following is the list of supported single precision mathematical functions. - **Supported on Host** - **Supported on Device** - * - | float acosf ( float x ) - | Calculate the arc cosine of the input argument. + * - | ``float abs(float x)`` + | Returns the absolute value of :math:`x` - ✓ - ✓ - * - | float acoshf ( float x ) - | Calculate the nonnegative arc hyperbolic cosine of the input argument. 
+ * - | ``float acosf(float x)`` + | Returns the arc cosine of :math:`x`. - ✓ - ✓ - * - | float asinf ( float x ) - | Calculate the arc sine of the input argument + * - | ``float acoshf(float x)`` + | Returns the nonnegative arc hyperbolic cosine of :math:`x`. - ✓ - ✓ - * - | float asinhf ( float x ) - | Calculate the arc hyperbolic sine of the input argument. + * - | ``float asinf(float x)`` + | Returns the arc sine of :math:`x`. - ✓ - ✓ - * - | float atan2f ( float y, float x ) - | Calculate the arc tangent of the ratio of first and second input arguments. + * - | ``float asinhf(float x)`` + | Returns the arc hyperbolic sine of :math:`x`. - ✓ - ✓ - * - | float atanf ( float x ) - | Calculate the arc tangent of the input argument. + * - | ``float atanf(float x)`` + | Returns the arc tangent of :math:`x`. - ✓ - ✓ - * - | float atanhf ( float x ) - | Calculate the arc hyperbolic tangent of the input argument. + * - | ``float atan2f(float x, float y)`` + | Returns the arc tangent of the ratio of :math:`x` and :math:`y`. - ✓ - ✓ - * - | float cbrtf ( float x ) - | Calculate the cube root of the input argument. + * - | ``float atanhf(float x)`` + | Returns the arc hyperbolic tangent of :math:`x`. - ✓ - ✓ - * - | float ceilf ( float x ) - | Calculate ceiling of the input argument. + * - | ``float cbrtf(float x)`` + | Returns the cube root of :math:`x`. - ✓ - ✓ - * - | float copysignf ( float x, float y ) - | Create value with given magnitude, copying sign of second value. + * - | ``float ceilf(float x)`` + | Returns ceiling of :math:`x`. - ✓ - ✓ - * - | float copysignf ( float x, float y ) + * - | ``float copysignf(float x, float y)`` | Create value with given magnitude, copying sign of second value. - ✓ - ✓ - * - | float cosf ( float x ) - | Calculate the cosine of the input argument. + * - | ``float cosf(float x)`` + | Returns the cosine of :math:`x`. - ✓ - ✓ - * - | float coshf ( float x ) - | Calculate the hyperbolic cosine of the input argument. 
- - ✓ - - ✓ - * - | float erfcf ( float x ) - | Calculate the complementary error function of the input argument. + * - | ``float coshf(float x)`` + | Returns the hyperbolic cosine of :math:`x`. - ✓ - ✓ - * - | float erff ( float x ) - | Calculate the error function of the input argument. + * - | ``float cospif(float x)`` + | Returns the cosine of :math:`\pi \cdot x`. - ✓ - ✓ - * - | float exp10f ( float x ) - | Calculate the base 10 exponential of the input argument. + * - | ``float cyl_bessel_i0f(float x)`` + | Returns the value of the regular modified cylindrical Bessel function of order 0 for :math:`x`. + - ✗ + - ✗ + + * - | ``float cyl_bessel_i1f(float x)`` + | Returns the value of the regular modified cylindrical Bessel function of order 1 for :math:`x`. + - ✗ + - ✗ + + * - | ``float erff(float x)`` + | Returns the error function of :math:`x`. - ✓ - ✓ - * - | float exp2f ( float x ) - | Calculate the base 2 exponential of the input argument. + * - | ``float erfcf(float x)`` + | Returns the complementary error function of :math:`x`. - ✓ - ✓ - - * - | float expf ( float x ) - | Calculate the base e exponential of the input argument. + + * - | ``float erfcinvf(float x)`` + | Returns the inverse complementary error function of :math:`x`. - ✓ - ✓ - * - | float expm1f ( float x ) - | Calculate the base e exponential of the input argument, minus 1. + * - | ``float erfcxf(float x)`` + | Returns the scaled complementary error function of :math:`x`. - ✓ - ✓ - * - | float fabsf ( float x ) - | Calculate the absolute value of its argument. + * - | ``float erfinvf(float x)`` + | Returns the inverse error function of :math:`x`. - ✓ - ✓ - - * - | float fdimf ( float x, float y ) - | Compute the positive difference between `x` and `y`. + + * - | ``float expf(float x)`` + | Returns :math:`e^x`. - ✓ - ✓ - * - | float floorf ( float x ) - | Calculate the largest integer less than or equal to `x`. + * - | ``float exp10f(float x)`` + | Returns :math:`10^x`. 
- ✓ - ✓ - * - | float fmaf ( float x, float y, float z ) - | Compute `x × y + z` as a single operation. + * - | ``float exp2f(float x)`` + | Returns :math:`2^x`. - ✓ - ✓ - * - | float fmaxf ( float x, float y ) - | Determine the maximum numeric value of the arguments. + * - | ``float expm1f(float x)`` + | Returns :math:`e^x - 1`. - ✓ - ✓ - * - | float fminf ( float x, float y ) - | Determine the minimum numeric value of the arguments. + * - | ``float fabsf(float x)`` + | Returns the absolute value of :math:`x`. - ✓ - ✓ - - * - | float fmodf ( float x, float y ) - | Calculate the floating-point remainder of `x / y`. + + * - | ``float fdimf(float x, float y)`` + | Returns the positive difference between :math:`x` and :math:`y`. - ✓ - ✓ - * - | float frexpf ( float x, int* nptr ) - | Extract mantissa and exponent of a floating-point value. + * - | ``float fdividef(float x, float y)`` + | Divide two floating point values. + - ✓ - ✓ - - ✗ - * - | float hypotf ( float x, float y ) - | Calculate the square root of the sum of squares of two arguments. + * - | ``float floorf(float x)`` + | Returns the largest integer less than or equal to :math:`x`. - ✓ - ✓ - * - | int ilogbf ( float x ) - | Compute the unbiased integer exponent of the argument. + * - | ``float fmaf(float x, float y, float z)`` + | Returns :math:`x \cdot y + z` as a single operation. - ✓ - ✓ - * - | __RETURN_TYPE isfinite ( float a ) - | Determine whether argument is finite. + * - | ``float fmaxf(float x, float y)`` + | Determine the maximum numeric value of :math:`x` and :math:`y`. - ✓ - ✓ - * - | __RETURN_TYPE isinf ( float a ) - | Determine whether argument is infinite. + * - | ``float fminf(float x, float y)`` + | Determine the minimum numeric value of :math:`x` and :math:`y`. - ✓ - ✓ - - * - | __RETURN_TYPE isnan ( float a ) - | Determine whether argument is a NaN. + + * - | ``float fmodf(float x, float y)`` + | Returns the floating-point remainder of :math:`x / y`. 
- ✓ - ✓ - * - | float ldexpf ( float x, int exp ) - | Calculate the value of x ⋅ 2 of the exponent of the input argument. + * - | ``float modff(float x, float* iptr)`` + | Break down :math:`x` into fractional and integral parts. - ✓ + - ✗ + + * - | ``float frexpf(float x, int* nptr)`` + | Extract mantissa and exponent of :math:`x`. - ✓ + - ✗ - * - | loat log10f ( float x ) - | Calculate the base 10 logarithm of the input argument. + * - | ``float hypotf(float x, float y)`` + | Returns the square root of the sum of squares of :math:`x` and :math:`y`. - ✓ - ✓ - * - | float log1pf ( float x ) - | Calculate the value of the exponent of the input argument + * - | ``int ilogbf(float x)`` + | Returns the unbiased integer exponent of :math:`x`. - ✓ - ✓ - * - | float logbf ( float x ) - | Calculate the floating point representation of the exponent of the input argument. + * - | ``bool isfinite(float x)`` + | Determine whether :math:`x` is finite. - ✓ - ✓ - - * - | float log2f ( float x ) - | Calculate the base 2 logarithm of the input argument. + + * - | ``bool isinf(float x)`` + | Determine whether :math:`x` is infinite. - ✓ - ✓ - * - | float logf ( float x ) - | Calculate the natural logarithm of the input argument. + * - | ``bool isnan(float x)`` + | Determine whether :math:`x` is a ``NAN``. - ✓ - ✓ - * - | float modff ( float x, float* iptr ) - | Break down the input argument into fractional and integral parts. + * - | ``float j0f(float x)`` + | Returns the value of the Bessel function of the first kind of order 0 for :math:`x`. - ✓ - - ✗ - - * - | float nanf ( const char* tagp ) - | Returns "Not a Number" value. - - ✗ - ✓ - * - | float nearbyintf ( float x ) - | Round the input argument to the nearest integer. + * - | ``float j1f(float x)`` + | Returns the value of the Bessel function of the first kind of order 1 for :math:`x`. - ✓ - ✓ - * - | float powf ( float x, float y ) - | Calculate the value of first argument to the power of second argument. 
+ * - | ``float jnf(int n, float x)`` + | Returns the value of the Bessel function of the first kind of order n for :math:`x`. - ✓ - ✓ - * - | float remainderf ( float x, float y ) - | Compute single-precision floating-point remainder. + * - | ``float ldexpf(float x, int exp)`` + | Returns :math:`x \cdot 2^{exp}`. - ✓ - ✓ - * - | float remquof ( float x, float y, int* quo ) - | Compute single-precision floating-point remainder and part of quotient. + * - | ``float lgammaf(float x)`` + | Returns the natural logarithm of the absolute value of the gamma function of :math:`x`. - ✓ - - ✗ + - ✗ - * - | float roundf ( float x ) - | Round to nearest integer value in floating-point. + * - | ``long int lrintf(float x)`` + | Round :math:`x` to nearest integer value. - ✓ - ✓ - * - | float scalbnf ( float x, int n ) - | Scale floating-point input by integer power of two. + * - | ``long long int llrintf(float x)`` + | Round :math:`x` to nearest integer value. - ✓ - ✓ - * - | __RETURN_TYPE signbit ( float a ) - | Return the sign bit of the input. + * - | ``long int lroundf(float x)`` + | Round to nearest integer value. - ✓ - ✓ - * - | void sincosf ( float x, float* sptr, float* cptr ) - | Calculate the sine and cosine of the first input argument. + * - | ``long long int llroundf(float x)`` + | Round to nearest integer value. + - ✓ - ✓ - - ✗ - * - | float sinf ( float x ) - | Calculate the sine of the input argument. + * - | ``float log10f(float x)`` + | Returns the base 10 logarithm of :math:`x`. - ✓ - ✓ - * - | float sinhf ( float x ) - | Calculate the hyperbolic sine of the input argument. + * - | ``float log1pf(float x)`` + | Returns the natural logarithm of :math:`x + 1`. - ✓ - ✓ - - * - | float sqrtf ( float x ) - | Calculate the square root of the input argument. + + * - | ``float log2f(float x)`` + | Returns the base 2 logarithm of :math:`x`. 
- ✓ - ✓ - * - | float tanf ( float x ) - | Calculate the tangent of the input argument. + * - | ``float logf(float x)`` + | Returns the natural logarithm of :math:`x`. - ✓ - ✓ - * - | float tanhf ( float x ) - | Calculate the hyperbolic tangent of the input argument. + * - | ``float logbf(float x)`` + | Returns the floating point representation of the exponent of :math:`x`. - ✓ - ✓ - * - | float truncf ( float x ) - | Truncate input argument to the integral part. - - ✓ + * - | ``float nanf(const char* tagp)`` + | Returns "Not a Number" value. + - ✗ - ✓ - * - | float tgammaf ( float x ) - | Calculate the gamma function of the input argument. + * - | ``float nearbyintf(float x)`` + | Round :math:`x` to the nearest integer. - ✓ - ✓ - * - | float erfcinvf ( float y ) - | Calculate the inverse complementary function of the input argument. - - ✓ + * - | ``float nextafterf(float x, float y)`` + | Returns next representable single-precision floating-point value after argument. - ✓ + - ✗ - * - | float erfcxf ( float x ) - | Calculate the scaled complementary error function of the input argument. + * - | ``float norm3df(float x, float y, float z)`` + | Returns the square root of the sum of squares of :math:`x`, :math:`y` and :math:`z`. - ✓ - ✓ - * - | float erfinvf ( float y ) - | Calculate the inverse error function of the input argument. + * - | ``float norm4df(float x, float y, float z, float w)`` + | Returns the square root of the sum of squares of :math:`x`, :math:`y`, :math:`z` and :math:`w`. - ✓ - ✓ - - * - | float fdividef ( float x, float y ) - | Divide two floating point values. + + * - | ``float normcdff(float y)`` + | Returns the standard normal cumulative distribution function. - ✓ - ✓ - * - | float frexpf ( float x, `int *nptr` ) - | Extract mantissa and exponent of a floating-point value. + * - | ``float normcdfinvf(float y)`` + | Returns the inverse of the standard normal cumulative distribution function. 
- ✓ - ✓ - * - | float j0f ( float x ) - | Calculate the value of the Bessel function of the first kind of order 0 for the input argument. + * - | ``float normf(int dim, const float *a)`` + | Returns the square root of the sum of squares of any number of coordinates. - ✓ - ✓ - * - | float j1f ( float x ) - | Calculate the value of the Bessel function of the first kind of order 1 for the input argument. + * - | ``float powf(float x, float y)`` + | Returns :math:`x^y`. - ✓ - ✓ - * - | float jnf ( int n, float x ) - | Calculate the value of the Bessel function of the first kind of order n for the input argument. + * - | ``float powif(float base, int iexp)`` + | Returns the value of first argument to the power of second argument. - ✓ - ✓ - * - | float lgammaf ( float x ) - | Calculate the natural logarithm of the absolute value of the gamma function of the input argument. + * - | ``float remainderf(float x, float y)`` + | Returns single-precision floating-point remainder. - ✓ - ✓ - * - | long long int llrintf ( float x ) - | Round input to nearest integer value. - - ✓ + * - | ``float remquof(float x, float y, int* quo)`` + | Returns single-precision floating-point remainder and part of quotient. - ✓ + - ✓ - * - | long long int llroundf ( float x ) - | Round to nearest integer value. + * - | ``float roundf(float x)`` + | Round to nearest integer value in floating-point. - ✓ - ✓ - * - | long int lrintf ( float x ) - | Round input to nearest integer value. + * - | ``float rcbrtf(float x)`` + | Returns the reciprocal cube root function. - ✓ - ✓ - * - | long int lroundf ( float x ) - | Round to nearest integer value. + * - | ``float rhypotf(float x, float y)`` + | Returns one over the square root of the sum of squares of two arguments. - ✓ - ✓ - * - | float modff ( float x, `float *iptr` ) - | Break down the input argument into fractional and integral parts. + * - | ``float rintf(float x)`` + | Round input to nearest integer value in floating-point. 
- ✓ - ✓ - - * - | float nextafterf ( float x, float y ) - | Returns next representable single-precision floating-point value after argument. + + * - | ``float rnorm3df(float x, float y, float z)`` + | Returns one over the square root of the sum of squares of three coordinates of the argument. - ✓ - ✓ - * - | float norm3df ( float a, float b, float c ) - | Calculate the square root of the sum of squares of three coordinates of the argument. + * - | ``float rnorm4df(float x, float y, float z, float w)`` + | Returns one over the square root of the sum of squares of four coordinates of the argument. - ✓ - ✓ - * - | float norm4df ( float a, float b, float c, float d ) - | Calculate the square root of the sum of squares of four coordinates of the argument. + * - | ``float rnormf(int dim, const float *a)`` + | Returns the reciprocal of square root of the sum of squares of any number of coordinates. - ✓ - ✓ - * - | loat normcdff ( float y ) - | Calculate the standard normal cumulative distribution function. + * - | ``float scalblnf(float x, long int n)`` + | Scale :math:`x` by :math:`2^n`. - ✓ - ✓ - * - | float normcdfinvf ( float y ) - | Calculate the inverse of the standard normal cumulative distribution function. + * - | ``float scalbnf(float x, int n)`` + | Scale :math:`x` by :math:`2^n`. - ✓ - ✓ - * - | float normf ( int dim, `const float *a` ) - | Calculate the square root of the sum of squares of any number of coordinates. + * - | ``bool signbit(float x)`` + | Return the sign bit of :math:`x`. - ✓ - ✓ - * - | float rcbrtf ( float x ) - | Calculate the reciprocal cube root function. + * - | ``float sinf(float x)`` + | Returns the sine of :math:`x`. - ✓ - ✓ - * - | float remquof ( float x, float y, `int *quo` ) - | Compute single-precision floating-point remainder and part of quotient. + * - | ``float sinhf(float x)`` + | Returns the hyperbolic sine of :math:`x`. 
- ✓ - ✓ - * - | float rhypotf ( float x, float y ) - | Calculate one over the square root of the sum of squares of two arguments. + * - | ``float sinpif(float x)`` + | Returns the sine of :math:`\pi \cdot x`. - ✓ - ✓ - * - | float rintf ( float x ) - | Round input to nearest integer value in floating-point. + * - | ``void sincosf(float x, float *sptr, float *cptr)`` + | Returns the sine and cosine of :math:`x`. - ✓ - ✓ - - * - | float rnorm3df ( float a, float b, float c ) - | Calculate one over the square root of the sum of squares of three coordinates of the argument. + + * - | ``void sincospif(float x, float *sptr, float *cptr)`` + | Returns the sine and cosine of :math:`\pi \cdot x`. - ✓ - ✓ - * - | float rnorm4df ( float a, float b, float c, float d ) - | Calculate one over the square root of the sum of squares of four coordinates of the argument. + * - | ``float sqrtf(float x)`` + | Returns the square root of :math:`x`. - ✓ - ✓ - * - | float rnormf ( int dim, `const float *a` ) - | Calculate the reciprocal of square root of the sum of squares of any number of coordinates. + * - | ``float rsqrtf(float x)`` + | Returns the reciprocal of the square root of :math:`x`. + - ✗ + - ✓ + + * - | ``float tanf(float x)`` + | Returns the tangent of :math:`x`. - ✓ - ✓ - * - | float scalblnf ( float x, long int n ) - | Scale floating-point input by integer power of two. + * - | ``float tanhf(float x)`` + | Returns the hyperbolic tangent of :math:`x`. - ✓ - ✓ - - * - | void sincosf ( float x, `float *sptr`, `float *cptr`) - | Calculate the sine and cosine of the first input argument. + + * - | ``float tgammaf(float x)`` + | Returns the gamma function of :math:`x`. - ✓ - ✓ - * - | void sincospif ( float x, `float *sptr`, `float *cptr`) - | Calculate the sine and cosine of the first input argument multiplied by PI. + * - | ``float truncf(float x)`` + | Truncate :math:`x` to the integral part. 
- ✓ - ✓ - * - | float y0f ( float x ) - | Calculate the value of the Bessel function of the second kind of order 0 for the input argument. + * - | ``float y0f(float x)`` + | Returns the value of the Bessel function of the second kind of order 0 for :math:`x`. - ✓ - ✓ - * - | float y1f ( float x ) - | Calculate the value of the Bessel function of the second kind of order 1 for the input argument. + * - | ``float y1f(float x)`` + | Returns the value of the Bessel function of the second kind of order 1 for :math:`x`. - ✓ - ✓ - * - | float ynf ( int n, float x ) - | Calculate the value of the Bessel function of the second kind of order n for the input argument. + * - | ``float ynf(int n, float x)`` + | Returns the value of the Bessel function of the second kind of order n for :math:`x`. - ✓ - ✓ Double precision mathematical functions -------------------------------------------------------------------------------------------- + Following is the list of supported double precision mathematical functions. -.. list-table:: Single precision mathematical functions +.. list-table:: Double precision mathematical functions * - **Function** - **Supported on Host** - **Supported on Device** - * - | double acos ( double x ) - | Calculate the arc cosine of the input argument. + * - | ``double abs(double x)`` + | Returns the absolute value of :math:`x` - ✓ - ✓ - * - | double acosh ( double x ) - | Calculate the nonnegative arc hyperbolic cosine of the input argument. + * - | ``double acos(double x)`` + | Returns the arc cosine of :math:`x`. - ✓ - ✓ - * - | double asin ( double x ) - | Calculate the arc sine of the input argument. + * - | ``double acosh(double x)`` + | Returns the nonnegative arc hyperbolic cosine of :math:`x`. - ✓ - ✓ - * - | double asinh ( double x ) - | Calculate the arc hyperbolic sine of the input argument. + * - | ``double asin(double x)`` + | Returns the arc sine of :math:`x`. 
- ✓ - ✓ - * - | double atan ( double x ) - | Calculate the arc tangent of the input argument. + * - | ``double asinh(double x)`` + | Returns the arc hyperbolic sine of :math:`x`. - ✓ - ✓ - * - | double atan2 ( double y, double x ) - | Calculate the arc tangent of the ratio of first and second input arguments. + * - | ``double atan(double x)`` + | Returns the arc tangent of :math:`x`. - ✓ - ✓ - * - | double atanh ( double x ) - | Calculate the arc hyperbolic tangent of the input argument. + * - | ``double atan2(double x, double y)`` + | Returns the arc tangent of the ratio of :math:`x` and :math:`y`. - ✓ - ✓ - * - | double cbrt ( double x ) - | Calculate the cube root of the input argument. + * - | ``double atanh(double x)`` + | Returns the arc hyperbolic tangent of :math:`x`. - ✓ - ✓ - * - | double ceil ( double x ) - | Calculate ceiling of the input argument. + * - | ``double cbrt(double x)`` + | Returns the cube root of :math:`x`. - ✓ - ✓ - * - | double copysign ( double x, double y ) - | Create value with given magnitude, copying sign of second value. + * - | ``double ceil(double x)`` + | Returns ceiling of :math:`x`. - ✓ - ✓ - * - | double cos ( double x ) - | Calculate the cosine of the input argument. + * - | ``double copysign(double x, double y)`` + | Create value with given magnitude, copying sign of second value. - ✓ - ✓ - * - | double cosh ( double x ) - | Calculate the hyperbolic cosine of the input argument. + * - | ``double cos(double x)`` + | Returns the cosine of :math:`x`. - ✓ - ✓ - * - | double erf ( double x ) - | Calculate the error function of the input argument. + * - | ``double cosh(double x)`` + | Returns the hyperbolic cosine of :math:`x`. - ✓ - ✓ - * - | double erfc ( double x ) - | Calculate the complementary error function of the input argument. + * - | ``double cospi(double x)`` + | Returns the cosine of :math:`\pi \cdot x`. - ✓ - ✓ - * - | double exp ( double x ) - | Calculate the base e exponential of the input argument. 
- - ✓ - - ✓ + * - | ``double cyl_bessel_i0(double x)`` + | Returns the value of the regular modified cylindrical Bessel function of order 0 for :math:`x`. + - ✗ + - ✗ - * - | double exp10 ( double x ) - | Calculate the base 10 exponential of the input argument. + * - | ``double cyl_bessel_i1(double x)`` + | Returns the value of the regular modified cylindrical Bessel function of order 1 for :math:`x`. + - ✗ + - ✗ + + * - | ``double erf(double x)`` + | Returns the error function of :math:`x`. - ✓ - ✓ - * - | double exp2 ( double x ) - | Calculate the base 2 exponential of the input argument. + * - | ``double erfc(double x)`` + | Returns the complementary error function of :math:`x`. - ✓ - ✓ - * - | double expm1 ( double x ) - | Calculate the base e exponential of the input argument, minus 1. + * - | ``double erfcinv(double x)`` + | Returns the inverse complementary error function of :math:`x`. - ✓ - ✓ - * - | double fabs ( double x ) - | Calculate the absolute value of the input argument. + * - | ``double erfcx(double x)`` + | Returns the scaled complementary error function of :math:`x`. - ✓ - ✓ - * - | double fdim ( double x, double y ) - | Compute the positive difference between `x` and `y`. + * - | ``double erfinv(double x)`` + | Returns the inverse error function of :math:`x`. - ✓ - ✓ - * - | double floor ( double x ) - | Calculate the largest integer less than or equal to `x`. + * - | ``double exp(double x)`` + | Returns :math:`e^x`. - ✓ - ✓ - * - | double fma ( double x, double y, double z ) - | Compute `x × y + z` as a single operation. + * - | ``double exp10(double x)`` + | Returns :math:`10^x`. - ✓ - ✓ - * - | double fmax ( double , double ) - | Determine the maximum numeric value of the arguments. + * - | ``double exp2(double x)`` + | Returns :math:`2^x`. - ✓ - ✓ - - * - | double fmin ( double x, double y ) - | Determine the minimum numeric value of the arguments. 
+ + * - | ``double expm1(double x)`` + | Returns :math:`e^x - 1`. - ✓ - ✓ - * - | double fmod ( double x, double y ) - | Calculate the floating-point remainder of `x / y`. + * - | ``double fabs(double x)`` + | Returns the absolute value of :math:`x`. - ✓ - ✓ - - * - | double frexp ( double x, int* nptr ) - | Extract mantissa and exponent of a floating-point value. + + * - | ``double fdim(double x, double y)`` + | Returns the positive difference between :math:`x` and :math:`y`. + - ✓ - ✓ - - ✗ - * - | double hypot ( double x, double y ) - | Calculate the square root of the sum of squares of two arguments. + * - | ``double floor(double x)`` + | Returns the largest integer less than or equal to :math:`x`. - ✓ - ✓ - * - | int ilogb ( double x ) - | Compute the unbiased integer exponent of the argument. + * - | ``double fma(double x, double y, double z)`` + | Returns :math:`x \cdot y + z` as a single operation. - ✓ - ✓ - * - | __RETURN_TYPE isfinite ( double a ) - | Determine whether argument is finite. + * - | ``double fmax(double x, double y)`` + | Determine the maximum numeric value of :math:`x` and :math:`y`. - ✓ - ✓ - * - | __RETURN_TYPE isinf ( double a ) - | Determine whether argument is infinite. + * - | ``double fmin(double x, double y)`` + | Determine the minimum numeric value of :math:`x` and :math:`y`. - ✓ - ✓ - - * - | __RETURN_TYPE isnan ( double a ) - | Determine whether argument is a NaN. + + * - | ``double fmod(double x, double y)`` + | Returns the floating-point remainder of :math:`x / y`. - ✓ - ✓ - * - | double ldexp ( double x, int exp ) - | Calculate the value of x ⋅ 2 exp. + * - | ``double modf(double x, double* iptr)`` + | Break down :math:`x` into fractional and integral parts. - ✓ + - ✗ + + * - | ``double frexp(double x, int* nptr)`` + | Extract mantissa and exponent of :math:`x`. - ✓ + - ✗ - * - | double log ( double x ) - | Calculate the base e logarithm of the input argument. 
+ * - | ``double hypot(double x, double y)`` + | Returns the square root of the sum of squares of :math:`x` and :math:`y`. - ✓ - ✓ - * - | double log10 ( double x ) - | Calculate the base 10 logarithm of the input argument. + * - | ``int ilogb(double x)`` + | Returns the unbiased integer exponent of :math:`x`. - ✓ - ✓ - * - | double log1p ( double x ) - | Calculate the value of logarithm of exp ( 1 + x ). + * - | ``bool isfinite(double x)`` + | Determine whether :math:`x` is finite. - ✓ - ✓ - * - | double log2 ( double x ) - | Calculate the base 2 logarithm of the input argument. + * - | ``bool isinf(double x)`` + | Determine whether :math:`x` is infinite. - ✓ - ✓ - * - | double logb ( double x ) - | Calculate the floating point representation of the exponent of the input argument. + * - | ``bool isnan(double x)`` + | Determine whether :math:`x` is a ``NAN``. - ✓ - ✓ - * - | double modf ( double x, `double* iptr` ) - | Break down the input argument into fractional and integral parts. + * - | ``double j0(double x)`` + | Returns the value of the Bessel function of the first kind of order 0 for :math:`x`. - ✓ - - ✗ - - * - | double nan ( const `char* tagp`) - | Returns ``Not a Number`` value. - - ✗ - ✓ - * - | double nearbyint ( double x ) - | Round the input argument to the nearest integer. + * - | ``double j1(double x)`` + | Returns the value of the Bessel function of the first kind of order 1 for :math:`x`. - ✓ - ✓ - * - | double pow ( double x, double y ) - | Calculate the value of first argument to the power of second argument. + * - | ``double jn(int n, double x)`` + | Returns the value of the Bessel function of the first kind of order n for :math:`x`. - ✓ - ✓ - * - | double remainder ( double x, double y ) - | Compute double-precision floating-point remainder. + * - | ``double ldexp(double x, int exp)`` + | Returns :math:`x \cdot 2^{exp}`. 
- ✓ - ✓ - * - | double remquo ( double x, double y, `int* quo` ) - | Compute double-precision floating-point remainder and part of quotient. + * - | ``double lgamma(double x)`` + | Returns the natural logarithm of the absolute value of the gamma function of :math:`x`. - ✓ - ✗ - - * - | double round ( double x ) - | Round to nearest integer value in floating-point. + + * - | ``long int lrint(double x)`` + | Round :math:`x` to nearest integer value. - ✓ - ✓ - * - | double scalbn ( double x, int n ) - | Scale floating-point input by integer power of two. + * - | ``long long int llrint(double x)`` + | Round :math:`x` to nearest integer value. - ✓ - ✓ - - * - | __RETURN_TYPE signbit ( double a ) - | Return the sign bit of the input. + + * - | ``long int lround(double x)`` + | Round to nearest integer value. - ✓ - ✓ - * - | double sin ( double x ) - | Calculate the sine of the input argument. + * - | ``long long int llround(double x)`` + | Round to nearest integer value. - ✓ - ✓ - * - | void sincos ( double x, `double* sptr`, `double* cptr` ) - | Calculate the sine and cosine of the first input argument. - - ✓ - - ✗ - - * - | double sinh ( double x ) - | Calculate the hyperbolic sine of the input argument. + * - | ``double log10(double x)`` + | Returns the base 10 logarithm of :math:`x`. - ✓ - ✓ - * - | double sqrt ( double x ) - | Calculate the square root of the input argument. + * - | ``double log1p(double x)`` + | Returns the natural logarithm of :math:`x + 1`. - ✓ - ✓ - - * - | double tan ( double x ) - | Calculate the tangent of the input argument. + + * - | ``double log2(double x)`` + | Returns the base 2 logarithm of :math:`x`. - ✓ - ✓ - * - | double tanh ( double x ) - | Calculate the hyperbolic tangent of the input argument. + * - | ``double log(double x)`` + | Returns the natural logarithm of :math:`x`. - ✓ - ✓ - * - | double tgamma ( double x ) - | Calculate the gamma function of the input argument. 
+ * - | ``double logb(double x)`` + | Returns the floating point representation of the exponent of :math:`x`. - ✓ - ✓ - * - | double trunc ( double x ) - | Truncate input argument to the integral part. + * - | ``double nan(const char* tagp)`` + | Returns "Not a Number" value. + - ✗ + - ✓ + + * - | ``double nearbyint(double x)`` + | Round :math:`x` to the nearest integer. - ✓ - ✓ - * - | double erfcinv ( double y ) - | Calculate the inverse complementary function of the input argument. + * - | ``double nextafter(double x, double y)`` + | Returns next representable double-precision floating-point value after argument. - ✓ - ✓ - * - | double erfcx ( double x ) - | Calculate the scaled complementary error function of the input argument. + * - | ``double norm3d(double x, double y, double z)`` + | Returns the square root of the sum of squares of :math:`x`, :math:`y` and :math:`z`. - ✓ - ✓ - * - | double erfinv ( double y ) - | Calculate the inverse error function of the input argument. + * - | ``double norm4d(double x, double y, double z, double w)`` + | Returns the square root of the sum of squares of :math:`x`, :math:`y`, :math:`z` and :math:`w`. - ✓ - ✓ - - * - | double frexp ( float x, `int *nptr` ) - | Extract mantissa and exponent of a floating-point value. + + * - | ``double normcdf(double y)`` + | Returns the standard normal cumulative distribution function. - ✓ - ✓ - * - | double j0 ( double x ) - | Calculate the value of the Bessel function of the first kind of order 0 for the input argument. + * - | ``double normcdfinv(double y)`` + | Returns the inverse of the standard normal cumulative distribution function. - ✓ - ✓ - * - | double j1 ( double x ) - | Calculate the value of the Bessel function of the first kind of order 1 for the input argument. + * - | ``double norm(int dim, const double *a)`` + | Returns the square root of the sum of squares of any number of coordinates. 
- ✓ - ✓ - * - | double jn ( int n, double x ) - | Calculate the value of the Bessel function of the first kind of order n for the input argument. + * - | ``double pow(double x, double y)`` + | Returns :math:`x^y`. - ✓ - ✓ - * - | double lgamma ( double x ) - | Calculate the natural logarithm of the absolute value of the gamma function of the input argument. + * - | ``double powi(double base, int iexp)`` + | Returns the value of first argument to the power of second argument. - ✓ - ✓ - * - | long long int llrint ( double x ) - | Round input to nearest integer value. + * - | ``double remainder(double x, double y)`` + | Returns double-precision floating-point remainder. - ✓ - ✓ + * - | ``double remquo(double x, double y, int* quo)`` + | Returns double-precision floating-point remainder and part of quotient. + - ✓ + - ✗ - * - | long long int llround ( double x ) - | Round to nearest integer value. + * - | ``double round(double x)`` + | Round to nearest integer value in floating-point. - ✓ - ✓ - * - | long int lrint ( double x ) - | Round input to nearest integer value. + * - | ``double rcbrt(double x)`` + | Returns the reciprocal cube root function. - ✓ - ✓ - * - | long int lround ( double x ) - | Round to nearest integer value. + * - | ``double rhypot(double x, double y)`` + | Returns one over the square root of the sum of squares of two arguments. - ✓ - ✓ - * - | double modf ( double x, `double *iptr` ) - | Break down the input argument into fractional and integral parts. + * - | ``double rint(double x)`` + | Round input to nearest integer value in floating-point. - ✓ - ✓ - - * - | double nextafter ( double x, double y ) - | Returns next representable single-precision floating-point value after argument. + + * - | ``double rnorm3d(double x, double y, double z)`` + | Returns one over the square root of the sum of squares of three coordinates of the argument. 
- ✓ - ✓ - * - | double norm3d ( double a, double b, double c ) - | Calculate the square root of the sum of squares of three coordinates of the argument. + * - | ``double rnorm4d(double x, double y, double z, double w)`` + | Returns one over the square root of the sum of squares of four coordinates of the argument. - ✓ - ✓ - * - | float norm4d ( double a, double b, double c, double d ) - | Calculate the square root of the sum of squares of four coordinates of the argument. + * - | ``double rnorm(int dim, const double *a)`` + | Returns the reciprocal of square root of the sum of squares of any number of coordinates. - ✓ - ✓ - * - | double normcdf ( double y ) - | Calculate the standard normal cumulative distribution function. + * - | ``double scalbln(double x, long int n)`` + | Scale :math:`x` by :math:`2^n`. - ✓ - ✓ - * - | double normcdfinv ( double y ) - | Calculate the inverse of the standard normal cumulative distribution function. + * - | ``double scalbn(double x, int n)`` + | Scale :math:`x` by :math:`2^n`. - ✓ - ✓ - * - | double rcbrt ( double x ) - | Calculate the reciprocal cube root function. + * - | ``bool signbit(double x)`` + | Return the sign bit of :math:`x`. - ✓ - ✓ - * - | double remquo ( double x, `double y`, `int *quo` ) - | Compute single-precision floating-point remainder and part of quotient. + * - | ``double sin(double x)`` + | Returns the sine of :math:`x`. - ✓ - ✓ - * - | double rhypot ( double x, double y ) - | Calculate one over the square root of the sum of squares of two arguments. + * - | ``double sinh(double x)`` + | Returns the hyperbolic sine of :math:`x`. - ✓ - ✓ - * - | double rint ( double x ) - | Round input to nearest integer value in floating-point. + * - | ``double sinpi(double x)`` + | Returns the sine of :math:`\pi \cdot x`. - ✓ - ✓ - * - | double rnorm3d ( double a, double b, double c ) - | Calculate one over the square root of the sum of squares of three coordinates of the argument.
+ * - | ``void sincos(double x, double *sptr, double *cptr)`` + | Returns the sine and cosine of :math:`x`. - ✓ - ✓ - * - | double rnorm4d ( double a, double b, double c, double d ) - | Calculate one over the square root of the sum of squares of four coordinates of the argument. + * - | ``void sincospi(double x, double *sptr, double *cptr)`` + | Returns the sine and cosine of :math:`\pi \cdot x`. - ✓ - ✓ - * - | double rnorm ( int dim, `const double *a` ) - | Calculate the reciprocal of square root of the sum of squares of any number of coordinates. + * - | ``double sqrt(double x)`` + | Returns the square root of :math:`x`. + - ✓ - ✓ + + * - | ``double rsqrt(double x)`` + | Returns the reciprocal of the square root of :math:`x`. + - ✗ - ✓ - * - | double scalbln ( double x, long int n ) - | Scale floating-point input by integer power of two. + * - | ``double tan(double x)`` + | Returns the tangent of :math:`x`. - ✓ - ✓ - * - | void sincos ( double x, `double *sptr`, `double *cptr` ) - | Calculate the sine and cosine of the first input argument. + * - | ``double tanh(double x)`` + | Returns the hyperbolic tangent of :math:`x`. - ✓ - ✓ - * - | void sincospi ( double x, `double *sptr`, `double *cptr` ) - | Calculate the sine and cosine of the first input argument multiplied by PI. + * - | ``double tgamma(double x)`` + | Returns the gamma function of :math:`x`. - ✓ - ✓ - * - | double y0f ( double x ) - | Calculate the value of the Bessel function of the second kind of order 0 for the input argument. + * - | ``double trunc(double x)`` + | Truncate :math:`x` to the integral part. + - ✓ + - ✓ + + * - | ``double y0(double x)`` + | Returns the value of the Bessel function of the second kind of order 0 for :math:`x`. - ✓ - ✓ - * - | double y1 ( double x ) - | Calculate the value of the Bessel function of the second kind of order 1 for the input argument. + * - | ``double y1(double x)`` + | Returns the value of the Bessel function of the second kind of order 1 for :math:`x`. 
- ✓ - ✓ - * - | double yn ( int n, double x ) - | Calculate the value of the Bessel function of the second kind of order n for the input argument. + * - | ``double yn(int n, double x)`` + | Returns the value of the Bessel function of the second kind of order n for :math:`x`. - ✓ - ✓ Integer intrinsics -------------------------------------------------------------------------------------------- + Following is the list of supported integer intrinsics. Note that intrinsics are supported on device only. -.. list-table:: Single precision mathematical functions +.. list-table:: Integer intrinsics mathematical functions * - **Function** - * - | double acos ( double x ) - | Calculate the arc cosine of the input argument. - - * - | unsigned int __brev ( unsigned int x ) + * - | ``unsigned int __brev(unsigned int x)`` | Reverse the bit order of a 32 bit unsigned integer. - * - | unsigned long long int __brevll ( unsigned long long int x ) + * - | ``unsigned long long int __brevll(unsigned long long int x)`` | Reverse the bit order of a 64 bit unsigned integer. - * - | int __clz ( int x ) - | Return the number of consecutive high-order zero bits in a 32 bit integer. + * - | ``unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int z)`` + | Return selected bytes from two 32-bit unsigned integers. - * - | unsigned int __clz(unsigned int x) - | Return the number of consecutive high-order zero bits in 32 bit unsigned integer. + * - | ``unsigned int __clz(int x)`` + | Return the number of consecutive high-order zero bits in 32 bit integer. - * - | int __clzll ( long long int x ) - | Count the number of consecutive high-order zero bits in a 64 bit integer. + * - | ``unsigned int __clzll(long long int x)`` + | Return the number of consecutive high-order zero bits in 64 bit integer. - * - | unsigned int __clzll(long long int x) - | Return the number of consecutive high-order zero bits in 64 bit signed integer. 
+ * - | ``unsigned int __ffs(int x)`` + | Find the position of least significant bit set to 1 in a 32 bit integer. - * - | unsigned int __ffs(unsigned int x) - | Find the position of least signigicant bit set to 1 in a 32 bit unsigned integer. + * - | ``unsigned int __ffsll(long long int x)`` + | Find the position of least significant bit set to 1 in a 64 bit signed integer. - * - | unsigned int __ffs(int x) - | Find the position of least signigicant bit set to 1 in a 32 bit signed integer. + * - | ``unsigned int __fns32(unsigned long long mask, unsigned int base, int offset)`` + | Find the position of the n-th bit set to 1 in a 32-bit integer. - * - | unsigned int __ffsll(unsigned long long int x) - | Find the position of least signigicant bit set to 1 in a 64 bit unsigned integer. + * - | ``unsigned int __fns64(unsigned long long int mask, unsigned int base, int offset)`` + | Find the position of the n-th bit set to 1 in a 64-bit integer. - * - | unsigned int __ffsll(long long int x) - | Find the position of least signigicant bit set to 1 in a 64 bit signed integer. + * - | ``unsigned int __funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift)`` + | Concatenate :math:`hi` and :math:`lo`, shift left by shift & 31 bits, return the most significant 32 bits. + + * - | ``unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift)`` + | Concatenate :math:`hi` and :math:`lo`, shift left by min(shift, 32) bits, return the most significant 32 bits. + + * - | ``unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift)`` + | Concatenate :math:`hi` and :math:`lo`, shift right by shift & 31 bits, return the least significant 32 bits. + + * - | ``unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift)`` + | Concatenate :math:`hi` and :math:`lo`, shift right by min(shift, 32) bits, return the least significant 32 bits.
+ + * - | ``unsigned int __hadd(int x, int y)`` + | Compute average of signed input arguments, avoiding overflow in the intermediate sum. + + * - | ``unsigned int __rhadd(int x, int y)`` + | Compute rounded average of signed input arguments, avoiding overflow in the intermediate sum. + + * - | ``unsigned int __uhadd(unsigned int x, unsigned int y)`` + | Compute average of unsigned input arguments, avoiding overflow in the intermediate sum. - * - | unsigned int __popc ( unsigned int x ) + * - | ``unsigned int __urhadd(unsigned int x, unsigned int y)`` + | Compute rounded average of unsigned input arguments, avoiding overflow in the intermediate sum. + + * - | ``int __sad(int x, int y, int z)`` + | Returns :math:`|x - y| + z`, the sum of absolute difference. + + * - | ``unsigned int __usad(unsigned int x, unsigned int y, unsigned int z)`` + | Returns :math:`|x - y| + z`, the sum of absolute difference. + + * - | ``unsigned int __popc(unsigned int x)`` | Count the number of bits that are set to 1 in a 32 bit integer. - * - | unsigned int __popcll ( unsigned long long int x ) + * - | ``unsigned int __popcll(unsigned long long int x)`` | Count the number of bits that are set to 1 in a 64 bit integer. - * - | int __mul24 ( int x, int y ) + * - | ``int __mul24(int x, int y)`` | Multiply two 24bit integers. - * - | unsigned int __umul24 ( unsigned int x, unsigned int y ) + * - | ``unsigned int __umul24(unsigned int x, unsigned int y)`` | Multiply two 24bit unsigned integers. + * - | ``int __mulhi(int x, int y)`` + | Returns the most significant 32 bits of the product of the two 32-bit integers. + + * - | ``unsigned int __umulhi(unsigned int x, unsigned int y)`` + | Returns the most significant 32 bits of the product of the two 32-bit unsigned integers. + + * - | ``long long int __mul64hi(long long int x, long long int y)`` + | Returns the most significant 64 bits of the product of the two 64-bit integers.
+ + * - | ``unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y)`` + | Returns the most significant 64 bits of the product of the two 64-bit unsigned integers. + The HIP-Clang implementation of ``__ffs()`` and ``__ffsll()`` contains code to add a constant +1 to produce the ffs result format. For the cases where this overhead is not acceptable and programmer is willing to specialize for the platform, -HIP-Clang provides `__lastbit_u32_u32(unsigned int input)` and `__lastbit_u32_u64(unsigned long long int input)`. +HIP-Clang provides ``__lastbit_u32_u32(unsigned int input)`` and ``__lastbit_u32_u64(unsigned long long int input)``. The index returned by ``__lastbit_`` instructions starts at -1, while for ffs the index starts at 0. Floating-point Intrinsics -------------------------------------------------------------------------------------------- + Following is the list of supported floating-point intrinsics. Note that intrinsics are supported on device only. -.. list-table:: Single precision mathematical functions +.. note:: + + Only the round-to-nearest-even rounding mode is supported on AMD GPUs by default. The ``_rz``, ``_ru`` and + ``_rd`` suffixed intrinsic functions exist in the HIP AMD backend only if the + ``OCML_BASIC_ROUNDED_OPERATIONS`` macro is defined. + +.. list-table:: Single precision intrinsics mathematical functions * - **Function** - * - | float __cosf ( float x ) - | Calculate the fast approximate cosine of the input argument. + * - | ``float __cosf(float x)`` + | Returns the fast approximate cosine of :math:`x`. + + * - | ``float __exp10f(float x)`` + | Returns the fast approximate for 10 :sup:`x`. + + * - | ``float __expf(float x)`` + | Returns the fast approximate for e :sup:`x`. + + * - | ``float __fadd_rn(float x, float y)`` + | Add two floating-point values in round-to-nearest-even mode. + + * - | ``float __fdiv_rn(float x, float y)`` + | Divide two floating-point values in round-to-nearest-even mode.
+ + * - | ``float __fmaf_rn(float x, float y, float z)`` + | Returns ``x × y + z`` as a single operation in round-to-nearest-even mode. + + * - | ``float __fmul_rn(float x, float y)`` + | Multiply two floating-point values in round-to-nearest-even mode. + + * - | ``float __frcp_rn(float x)`` + | Returns ``1 / x`` in round-to-nearest-even mode. + + * - | ``float __frsqrt_rn(float x)`` + | Returns ``1 / √x`` in round-to-nearest-even mode. + + * - | ``float __fsqrt_rn(float x)`` + | Returns ``√x`` in round-to-nearest-even mode. - * - | float __expf ( float x ) - | Calculate the fast approximate base e exponential of the input argument. + * - | ``float __fsub_rn(float x, float y)`` + | Subtract two floating-point values in round-to-nearest-even mode. - * - | float __frsqrt_rn ( float x ) - | Compute `1 / √x` in round-to-nearest-even mode. + * - | ``float __log10f(float x)`` + | Returns the fast approximate for base 10 logarithm of :math:`x`. + + * - | ``float __log2f(float x)`` + | Returns the fast approximate for base 2 logarithm of :math:`x`. + + * - | ``float __logf(float x)`` + | Returns the fast approximate for natural logarithm of :math:`x`. + + * - | ``float __powf(float x, float y)`` + | Returns the fast approximate of x :sup:`y`. + + * - | ``float __saturatef(float x)`` + | Clamp :math:`x` to [+0.0, 1.0]. + + * - | ``void __sincosf(float x, float* sinptr, float* cosptr)`` + | Returns the fast approximate of sine and cosine of :math:`x`. + + * - | ``float __sinf(float x)`` + | Returns the fast approximate sine of :math:`x`. + + * - | ``float __tanf(float x)`` + | Returns the fast approximate tangent of :math:`x`. + +.. list-table:: Double precision intrinsics mathematical functions + + * - **Function** - * - | float __fsqrt_rn ( float x ) - | Compute `√x` in round-to-nearest-even mode. + * - | ``double __dadd_rn(double x, double y)`` + | Add two floating-point values in round-to-nearest-even mode.
- * - | float __log10f ( float x ) - | Calculate the fast approximate base 10 logarithm of the input argument. + * - | ``double __ddiv_rn(double x, double y)`` + | Divide two floating-point values in round-to-nearest-even mode. - * - | float __log2f ( float x ) - | Calculate the fast approximate base 2 logarithm of the input argument. + * - | ``double __dmul_rn(double x, double y)`` + | Multiply two floating-point values in round-to-nearest-even mode. - * - | float __logf ( float x ) - | Calculate the fast approximate base e logarithm of the input argument. + * - | ``double __drcp_rn(double x)`` + | Returns ``1 / x`` in round-to-nearest-even mode. - * - | float __powf ( float x, float y ) - | Calculate the fast approximate of xy. + * - | ``double __dsqrt_rn(double x)`` + | Returns ``√x`` in round-to-nearest-even mode. - * - | float __sinf ( float x ) - | Calculate the fast approximate sine of the input argument. + * - | ``double __dsub_rn(double x, double y)`` + | Subtract two floating-point values in round-to-nearest-even mode. - * - | float __tanf ( float x ) - | Calculate the fast approximate tangent of the input argument. + * - | ``double __fma_rn(double x, double y, double z)`` + | Returns ``x × y + z`` as a single operation in round-to-nearest-even mode. - * - | double __dsqrt_rn ( double x ) - | Compute `√x` in round-to-nearest-even mode. Texture functions =============================================== @@ -1372,255 +1497,255 @@ HIP supports the following atomic operations.
- **Supported in HIP** - **Supported in CUDA** - * - int atomicAdd(int* address, int val) + * - ``int atomicAdd(int* address, int val)`` - ✓ - ✓ - * - int atomicAdd_system(int* address, int val) + * - ``int atomicAdd_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicAdd(unsigned int* address,unsigned int val) + * - ``unsigned int atomicAdd(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicAdd_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicAdd_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicAdd(unsigned long long* address,unsigned long long val) + * - ``unsigned long long atomicAdd(unsigned long long* address,unsigned long long val)`` - ✓ - ✓ - * - unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) + * - ``unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val)`` - ✓ - ✓ - * - float atomicAdd(float* address, float val) + * - ``float atomicAdd(float* address, float val)`` - ✓ - ✓ - * - float atomicAdd_system(float* address, float val) + * - ``float atomicAdd_system(float* address, float val)`` - ✓ - ✓ - * - double atomicAdd(double* address, double val) + * - ``double atomicAdd(double* address, double val)`` - ✓ - ✓ - * - double atomicAdd_system(double* address, double val) + * - ``double atomicAdd_system(double* address, double val)`` - ✓ - ✓ - * - float unsafeAtomicAdd(float* address, float val) + * - ``float unsafeAtomicAdd(float* address, float val)`` - ✓ - ✗ - * - float safeAtomicAdd(float* address, float val) + * - ``float safeAtomicAdd(float* address, float val)`` - ✓ - ✗ - * - double unsafeAtomicAdd(double* address, double val) + * - ``double unsafeAtomicAdd(double* address, double val)`` - ✓ - ✗ - * - double safeAtomicAdd(double* address, double val) + * - ``double safeAtomicAdd(double* address, double val)`` - ✓ - ✗ - * - int atomicSub(int* address, int val) + * - ``int 
atomicSub(int* address, int val)`` - ✓ - ✓ - * - int atomicSub_system(int* address, int val) + * - ``int atomicSub_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicSub(unsigned int* address,unsigned int val) + * - ``unsigned int atomicSub(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicSub_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicSub_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - int atomicExch(int* address, int val) + * - ``int atomicExch(int* address, int val)`` - ✓ - ✓ - * - int atomicExch_system(int* address, int val) + * - ``int atomicExch_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicExch(unsigned int* address,unsigned int val) + * - ``unsigned int atomicExch(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicExch_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicExch_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicExch(unsigned long long int* address,unsigned long long int val) + * - ``unsigned long long atomicExch(unsigned long long int* address,unsigned long long int val)`` - ✓ - ✓ - * - unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) + * - ``unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val)`` - ✓ - ✓ - * - unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) + * - ``unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val)`` - ✓ - ✓ - * - float atomicExch(float* address, float val) + * - ``float atomicExch(float* address, float val)`` - ✓ - ✓ - * - int atomicMin(int* address, int val) + * - ``int atomicMin(int* address, int val)`` - ✓ - ✓ - * - int atomicMin_system(int* address, int val) + * - ``int atomicMin_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicMin(unsigned int* address,unsigned 
int val) + * - ``unsigned int atomicMin(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicMin_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicMin_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicMin(unsigned long long* address,unsigned long long val) + * - ``unsigned long long atomicMin(unsigned long long* address,unsigned long long val)`` - ✓ - ✓ - * - int atomicMax(int* address, int val) + * - ``int atomicMax(int* address, int val)`` - ✓ - ✓ - * - int atomicMax_system(int* address, int val) + * - ``int atomicMax_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicMax(unsigned int* address,unsigned int val) + * - ``unsigned int atomicMax(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicMax_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicMax_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicMax(unsigned long long* address,unsigned long long val) + * - ``unsigned long long atomicMax(unsigned long long* address,unsigned long long val)`` - ✓ - ✓ - * - unsigned int atomicInc(unsigned int* address) + * - ``unsigned int atomicInc(unsigned int* address)`` - ✗ - ✓ - * - unsigned int atomicDec(unsigned int* address) + * - ``unsigned int atomicDec(unsigned int* address)`` - ✗ - ✓ - * - int atomicCAS(int* address, int compare, int val) + * - ``int atomicCAS(int* address, int compare, int val)`` - ✓ - ✓ - * - int atomicCAS_system(int* address, int compare, int val) + * - ``int atomicCAS_system(int* address, int compare, int val)`` - ✓ - ✓ - * - unsigned int atomicCAS(unsigned int* address,unsigned int compare,unsigned int val) + * - ``unsigned int atomicCAS(unsigned int* address,unsigned int compare,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) + * - ``unsigned int atomicCAS_system(unsigned int* address, 
unsigned int compare, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicCAS(unsigned long long* address,unsigned long long compare,unsigned long long val) + * - ``unsigned long long atomicCAS(unsigned long long* address,unsigned long long compare,unsigned long long val)`` - ✓ - ✓ - * - unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare, unsigned long long val) + * - ``unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare, unsigned long long val)`` - ✓ - ✓ - * - int atomicAnd(int* address, int val) + * - ``int atomicAnd(int* address, int val)`` - ✓ - ✓ - * - int atomicAnd_system(int* address, int val) + * - ``int atomicAnd_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicAnd(unsigned int* address,unsigned int val) + * - ``unsigned int atomicAnd(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicAnd_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicAnd_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicAnd(unsigned long long* address,unsigned long long val) + * - ``unsigned long long atomicAnd(unsigned long long* address,unsigned long long val)`` - ✓ - ✓ - * - unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) + * - ``unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val)`` - ✓ - ✓ - * - int atomicOr(int* address, int val) + * - ``int atomicOr(int* address, int val)`` - ✓ - ✓ - * - int atomicOr_system(int* address, int val) + * - ``int atomicOr_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicOr(unsigned int* address,unsigned int val) + * - ``unsigned int atomicOr(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicOr_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicOr_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned int 
atomicOr_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicOr_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicOr(unsigned long long int* address,unsigned long long val) + * - ``unsigned long long atomicOr(unsigned long long int* address,unsigned long long val)`` - ✓ - ✓ - * - unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) + * - ``unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val)`` - ✓ - ✓ - * - int atomicXor(int* address, int val) + * - ``int atomicXor(int* address, int val)`` - ✓ - ✓ - * - int atomicXor_system(int* address, int val) + * - ``int atomicXor_system(int* address, int val)`` - ✓ - ✓ - * - unsigned int atomicXor(unsigned int* address,unsigned int val) + * - ``unsigned int atomicXor(unsigned int* address,unsigned int val)`` - ✓ - ✓ - * - unsigned int atomicXor_system(unsigned int* address, unsigned int val) + * - ``unsigned int atomicXor_system(unsigned int* address, unsigned int val)`` - ✓ - ✓ - * - unsigned long long atomicXor(unsigned long long* address,unsigned long long val) + * - ``unsigned long long atomicXor(unsigned long long* address,unsigned long long val)`` - ✓ - ✓ - * - unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) + * - ``unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val)`` - ✓ - ✓ @@ -1826,103 +1951,103 @@ HIP supports the following kernel language cooperative groups types and function - **Supported in HIP** - **Supported in CUDA** - * - void thread_group.sync(); + * - ``void thread_group.sync();`` - ✓ - ✓ - * - unsigned thread_group.size(); + * - ``unsigned thread_group.size();`` - ✓ - ✓ - * - unsigned thread_group.thread_rank() + * - ``unsigned thread_group.thread_rank()`` - ✓ - ✓ - * - bool thread_group.is_valid(); + * - ``bool thread_group.is_valid();`` - ✓ - ✓ - * - grid_group this_grid() + * - ``grid_group 
this_grid()`` - ✓ - ✓ - * - void grid_group.sync() + * - ``void grid_group.sync()`` - ✓ - ✓ - * - unsigned grid_group.size() + * - ``unsigned grid_group.size()`` - ✓ - ✓ - * - unsigned grid_group.thread_rank() + * - ``unsigned grid_group.thread_rank()`` - ✓ - ✓ - * - bool grid_group.is_valid() + * - ``bool grid_group.is_valid()`` - ✓ - ✓ - * - multi_grid_group this_multi_grid() + * - ``multi_grid_group this_multi_grid()`` - ✓ - ✓ - * - void multi_grid_group.sync() + * - ``void multi_grid_group.sync()`` - ✓ - ✓ - * - unsigned multi_grid_group.size() + * - ``unsigned multi_grid_group.size()`` - ✓ - ✓ - * - unsigned multi_grid_group.thread_rank() + * - ``unsigned multi_grid_group.thread_rank()`` - ✓ - ✓ - * - bool multi_grid_group.is_valid() + * - ``bool multi_grid_group.is_valid()`` - ✓ - ✓ - * - unsigned multi_grid_group.num_grids() + * - ``unsigned multi_grid_group.num_grids()`` - ✓ - ✓ - * - unsigned multi_grid_group.grid_rank() + * - ``unsigned multi_grid_group.grid_rank()`` - ✓ - ✓ - * - thread_block this_thread_block() + * - ``thread_block this_thread_block()`` - ✓ - ✓ - * - multi_grid_group this_multi_grid() + * - ``multi_grid_group this_multi_grid()`` - ✓ - ✓ - * - void multi_grid_group.sync() + * - ``void multi_grid_group.sync()`` - ✓ - ✓ - * - void thread_block.sync() + * - ``void thread_block.sync()`` - ✓ - ✓ - * - unsigned thread_block.size() + * - ``unsigned thread_block.size()`` - ✓ - ✓ - * - unsigned thread_block.thread_rank() + * - ``unsigned thread_block.thread_rank()`` - ✓ - ✓ - * - bool thread_block.is_valid() + * - ``bool thread_block.is_valid()`` - ✓ - ✓ - * - dim3 thread_block.group_index() + * - ``dim3 thread_block.group_index()`` - ✓ - ✓ - * - dim3 thread_block.thread_index() + * - ``dim3 thread_block.thread_index()`` - ✓ - ✓ @@ -1940,23 +2065,23 @@ HIP does not support kernel language warp matrix types or functions. 
- **Supported in HIP** - **Supported in CUDA** - * - void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda) + * - ``void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda)`` - ✗ - ✓ - * - void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda, layout_t layout) + * - ``void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda, layout_t layout)`` - ✗ - ✓ - * - void store_matrix_sync(T* mptr, fragment<...> &a, unsigned lda, layout_t layout) + * - ``void store_matrix_sync(T* mptr, fragment<...> &a, unsigned lda, layout_t layout)`` - ✗ - ✓ - * - void fill_fragment(fragment<...> &a, const T &value) + * - ``void fill_fragment(fragment<...> &a, const T &value)`` - ✗ - ✓ - * - void mma_sync(fragment<...> &d, const fragment<...> &a, const fragment<...> &b, const fragment<...> &c , bool sat) + * - ``void mma_sync(fragment<...> &d, const fragment<...> &a, const fragment<...> &b, const fragment<...> &c , bool sat)`` - ✗ - ✓