From 2a08a205c35f74c249952504b7eabb29a3d242bf Mon Sep 17 00:00:00 2001
From: Istvan Kiss <neon60@gmail.com>
Date: Thu, 23 May 2024 15:49:41 +0200
Subject: [PATCH] Update the math functions documentation

- Function should be code in the rst table
- Fix single and double precision intrinsic functions
- OCML_BASIC_ROUNDED_OPERATIONS define macro is undefined by default
- Update integer intrinsic math functions
- PR findings
- Remove function duplication and rename math function variables
- Added missing math functions and updated description
- Minor fixes
---
 docs/reference/kernel_language.rst | 1181 +++++++++++++++-------------
 1 file changed, 653 insertions(+), 528 deletions(-)

diff --git a/docs/reference/kernel_language.rst b/docs/reference/kernel_language.rst
index 328d517042..ae87e9e11e 100644
--- a/docs/reference/kernel_language.rst
+++ b/docs/reference/kernel_language.rst
@@ -310,7 +310,6 @@ Synchronization functions
 The ``__syncthreads()`` built-in function is supported in HIP. The ``__syncthreads_count(int)``,
 ``__syncthreads_and(int)``, and ``__syncthreads_or(int)`` functions are under development.
 
-
 Math functions
 ====================================================
 
@@ -319,6 +318,7 @@ These are described in the following sections.
 
 Single precision mathematical functions
 --------------------------------------------------------------------------------------------
+
 Following is the list of supported single precision mathematical functions.
 
 .. list-table:: Single precision mathematical functions
@@ -327,979 +327,1104 @@ Following is the list of supported single precision mathematical functions.
       - **Supported on Host**
       - **Supported on Device**
 
-    * - | float acosf ( float  x ) 
-        | Calculate the arc cosine of the input argument.  
+    * - | ``float abs(float x)``
+        | Returns the absolute value of :math:`x`.
       - ✓
       - ✓
 
-    * - | float acoshf ( float  x ) 
-        | Calculate the nonnegative arc hyperbolic cosine of the input argument.
+    * - | ``float acosf(float x)``
+        | Returns the arc cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float asinf ( float  x )
-        | Calculate the arc sine of the input argument
+    * - | ``float acoshf(float x)``
+        | Returns the nonnegative arc hyperbolic cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float asinhf ( float  x )
-        | Calculate the arc hyperbolic sine of the input argument.
+    * - | ``float asinf(float x)``
+        | Returns the arc sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float atan2f ( float  y, float  x ) 
-        | Calculate the arc tangent of the ratio of first and second input arguments.
+    * - | ``float asinhf(float x)``
+        | Returns the arc hyperbolic sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float atanf ( float  x )
-        | Calculate the arc tangent of the input argument.
+    * - | ``float atanf(float x)``
+        | Returns the arc tangent of :math:`x`.
       - ✓
       - ✓
 
-    * - | float atanhf ( float  x )
-        | Calculate the arc hyperbolic tangent of the input argument.
+    * - | ``float atan2f(float x, float y)``
+        | Returns the arc tangent of the ratio of :math:`x` and :math:`y`.
       - ✓
       - ✓
 
-    * - | float cbrtf ( float  x )
-        | Calculate the cube root of the input argument.
+    * - | ``float atanhf(float x)``
+        | Returns the arc hyperbolic tangent of :math:`x`.
       - ✓
       - ✓
 
-    * - | float ceilf ( float  x )
-        | Calculate ceiling of the input argument.
+    * - | ``float cbrtf(float x)``
+        | Returns the cube root of :math:`x`.
       - ✓
       - ✓
 
-    * - | float copysignf ( float  x, float  y )
-        | Create value with given magnitude, copying sign of second value.
+    * - | ``float ceilf(float x)``
+        | Returns ceiling of :math:`x`.
       - ✓
       - ✓
 
-    * - | float copysignf ( float  x, float  y ) 
+    * - | ``float copysignf(float x, float y)``
         | Create value with given magnitude, copying sign of second value.
       - ✓
       - ✓
 
-    * - | float cosf ( float  x )
-        | Calculate the cosine of the input argument.
+    * - | ``float cosf(float x)``
+        | Returns the cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float coshf ( float  x )
-        | Calculate the hyperbolic cosine of the input argument.
-      - ✓
-      - ✓
-    * - | float erfcf ( float  x )
-        | Calculate the complementary error function of the input argument.
+    * - | ``float coshf(float x)``
+        | Returns the hyperbolic cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float erff ( float  x )
-        | Calculate the error function of the input argument.
+    * - | ``float cospif(float x)``
+        | Returns the cosine of :math:`\pi \cdot x`.
       - ✓
       - ✓
 
-    * - | float exp10f ( float  x ) 
-        | Calculate the base 10 exponential of the input argument.
+    * - | ``float cyl_bessel_i0f(float x)``
+        | Returns the value of the regular modified cylindrical Bessel function of order 0 for :math:`x`.
+      - ✗
+      - ✗
+
+    * - | ``float cyl_bessel_i1f(float x)``
+        | Returns the value of the regular modified cylindrical Bessel function of order 1 for :math:`x`.
+      - ✗
+      - ✗
+
+    * - | ``float erff(float x)``
+        | Returns the error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | float exp2f ( float  x ) 
-        | Calculate the base 2 exponential of the input argument.
+    * - | ``float erfcf(float x)``
+        | Returns the complementary error function of :math:`x`.
       - ✓
       - ✓
-  
-    * - | float expf ( float  x ) 
-        | Calculate the base e exponential of the input argument.
+
+    * - | ``float erfcinvf(float x)``
+        | Returns the inverse complementary error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | float expm1f ( float  x )
-        | Calculate the base e exponential of the input argument, minus 1.
+    * - | ``float erfcxf(float x)``
+        | Returns the scaled complementary error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | float fabsf ( float  x ) 
-        | Calculate the absolute value of its argument. 
+    * - | ``float erfinvf(float x)``
+        | Returns the inverse error function of :math:`x`.
       - ✓
       - ✓
- 
-    * - | float fdimf ( float  x, float  y )
-        | Compute the positive difference between `x` and `y`.
+
+    * - | ``float expf(float x)``
+        | Returns :math:`e^x`.
       - ✓
       - ✓
 
-    * - | float floorf ( float  x )
-        | Calculate the largest integer less than or equal to `x`.
+    * - | ``float exp10f(float x)``
+        | Returns :math:`10^x`.
       - ✓
       - ✓
 
-    * - | float fmaf ( float  x, float  y, float  z )
-        | Compute `x × y + z` as a single operation.
+    * - | ``float exp2f(float x)``
+        | Returns :math:`2^x`.
       - ✓
       - ✓
 
-    * - | float fmaxf ( float  x, float  y )
-        | Determine the maximum numeric value of the arguments.
+    * - | ``float expm1f(float x)``
+        | Returns :math:`e^x - 1`.
       - ✓
       - ✓
 
-    * - | float fminf ( float  x, float  y )
-        | Determine the minimum numeric value of the arguments.
+    * - | ``float fabsf(float x)``
+        | Returns the absolute value of :math:`x`.
       - ✓
       - ✓
-    
-    * - | float fmodf ( float  x, float  y )
-        | Calculate the floating-point remainder of `x / y`.
+ 
+    * - | ``float fdimf(float x, float y)``
+        | Returns the positive difference between :math:`x` and :math:`y`.
       - ✓
       - ✓
 
-    * - | float frexpf ( float  x, int* nptr )
-        | Extract mantissa and exponent of a floating-point value.
+    * - | ``float fdividef(float x, float y)``
+        | Divide two floating point values.
+      - ✓
       - ✓
-      - ✗
 
-    * - | float hypotf ( float  x, float  y )
-        | Calculate the square root of the sum of squares of two arguments.
+    * - | ``float floorf(float x)``
+        | Returns the largest integer less than or equal to :math:`x`.
       - ✓
       - ✓
 
-    * - | int ilogbf ( float  x )
-        | Compute the unbiased integer exponent of the argument.
+    * - | ``float fmaf(float x, float y, float z)``
+        | Returns :math:`x \cdot y + z` as a single operation.
       - ✓
       - ✓
 
-    * - | __RETURN_TYPE isfinite ( float  a )
-        | Determine whether argument is finite.
+    * - | ``float fmaxf(float x, float y)``
+        | Determine the maximum numeric value of :math:`x` and :math:`y`.
       - ✓
       - ✓
 
-    * - | __RETURN_TYPE isinf ( float  a )
-        | Determine whether argument is infinite.
+    * - | ``float fminf(float x, float y)``
+        | Determine the minimum numeric value of :math:`x` and :math:`y`.
       - ✓
       - ✓
-
-    * - | __RETURN_TYPE isnan ( float  a )
-        | Determine whether argument is a NaN.
+    
+    * - | ``float fmodf(float x, float y)``
+        | Returns the floating-point remainder of :math:`x / y`.
       - ✓
       - ✓
 
-    * - | float ldexpf ( float  x, int  exp )
-        | Calculate the value of x ⋅ 2 of the exponent of the input argument.
+    * - | ``float modff(float x, float* iptr)``
+        | Break down :math:`x` into fractional and integral parts.
       - ✓
+      - ✗ 
+
+    * - | ``float frexpf(float x, int* nptr)``
+        | Extract mantissa and exponent of :math:`x`.
       - ✓
+      - ✗
 
-    * - | loat log10f ( float  x )
-        | Calculate the base 10 logarithm of the input argument.
+    * - | ``float hypotf(float x, float y)``
+        | Returns the square root of the sum of squares of :math:`x` and :math:`y`.
       - ✓
       - ✓
 
-    * - | float log1pf ( float  x )
-        | Calculate the value of the exponent of the input argument
+    * - | ``int ilogbf(float x)``
+        | Returns the unbiased integer exponent of :math:`x`.
       - ✓
       - ✓
 
-    * - | float logbf ( float  x )
-        | Calculate the floating point representation of the exponent of the input argument.
+    * - | ``bool isfinite(float x)``
+        | Determine whether :math:`x` is finite.
       - ✓
       - ✓
-    
-    * - | float log2f ( float  x )
-        | Calculate the base 2 logarithm of the input argument.
+
+    * - | ``bool isinf(float x)``
+        | Determine whether :math:`x` is infinite.
       - ✓
       - ✓
 
-    * - | float logf ( float  x )
-        | Calculate the natural logarithm of the input argument.
+    * - | ``bool isnan(float x)``
+        | Determine whether :math:`x` is a ``NAN``.
       - ✓
       - ✓
 
-    * - | float modff ( float  x, float* iptr )
-        | Break down the input argument into fractional and integral parts.
+    * - | ``float j0f(float x)``
+        | Returns the value of the Bessel function of the first kind of order 0 for :math:`x`.
       - ✓
-      - ✗ 
-
-    * - | float nanf ( const char* tagp )
-        | Returns "Not a Number" value.
-      - ✗ 
       - ✓
 
-    * - | float nearbyintf ( float  x )
-        | Round the input argument to the nearest integer.
+    * - | ``float j1f(float x)``
+        | Returns the value of the Bessel function of the first kind of order 1 for :math:`x`.
       - ✓
       - ✓
 
-    * - | float powf ( float  x, float  y )
-        | Calculate the value of first argument to the power of second argument.
+    * - | ``float jnf(int n, float x)``
+        | Returns the value of the Bessel function of the first kind of order n for :math:`x`.
       - ✓
       - ✓
 
-    * - | float remainderf ( float  x, float  y )
-        | Compute single-precision floating-point remainder.
+    * - | ``float ldexpf(float x, int exp)``
+        | Returns :math:`x \cdot 2^{exp}`.
       - ✓
       - ✓
 
-    * - | float remquof ( float  x, float  y, int* quo )
-        | Compute single-precision floating-point remainder and part of quotient.
+    * - | ``float lgammaf(float x)``
+        | Returns the natural logarithm of the absolute value of the gamma function of :math:`x`.
       - ✓
-      - ✗ 
+      - ✗
 
-    * - | float roundf ( float  x )
-        | Round to nearest integer value in floating-point.
+    * - | ``long int lrintf(float x)``
+        | Round :math:`x` to nearest integer value.
       - ✓
       - ✓
 
-    * - | float scalbnf ( float  x, int  n )
-        | Scale floating-point input by integer power of two.
+    * - | ``long long int llrintf(float x)``
+        | Round :math:`x` to nearest integer value.
       - ✓
       - ✓
 
-    * - | __RETURN_TYPE signbit ( float  a )
-        | Return the sign bit of the input.
+    * - | ``long int lroundf(float x)``
+        | Round to nearest integer value.
       - ✓
       - ✓
 
-    * - | void sincosf ( float  x, float* sptr, float* cptr )
-        | Calculate the sine and cosine of the first input argument.
+    * - | ``long long int llroundf(float x)``
+        | Round to nearest integer value.
+      - ✓
       - ✓
-      - ✗ 
 
-    * - | float sinf ( float  x )
-        | Calculate the sine of the input argument.
+    * - | ``float log10f(float x)``
+        | Returns the base 10 logarithm of :math:`x`.
       - ✓
       - ✓
 
-    * - | float sinhf ( float  x )
-        | Calculate the hyperbolic sine of the input argument.
+    * - | ``float log1pf(float x)``
+        | Returns the natural logarithm of :math:`x + 1`.
       - ✓
       - ✓
-
-    * - | float sqrtf ( float  x )
-        | Calculate the square root of the input argument.
+    
+    * - | ``float log2f(float x)``
+        | Returns the base 2 logarithm of :math:`x`.
       - ✓
       - ✓
 
-    * - | float tanf ( float  x )
-        | Calculate the tangent of the input argument.
+    * - | ``float logf(float x)``
+        | Returns the natural logarithm of :math:`x`.
       - ✓
       - ✓
 
-    * - | float tanhf ( float  x )
-        | Calculate the hyperbolic tangent of the input argument.
+    * - | ``float logbf(float x)``
+        | Returns the floating point representation of the exponent of :math:`x`.
       - ✓
       - ✓
 
-    * - | float truncf ( float  x )
-        | Truncate input argument to the integral part.
-      - ✓
+    * - | ``float nanf(const char* tagp)``
+        | Returns "Not a Number" value.
+      - ✗ 
       - ✓
 
-    * - | float tgammaf ( float  x )
-        | Calculate the gamma function of the input argument.
+    * - | ``float nearbyintf(float x)``
+        | Round :math:`x` to the nearest integer.
       - ✓
       - ✓
 
-    * - | float erfcinvf ( float  y )
-        | Calculate the inverse complementary function of the input argument.
-      - ✓
+    * - | ``float nextafterf(float x, float y)``
+        | Returns next representable single-precision floating-point value after argument.
       - ✓
+      - ✗
 
-    * - | float erfcxf ( float  x )
-        | Calculate the scaled complementary error function of the input argument.
+    * - | ``float norm3df(float x, float y, float z)``
+        | Returns the square root of the sum of squares of :math:`x`, :math:`y` and :math:`z`.
       - ✓
       - ✓
 
-    * - | float erfinvf ( float  y )
-        | Calculate the inverse error function of the input argument.
+    * - | ``float norm4df(float x, float y, float z, float w)``
+        | Returns the square root of the sum of squares of :math:`x`, :math:`y`, :math:`z` and :math:`w`.
       - ✓
       - ✓
- 
-    * - | float fdividef ( float x, float  y )
-        | Divide two floating point values.
+
+    * - | ``float normcdff(float y)``
+        | Returns the standard normal cumulative distribution function.
       - ✓
       - ✓
 
-    * - | float frexpf ( float  x, `int *nptr` )
-        | Extract mantissa and exponent of a floating-point value. 
+    * - | ``float normcdfinvf(float y)``
+        | Returns the inverse of the standard normal cumulative distribution function.
       - ✓
       - ✓
 
-    * - | float j0f ( float  x )
-        | Calculate the value of the Bessel function of the first kind of order 0 for the input argument.
+    * - | ``float normf(int dim, const float *a)``
+        | Returns the square root of the sum of squares of any number of coordinates.
       - ✓
       - ✓
 
-    * - | float j1f ( float  x )
-        | Calculate the value of the Bessel function of the first kind of order 1 for the input argument.
+    * - | ``float powf(float x, float y)``
+        | Returns :math:`x^y`.
       - ✓
       - ✓
 
-    * - | float jnf ( int n, float  x )
-        | Calculate the value of the Bessel function of the first kind of order n for the input argument.
+    * - | ``float powif(float base, int iexp)``
+        | Returns the value of first argument to the power of second argument.
       - ✓
       - ✓
 
-    * - | float lgammaf ( float  x )
-        | Calculate the natural logarithm of the absolute value of the gamma function of the input argument.
+    * - | ``float remainderf(float x, float y)``
+        | Returns single-precision floating-point remainder.
       - ✓
       - ✓
 
-    * - | long long int llrintf ( float  x )
-        | Round input to nearest integer value.
-      - ✓
+    * - | ``float remquof(float x, float y, int* quo)``
+        | Returns single-precision floating-point remainder and part of quotient.
       - ✓
+      - ✓ 
 
-    * - | long long int llroundf ( float  x )
-        | Round to nearest integer value.
+    * - | ``float roundf(float x)``
+        | Round to nearest integer value in floating-point.
       - ✓
       - ✓
 
-    * - | long int lrintf ( float  x )
-        | Round input to nearest integer value.
+    * - | ``float rcbrtf(float x)``
+        | Returns the reciprocal cube root function.
       - ✓
       - ✓
 
-    * - | long int lroundf ( float  x )
-        | Round to nearest integer value.
+    * - | ``float rhypotf(float x, float y)``
+        | Returns one over the square root of the sum of squares of two arguments.
       - ✓
       - ✓
 
-    * - | float modff ( float  x, `float *iptr` )
-        | Break down the input argument into fractional and integral parts.
+    * - | ``float rintf(float x)``
+        | Round input to nearest integer value in floating-point.
       - ✓
       - ✓
-
-    * - | float nextafterf ( float  x, float y )
-        | Returns next representable single-precision floating-point value after argument.
+ 
+    * - | ``float rnorm3df(float x, float y, float z)``
+        | Returns one over the square root of the sum of squares of three coordinates of the argument.
       - ✓
       - ✓
 
-    * - | float norm3df ( float  a, float b, float c )
-        | Calculate the square root of the sum of squares of three coordinates of the argument.
+    * - | ``float rnorm4df(float x, float y, float z, float w)``
+        | Returns one over the square root of the sum of squares of four coordinates of the argument.
       - ✓
       - ✓
 
-    * - | float norm4df ( float  a, float b, float c, float d )
-        | Calculate the square root of the sum of squares of four coordinates of the argument.
+    * - | ``float rnormf(int dim, const float *a)``
+        | Returns the reciprocal of square root of the sum of squares of any number of coordinates.
       - ✓
       - ✓
 
-    * - | loat normcdff ( float  y )
-        | Calculate the standard normal cumulative distribution function.
+    * - | ``float scalblnf(float x, long int n)``
+        | Scale :math:`x` by :math:`2^n`.
       - ✓
       - ✓
 
-    * - | float normcdfinvf ( float  y )
-        | Calculate the inverse of the standard normal cumulative distribution function.
+    * - | ``float scalbnf(float x, int  n)``
+        | Scale :math:`x` by :math:`2^n`.
       - ✓
       - ✓
 
-    * - | float normf ( int dim, `const float *a` )
-        | Calculate the square root of the sum of squares of any number of coordinates.
+    * - | ``bool signbit(float x)``
+        | Return the sign bit of :math:`x`.
       - ✓
       - ✓
 
-    * - | float rcbrtf ( float x )
-        | Calculate the reciprocal cube root function.
+    * - | ``float sinf(float x)``
+        | Returns the sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float remquof ( float x, float y, `int *quo` )
-        | Compute single-precision floating-point remainder and part of quotient.
+    * - | ``float sinhf(float x)``
+        | Returns the hyperbolic sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | float rhypotf ( float x, float y )
-        | Calculate one over the square root of the sum of squares of two arguments.
+    * - | ``float sinpif(float x)``
+        | Returns the sine of :math:`\pi \cdot x`.
       - ✓
       - ✓
 
-    * - | float rintf ( float x )
-        | Round input to nearest integer value in floating-point.
+    * - | ``void sincosf(float x, float *sptr, float *cptr)``
+        | Returns the sine and cosine of :math:`x`.
       - ✓
       - ✓
- 
-    * - | float rnorm3df ( float  a, float b, float c )
-        | Calculate one over the square root of the sum of squares of three coordinates of the argument.
+
+    * - | ``void sincospif(float x, float *sptr, float *cptr)``
+        | Returns the sine and cosine of :math:`\pi \cdot x`.
       - ✓
       - ✓
 
-    * - | float rnorm4df ( float  a, float b, float c, float d )
-        | Calculate one over the square root of the sum of squares of four coordinates of the argument.
+    * - | ``float sqrtf(float x)``
+        | Returns the square root of :math:`x`.
       - ✓
       - ✓
 
-    * - | float rnormf ( int dim, `const float *a` )
-        | Calculate the reciprocal of square root of the sum of squares of any number of coordinates.
+    * - | ``float rsqrtf(float x)``
+        | Returns the reciprocal of the square root of :math:`x`.
+      - ✗
+      - ✓
+
+    * - | ``float tanf(float x)``
+        | Returns the tangent of :math:`x`.
       - ✓
       - ✓
 
-    * - | float scalblnf ( float x, long int n )
-        | Scale floating-point input by integer power of two.
+    * - | ``float tanhf(float x)``
+        | Returns the hyperbolic tangent of :math:`x`.
       - ✓
       - ✓
-  
-    * - | void sincosf ( float x, `float *sptr`, `float *cptr`)
-        | Calculate the sine and cosine of the first input argument.
+
+    * - | ``float tgammaf(float x)``
+        | Returns the gamma function of :math:`x`.
       - ✓
       - ✓
 
-    * - | void sincospif ( float x, `float *sptr`, `float *cptr`)
-        | Calculate the sine and cosine of the first input argument multiplied by PI.
+    * - | ``float truncf(float x)``
+        | Truncate :math:`x` to the integral part.
       - ✓
       - ✓
     
-    * - | float y0f ( float  x )
-        | Calculate the value of the Bessel function of the second kind of order 0 for the input argument.
+    * - | ``float y0f(float x)``
+        | Returns the value of the Bessel function of the second kind of order 0 for :math:`x`.
       - ✓
       - ✓
 
-    * - | float y1f ( float  x )
-        | Calculate the value of the Bessel function of the second kind of order 1 for the input argument.
+    * - | ``float y1f(float x)``
+        | Returns the value of the Bessel function of the second kind of order 1 for :math:`x`.
       - ✓
       - ✓
 
-    * - | float ynf ( int n, float  x )
-        | Calculate the value of the Bessel function of the second kind of order n for the input argument.
+    * - | ``float ynf(int n, float x)``
+        | Returns the value of the Bessel function of the second kind of order n for :math:`x`.
       - ✓
       - ✓
 
 Double precision mathematical functions
 --------------------------------------------------------------------------------------------
+
 Following is the list of supported  double precision mathematical functions.
 
-.. list-table:: Single precision mathematical functions
+.. list-table:: Double precision mathematical functions
 
     * - **Function**
       - **Supported on Host**
       - **Supported on Device**
 
-    * - | double acos ( double  x )
-        | Calculate the arc cosine of the input argument.
+    * - | ``double abs(double x)``
+        | Returns the absolute value of :math:`x`.
       - ✓
       - ✓
 
-    * - | double acosh ( double  x )
-        | Calculate the nonnegative arc hyperbolic cosine of the input argument.
+    * - | ``double acos(double x)``
+        | Returns the arc cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double asin ( double  x )
-        | Calculate the arc sine of the input argument.
+    * - | ``double acosh(double x)``
+        | Returns the nonnegative arc hyperbolic cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double asinh ( double  x )
-        | Calculate the arc hyperbolic sine of the input argument.
+    * - | ``double asin(double x)``
+        | Returns the arc sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double atan ( double  x )
-        | Calculate the arc tangent of the input argument.
+    * - | ``double asinh(double x)``
+        | Returns the arc hyperbolic sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double atan2 ( double  y, double  x )
-        | Calculate the arc tangent of the ratio of first and second input arguments.
+    * - | ``double atan(double x)``
+        | Returns the arc tangent of :math:`x`.
       - ✓
       - ✓
 
-    * - | double atanh ( double  x )
-        | Calculate the arc hyperbolic tangent of the input argument.
+    * - | ``double atan2(double x, double y)``
+        | Returns the arc tangent of the ratio of :math:`x` and :math:`y`.
       - ✓
       - ✓
 
-    * - | double cbrt ( double  x )
-        | Calculate the cube root of the input argument.
+    * - | ``double atanh(double x)``
+        | Returns the arc hyperbolic tangent of :math:`x`.
       - ✓
       - ✓
 
-    * - | double ceil ( double  x )
-        | Calculate ceiling of the input argument.
+    * - | ``double cbrt(double x)``
+        | Returns the cube root of :math:`x`.
       - ✓
       - ✓
 
-    * - | double copysign ( double  x, double  y )
-        | Create value with given magnitude, copying sign of second value.
+    * - | ``double ceil(double x)``
+        | Returns ceiling of :math:`x`.
       - ✓
       - ✓
 
-    * - | double cos ( double  x )
-        | Calculate the cosine of the input argument.
+    * - | ``double copysign(double x, double y)``
+        | Create value with given magnitude, copying sign of second value.
       - ✓
       - ✓
 
-    * - | double cosh ( double  x )
-        | Calculate the hyperbolic cosine of the input argument.
+    * - | ``double cos(double x)``
+        | Returns the cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double erf ( double  x )
-        | Calculate the error function of the input argument.
+    * - | ``double cosh(double x)``
+        | Returns the hyperbolic cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double erfc ( double  x )
-        | Calculate the complementary error function of the input argument.
+    * - | ``double cospi(double x)``
+        | Returns the cosine of :math:`\pi \cdot x`.
       - ✓
       - ✓
 
-    * - | double exp ( double  x )
-        | Calculate the base e exponential of the input argument.
-      - ✓
-      - ✓
+    * - | ``double cyl_bessel_i0(double x)``
+        | Returns the value of the regular modified cylindrical Bessel function of order 0 for :math:`x`.
+      - ✗
+      - ✗
 
-    * - | double exp10 ( double  x )
-        | Calculate the base 10 exponential of the input argument.
+    * - | ``double cyl_bessel_i1(double x)``
+        | Returns the value of the regular modified cylindrical Bessel function of order 1 for :math:`x`.
+      - ✗
+      - ✗
+
+    * - | ``double erf(double x)``
+        | Returns the error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | double exp2 ( double  x )
-        | Calculate the base 2 exponential of the input argument.
+    * - | ``double erfc(double x)``
+        | Returns the complementary error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | double expm1 ( double  x )
-        | Calculate the base e exponential of the input argument, minus 1.
+    * - | ``double erfcinv(double x)``
+        | Returns the inverse complementary error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | double fabs ( double  x )
-        | Calculate the absolute value of the input argument.
+    * - | ``double erfcx(double x)``
+        | Returns the scaled complementary error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | double fdim ( double  x, double  y )
-        | Compute the positive difference between `x` and `y`.
+    * - | ``double erfinv(double x)``
+        | Returns the inverse error function of :math:`x`.
       - ✓
       - ✓
 
-    * - | double floor ( double  x )
-        | Calculate the largest integer less than or equal to `x`.
+    * - | ``double exp(double x)``
+        | Returns :math:`e^x`.
       - ✓
       - ✓
 
-    * - | double fma ( double  x, double  y, double  z )
-        | Compute `x × y + z` as a single operation.
+    * - | ``double exp10(double x)``
+        | Returns :math:`10^x`.
       - ✓
       - ✓
 
-    * - | double fmax ( double , double )
-        | Determine the maximum numeric value of the arguments.
+    * - | ``double exp2(double x)``
+        | Returns :math:`2^x`.
       - ✓
       - ✓
-
-    * - | double fmin ( double  x, double  y )
-        | Determine the minimum numeric value of the arguments.
+  
+    * - | ``double expm1(double x)``
+        | Returns :math:`e^x - 1`.
       - ✓
       - ✓
 
-    * - | double fmod ( double  x, double  y )
-        | Calculate the floating-point remainder of `x / y`.
+    * - | ``double fabs(double x)``
+        | Returns the absolute value of :math:`x`.
       - ✓
       - ✓
-
-    * - | double frexp ( double  x, int* nptr )
-        | Extract mantissa and exponent of a floating-point value.
+ 
+    * - | ``double fdim(double x, double y)``
+        | Returns the positive difference between :math:`x` and :math:`y`.
+      - ✓
       - ✓
-      - ✗
 
-    * - | double hypot ( double  x, double  y )
-        | Calculate the square root of the sum of squares of two arguments.
+    * - | ``double floor(double x)``
+        | Returns the largest integer less than or equal to :math:`x`.
       - ✓
       - ✓
 
-    * - | int ilogb ( double  x )
-        | Compute the unbiased integer exponent of the argument.
+    * - | ``double fma(double x, double y, double z)``
+        | Returns :math:`x \cdot y + z` as a single operation.
       - ✓
       - ✓
 
-    * - | __RETURN_TYPE isfinite ( double  a )
-        | Determine whether argument is finite.
+    * - | ``double fmax(double x, double y)``
+        | Determine the maximum numeric value of :math:`x` and :math:`y`.
       - ✓
       - ✓
 
-    * - | __RETURN_TYPE isinf ( double  a )
-        | Determine whether argument is infinite.
+    * - | ``double fmin(double x, double y)``
+        | Determine the minimum numeric value of :math:`x` and :math:`y`.
       - ✓
       - ✓
-
-    * - | __RETURN_TYPE isnan ( double  a )
-        | Determine whether argument is a NaN.
+    
+    * - | ``double fmod(double x, double y)``
+        | Returns the floating-point remainder of :math:`x / y`.
       - ✓
       - ✓
 
-    * - | double ldexp ( double  x, int  exp )
-        | Calculate the value of x ⋅ 2 exp.
+    * - | ``double modf(double x, double* iptr)``
+        | Break down :math:`x` into fractional and integral parts.
       - ✓
+      - ✗ 
+
+    * - | ``double frexp(double x, int* nptr)``
+        | Extract mantissa and exponent of :math:`x`.
       - ✓
+      - ✗
 
-    * - | double log ( double  x )
-        | Calculate the base e logarithm of the input argument.
+    * - | ``double hypot(double x, double y)``
+        | Returns the square root of the sum of squares of :math:`x` and :math:`y`.
       - ✓
       - ✓
 
-    * - | double log10 ( double  x )
-        | Calculate the base 10 logarithm of the input argument.
+    * - | ``int ilogb(double x)``
+        | Returns the unbiased integer exponent of :math:`x`.
       - ✓
       - ✓
 
-    * - | double log1p ( double  x )
-        | Calculate the value of logarithm of exp ( 1 + x ).
+    * - | ``bool isfinite(double x)``
+        | Determine whether :math:`x` is finite.
       - ✓
       - ✓
 
-    * - | double log2 ( double  x )
-        | Calculate the base 2 logarithm of the input argument.
+    * - | ``bool isinf(double x)``
+        | Determine whether :math:`x` is infinite.
       - ✓
       - ✓
 
-    * - | double logb ( double  x )
-        | Calculate the floating point representation of the exponent of the input argument.
+    * - | ``bool isnan(double x)``
+        | Determine whether :math:`x` is a ``NAN``.
       - ✓
       - ✓
 
-    * - | double modf ( double  x, `double* iptr` )
-        | Break down the input argument into fractional and integral parts.
+    * - | ``double j0(double x)``
+        | Returns the value of the Bessel function of the first kind of order 0 for :math:`x`.
       - ✓
-      - ✗
- 
-    * - | double nan ( const `char* tagp`)
-        | Returns ``Not a Number`` value.
-      - ✗
       - ✓
 
-    * - | double nearbyint ( double  x )
-        | Round the input argument to the nearest integer.
+    * - | ``double j1(double x)``
+        | Returns the value of the Bessel function of the first kind of order 1 for :math:`x`.
       - ✓
       - ✓
 
-    * - | double pow ( double  x, double  y )
-        | Calculate the value of first argument to the power of second argument.
+    * - | ``double jn(int n, double x)``
+        | Returns the value of the Bessel function of the first kind of order n for :math:`x`.
       - ✓
       - ✓
 
-    * - | double remainder ( double  x, double  y )
-        | Compute double-precision floating-point remainder.
+    * - | ``double ldexp(double x, int exp)``
+        | Returns :math:`x \cdot 2^{exp}`.
       - ✓
       - ✓
 
-    * - | double remquo ( double  x, double  y, `int* quo` )
-        | Compute double-precision floating-point remainder and part of quotient.
+    * - | ``double lgamma(double x)``
+        | Returns the natural logarithm of the absolute value of the gamma function of :math:`x`.
       - ✓
       - ✗
- 
-    * - | double round ( double  x )
-        | Round to nearest integer value in floating-point.
+
+    * - | ``long int lrint(double x)``
+        | Round :math:`x` to nearest integer value.
       - ✓
       - ✓
 
-    * - | double scalbn ( double  x, int  n )
-        | Scale floating-point input by integer power of two.
+    * - | ``long long int llrint(double x)``
+        | Round :math:`x` to nearest integer value.
       - ✓
       - ✓
- 
-    * - | __RETURN_TYPE signbit ( double  a )
-        | Return the sign bit of the input.
+
+    * - | ``long int lround(double x)``
+        | Round to nearest integer value.
       - ✓
       - ✓
 
-    * - | double sin ( double  x )
-        | Calculate the sine of the input argument.
+    * - | ``long long int llround(double x)``
+        | Round to nearest integer value.
       - ✓
       - ✓
 
-    * - | void sincos ( double  x, `double* sptr`, `double* cptr` )
-        | Calculate the sine and cosine of the first input argument.
-      - ✓
-      - ✗
- 
-    * - | double sinh ( double  x )
-        | Calculate the hyperbolic sine of the input argument.
+    * - | ``double log10(double x)``
+        | Returns the base 10 logarithm of :math:`x`.
       - ✓
       - ✓
 
-    * - | double sqrt ( double  x )
-        | Calculate the square root of the input argument.
+    * - | ``double log1p(double x)``
+        | Returns the natural logarithm of :math:`x + 1`.
       - ✓
       - ✓
-
-    * - | double tan ( double  x )
-        | Calculate the tangent of the input argument.
+    
+    * - | ``double log2(double x)``
+        | Returns the base 2 logarithm of :math:`x`.
       - ✓
       - ✓
 
-    * - | double tanh ( double  x )
-        | Calculate the hyperbolic tangent of the input argument.
+    * - | ``double log(double x)``
+        | Returns the natural logarithm of :math:`x`.
       - ✓
       - ✓
 
-    * - | double tgamma ( double  x )
-        | Calculate the gamma function of the input argument.
+    * - | ``double logb(double x)``
+        | Returns the floating point representation of the exponent of :math:`x`.
       - ✓
       - ✓
 
-    * - | double trunc ( double  x )
-        | Truncate input argument to the integral part.
+    * - | ``double nan(const char* tagp)``
+        | Returns "Not a Number" value.
+      - ✗ 
+      - ✓
+
+    * - | ``double nearbyint(double x)``
+        | Round :math:`x` to the nearest integer.
       - ✓
       - ✓
 
-    * - | double erfcinv ( double  y )
-        | Calculate the inverse complementary function of the input argument.
+    * - | ``double nextafter(double x, double y)``
+        | Returns next representable double-precision floating-point value after argument.
       - ✓
       - ✓
 
-    * - | double erfcx ( double  x )
-        | Calculate the scaled complementary error function of the input argument.
+    * - | ``double norm3d(double x, double y, double z)``
+        | Returns the square root of the sum of squares of :math:`x`, :math:`y` and :math:`z`.
       - ✓
       - ✓
 
-    * - | double erfinv ( double  y )
-        | Calculate the inverse error function of the input argument.
+    * - | ``double norm4d(double x, double y, double z, double w)``
+        | Returns the square root of the sum of squares of :math:`x`, :math:`y`, :math:`z` and :math:`w`.
       - ✓
       - ✓
-      
-    * - | double frexp ( float  x, `int *nptr` )
-        | Extract mantissa and exponent of a floating-point value.
+
+    * - | ``double normcdf(double y)``
+        | Returns the standard normal cumulative distribution function.
       - ✓
       - ✓
 
-    * - | double j0 ( double  x )
-        | Calculate the value of the Bessel function of the first kind of order 0 for the input argument.
+    * - | ``double normcdfinv(double y)``
+        | Returns the inverse of the standard normal cumulative distribution function.
       - ✓
       - ✓
 
-    * - | double j1 ( double  x )
-        | Calculate the value of the Bessel function of the first kind of order 1 for the input argument.
+    * - | ``double norm(int dim, const double *a)``
+        | Returns the square root of the sum of squares of any number of coordinates.
       - ✓
       - ✓
 
-    * - | double jn ( int n, double  x )
-        | Calculate the value of the Bessel function of the first kind of order n for the input argument.
+    * - | ``double pow(double x, double y)``
+        | Returns :math:`x^y`.
       - ✓
       - ✓
 
-    * - | double lgamma ( double  x )
-        | Calculate the natural logarithm of the absolute value of the gamma function of the input argument.
+    * - | ``double powi(double base, int iexp)``
+        | Returns the value of first argument to the power of second argument.
       - ✓
       - ✓
 
-    * - | long long int llrint ( double  x )
-        | Round input to nearest integer value.
+    * - | ``double remainder(double x, double y)``
+        | Returns double-precision floating-point remainder.
       - ✓
       - ✓
 
+    * - | ``double remquo(double x, double y, int* quo)``
+        | Returns double-precision floating-point remainder and part of quotient.
+      - ✓
+      - ✗ 
 
-    * - | long long int llround ( double  x )
-        | Round to nearest integer value.
+    * - | ``double round(double x)``
+        | Round to nearest integer value in floating-point.
       - ✓
       - ✓
 
-    * - | long int lrint ( double  x )
-        | Round input to nearest integer value.
+    * - | ``double rcbrt(double x)``
+        | Returns the reciprocal cube root function.
       - ✓
       - ✓
 
-    * - | long int lround ( double  x )
-        | Round to nearest integer value.
+    * - | ``double rhypot(double x, double y)``
+        | Returns one over the square root of the sum of squares of two arguments.
       - ✓
       - ✓
 
-    * - | double modf ( double  x, `double *iptr` )
-        | Break down the input argument into fractional and integral parts.
+    * - | ``double rint(double x)``
+        | Round input to nearest integer value in floating-point.
       - ✓
       - ✓
-
-    * - | double nextafter ( double  x, double y )
-        | Returns next representable single-precision floating-point value after argument.
+ 
+    * - | ``double rnorm3d(double x, double y, double z)``
+        | Returns one over the square root of the sum of squares of three coordinates of the argument.
       - ✓
       - ✓
 
-    * - | double norm3d ( double  a, double b, double c )
-        | Calculate the square root of the sum of squares of three coordinates of the argument.
+    * - | ``double rnorm4d(double x, double y, double z, double w)``
+        | Returns one over the square root of the sum of squares of four coordinates of the argument.
       - ✓
       - ✓
 
-    * - | float norm4d ( double  a, double b, double c, double d )
-        | Calculate the square root of the sum of squares of four coordinates of the argument.
+    * - | ``double rnorm(int dim, const double *a)``
+        | Returns the reciprocal of square root of the sum of squares of any number of coordinates.
       - ✓
       - ✓
 
-    * - | double normcdf ( double  y )
-        | Calculate the standard normal cumulative distribution function.
+    * - | ``double scalbln(double x, long int n)``
+        | Scale :math:`x` by :math:`2^n`.
       - ✓
       - ✓
 
-    * - | double normcdfinv ( double  y )
-        | Calculate the inverse of the standard normal cumulative distribution function.
+    * - | ``double scalbn(double x, int n)``
+        | Scale :math:`x` by :math:`2^n`.
       - ✓
       - ✓
 
-    * - | double rcbrt ( double x )
-        | Calculate the reciprocal cube root function.
+    * - | ``bool signbit(double x)``
+        | Return the sign bit of :math:`x`.
       - ✓
       - ✓
 
-    * - | double remquo ( double x, `double y`, `int *quo` )
-        | Compute single-precision floating-point remainder and part of quotient.
+    * - | ``double sin(double x)``
+        | Returns the sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double rhypot ( double x, double y )
-        | Calculate one over the square root of the sum of squares of two arguments.
+    * - | ``double sinh(double x)``
+        | Returns the hyperbolic sine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double rint ( double x )
-        | Round input to nearest integer value in floating-point.
+    * - | ``double sinpi(double x)``
+        | Returns the sine of :math:`\pi \cdot x`.
       - ✓
       - ✓
 
-    * - | double rnorm3d ( double a, double b, double c )
-        | Calculate one over the square root of the sum of squares of three coordinates of the argument.
+    * - | ``void sincos(double x, double *sptr, double *cptr)``
+        | Returns the sine and cosine of :math:`x`.
       - ✓
       - ✓
 
-    * - | double rnorm4d ( double a, double b, double c, double d )
-        | Calculate one over the square root of the sum of squares of four coordinates of the argument.
+    * - | ``void sincospi(double x, double *sptr, double *cptr)``
+        | Returns the sine and cosine of :math:`\pi \cdot x`.
       - ✓
       - ✓
 
-    * - | double rnorm ( int dim, `const double *a` )
-        | Calculate the reciprocal of square root of the sum of squares of any number of coordinates.
+    * - | ``double sqrt(double x)``
+        | Returns the square root of :math:`x`.
+      - ✓
       - ✓
+
+    * - | ``double rsqrt(double x)``
+        | Returns the reciprocal of the square root of :math:`x`.
+      - ✗
       - ✓
 
-    * - | double scalbln ( double x, long int n )
-        | Scale floating-point input by integer power of two.
+    * - | ``double tan(double x)``
+        | Returns the tangent of :math:`x`.
       - ✓
       - ✓
 
-    * - | void sincos ( double x, `double *sptr`, `double *cptr` )
-        | Calculate the sine and cosine of the first input argument.
+    * - | ``double tanh(double x)``
+        | Returns the hyperbolic tangent of :math:`x`.
       - ✓
       - ✓
 
-    * - | void sincospi ( double x, `double *sptr`, `double *cptr` )
-        | Calculate the sine and cosine of the first input argument multiplied by PI.
+    * - | ``double tgamma(double x)``
+        | Returns the gamma function of :math:`x`.
       - ✓
       - ✓
 
-    * - | double y0f ( double  x )
-        | Calculate the value of the Bessel function of the second kind of order 0 for the input argument.
+    * - | ``double trunc(double x)``
+        | Truncate :math:`x` to the integral part.
+      - ✓
+      - ✓
+    
+    * - | ``double y0(double x)``
+        | Returns the value of the Bessel function of the second kind of order 0 for :math:`x`.
       - ✓
       - ✓
 
-    * - | double y1 ( double  x )
-        | Calculate the value of the Bessel function of the second kind of order 1 for the input argument.
+    * - | ``double y1(double x)``
+        | Returns the value of the Bessel function of the second kind of order 1 for :math:`x`.
       - ✓
       - ✓
 
-    * - | double yn ( int n, double  x )
-        | Calculate the value of the Bessel function of the second kind of order n for the input argument.
+    * - | ``double yn(int n, double x)``
+        | Returns the value of the Bessel function of the second kind of order n for :math:`x`.
       - ✓
       - ✓
 
 Integer intrinsics
 --------------------------------------------------------------------------------------------
+
 Following is the list of supported integer intrinsics. Note that intrinsics are supported on device only.
 
-.. list-table:: Single precision mathematical functions
+.. list-table:: Integer intrinsics mathematical functions
 
     * - **Function**
 
-    * - | double acos ( double  x )
-        | Calculate the arc cosine of the input argument.
-
-    * - | unsigned int __brev ( unsigned int x )
+    * - | ``unsigned int __brev(unsigned int x)``
         | Reverse the bit order of a 32 bit unsigned integer.
 
-    * - | unsigned long long int __brevll ( unsigned long long int x )
+    * - | ``unsigned long long int __brevll(unsigned long long int x)``
         | Reverse the bit order of a 64 bit unsigned integer. 
 
-    * - | int __clz ( int  x )
-        | Return the number of consecutive high-order zero bits in a 32 bit integer.
+    * - | ``unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int z)``
+        | Return selected bytes from two 32-bit unsigned integers.
 
-    * - | unsigned int __clz(unsigned int x)
-        | Return the number of consecutive high-order zero bits in 32 bit unsigned integer.
+    * - | ``unsigned int __clz(int x)``
+        | Return the number of consecutive high-order zero bits in 32 bit integer.
 
-    * - | int __clzll ( long long int x )
-        | Count the number of consecutive high-order zero bits in a 64 bit integer.
+    * - | ``unsigned int __clzll(long long int x)``
+        | Return the number of consecutive high-order zero bits in 64 bit integer.
 
-    * - | unsigned int __clzll(long long int x)
-        | Return the number of consecutive high-order zero bits in 64 bit signed integer.
+    * - | ``unsigned int __ffs(int x)``
+        | Find the position of least significant bit set to 1 in a 32 bit integer.
 
-    * - |  unsigned int __ffs(unsigned int x)
-        | Find the position of least signigicant bit set to 1 in a 32 bit unsigned integer.
+    * - | ``unsigned int __ffsll(long long int x)``
+        | Find the position of least significant bit set to 1 in a 64 bit signed integer.
 
-    * - | unsigned int __ffs(int x)
-        | Find the position of least signigicant bit set to 1 in a 32 bit signed integer.
+    * - | ``unsigned int __fns32(unsigned long long mask, unsigned int base, int offset)``
+        | Find the position of the n-th bit set to 1 in a 32-bit integer.
 
-    * - | unsigned int __ffsll(unsigned long long int x)
-        | Find the position of least signigicant bit set to 1 in a 64 bit unsigned integer.
+    * - | ``unsigned int __fns64(unsigned long long int mask, unsigned int base, int offset)``
+        | Find the position of the n-th bit set to 1 in a 64-bit integer.
 
-    * - | unsigned int __ffsll(long long int x)
-        | Find the position of least signigicant bit set to 1 in a 64 bit signed integer.
+    * - | ``unsigned int __funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift)``
+        | Concatenate :math:`hi` and :math:`lo`, shift left by shift & 31 bits, return the most significant 32 bits.
+
+    * - | ``unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift)``
+        | Concatenate :math:`hi` and :math:`lo`, shift left by min(shift, 32) bits, return the most significant 32 bits.
+
+    * - | ``unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift)``
+        | Concatenate :math:`hi` and :math:`lo`, shift right by shift & 31 bits, return the least significant 32 bits.
+
+    * - | ``unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift)``
+        | Concatenate :math:`hi` and :math:`lo`, shift right by min(shift, 32) bits, return the least significant 32 bits.
+    
+    * - | ``int __hadd(int x, int y)``
+        | Compute average of signed input arguments, avoiding overflow in the intermediate sum.
+
+    * - | ``int __rhadd(int x, int y)``
+        | Compute rounded average of signed input arguments, avoiding overflow in the intermediate sum.
+
+    * - | ``unsigned int __uhadd(unsigned int x, unsigned int y)``
+        | Compute average of unsigned input arguments, avoiding overflow in the intermediate sum.
 
-    * - | unsigned int __popc ( unsigned int x )
+    * - | ``unsigned int __urhadd(unsigned int x, unsigned int y)``
+        | Compute rounded average of unsigned input arguments, avoiding overflow in the intermediate sum.
+
+    * - | ``int __sad(int x, int y, int z)``
+        | Returns :math:`|x - y| + z`, the sum of absolute difference.
+
+    * - | ``unsigned int __usad(unsigned int x, unsigned int y, unsigned int z)``
+        | Returns :math:`|x - y| + z`, the sum of absolute difference.
+
+    * - | ``unsigned int __popc(unsigned int x)``
         | Count the number of bits that are set to 1 in a 32 bit integer.
 
-    * - | unsigned int __popcll ( unsigned long long int x )
+    * - | ``unsigned int __popcll(unsigned long long int x)``
         | Count the number of bits that are set to 1 in a 64 bit integer.
 
-    * - | int __mul24 ( int x, int y )
+    * - | ``int __mul24(int x, int y)``
         | Multiply two 24bit integers.
 
-    * - | unsigned int __umul24 ( unsigned int x, unsigned int y )
+    * - | ``unsigned int __umul24(unsigned int x, unsigned int y)``
         | Multiply two 24bit unsigned integers.
 
+    * - | ``int __mulhi(int x, int y)``
+        | Returns the most significant 32 bits of the product of the two 32-bit integers.
+
+    * - | ``unsigned int __umulhi(unsigned int x, unsigned int y)``
+        | Returns the most significant 32 bits of the product of the two 32-bit unsigned integers.
+
+    * - | ``long long int __mul64hi(long long int x, long long int y)``
+        | Returns the most significant 64 bits of the product of the two 64-bit integers.
+
+    * - | ``unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y)``
+        | Returns the most significant 64 bits of the product of the two 64-bit unsigned integers.
+
 The HIP-Clang implementation of ``__ffs()`` and ``__ffsll()`` contains code to add a constant +1 to produce the ffs result format.
 For the cases where this overhead is not acceptable and programmer is willing to specialize for the platform,
-HIP-Clang provides `__lastbit_u32_u32(unsigned int input)` and `__lastbit_u32_u64(unsigned long long int input)`.
+HIP-Clang provides ``__lastbit_u32_u32(unsigned int input)`` and ``__lastbit_u32_u64(unsigned long long int input)``.
 The index returned by ``__lastbit_`` instructions starts at -1, while for ffs the index starts at 0.
 
 Floating-point Intrinsics
 --------------------------------------------------------------------------------------------
+
 Following is the list of supported floating-point intrinsics. Note that intrinsics are supported on device only.
 
-.. list-table:: Single precision mathematical functions
+.. note::
+
+  Only the round-to-nearest-even rounding mode is supported on AMD GPUs by default. The ``_rz``, ``_ru`` and
+  ``_rd`` suffixed intrinsic functions are available in the HIP AMD backend only if the
+  ``OCML_BASIC_ROUNDED_OPERATIONS`` macro is defined.
+        
+.. list-table:: Single precision intrinsics mathematical functions
 
     * - **Function**
 
-    * - | float __cosf ( float  x )
-        | Calculate the fast approximate cosine of the input argument.
+    * - | ``float __cosf(float x)``
+        | Returns the fast approximate cosine of :math:`x`.
+
+    * - | ``float __exp10f(float x)``
+        | Returns the fast approximation of 10 :sup:`x`.
+
+    * - | ``float __expf(float x)``
+        | Returns the fast approximation of e :sup:`x`.
+
+    * - | ``float __fadd_rn(float x, float y)``
+        | Add two floating-point values in round-to-nearest-even mode.
+
+    * - | ``float __fdiv_rn(float x, float y)``
+        | Divide two floating point values in round-to-nearest-even mode.
+
+    * - | ``float __fmaf_rn(float x, float y, float z)``
+        | Returns ``x × y + z`` as a single operation in round-to-nearest-even mode.
+
+    * - | ``float __fmul_rn(float x, float y)``
+        | Multiply two floating-point values in round-to-nearest-even mode.
+
+    * - | ``float __frcp_rn(float x)``
+        | Returns ``1 / x`` in round-to-nearest-even mode.
+
+    * - | ``float __frsqrt_rn(float x)``
+        | Returns ``1 / √x`` in round-to-nearest-even mode.
+
+    * - | ``float __fsqrt_rn(float x)``
+        | Returns ``√x`` in round-to-nearest-even mode.
 
-    * - | float __expf ( float  x )
-        | Calculate the fast approximate base e exponential of the input argument.
+    * - | ``float __fsub_rn(float x, float y)``
+        | Subtract two floating-point values in round-to-nearest-even mode.
 
-    * - | float __frsqrt_rn ( float  x )
-        | Compute `1 / √x` in round-to-nearest-even mode.
+    * - | ``float __log10f(float x)``
+        | Returns the fast approximate base 10 logarithm of :math:`x`.
+
+    * - | ``float __log2f(float x)``
+        | Returns the fast approximate base 2 logarithm of :math:`x`.
+
+    * - | ``float __logf(float x)``
+        | Returns the fast approximate natural logarithm of :math:`x`.
+
+    * - | ``float __powf(float x, float y)``
+        | Returns the fast approximation of x :sup:`y`.
+
+    * - | ``float __saturatef(float x)``
+        | Clamp :math:`x` to [+0.0, 1.0].
+
+    * - | ``void __sincosf(float x, float* sinptr, float* cosptr)``
+        | Returns the fast approximate sine and cosine of :math:`x` through ``sinptr`` and ``cosptr``.
+
+    * - | ``float __sinf(float x)``
+        | Returns the fast approximate sine of :math:`x`.
+
+    * - | ``float __tanf(float x)``
+        | Returns the fast approximate tangent of :math:`x`.
+
+.. list-table:: Double precision intrinsics mathematical functions
+
+    * - **Function**
 
-    * - | float __fsqrt_rn ( float  x )
-        | Compute `√x` in round-to-nearest-even mode.
+    * - | ``double __dadd_rn(double x, double y)``
+        | Add two floating-point values in round-to-nearest-even mode.
 
-    * - | float __log10f ( float  x )
-        | Calculate the fast approximate base 10 logarithm of the input argument.
+    * - | ``double __ddiv_rn(double x, double y)``
+        | Divide two floating-point values in round-to-nearest-even mode.
 
-    * - | float __log2f ( float  x )
-        | Calculate the fast approximate base 2 logarithm of the input argument.
+    * - | ``double __dmul_rn(double x, double y)``
+        | Multiply two floating-point values in round-to-nearest-even mode.
 
-    * - | float __logf ( float  x )
-        | Calculate the fast approximate base e logarithm of the input argument.
+    * - | ``double __drcp_rn(double x)``
+        | Returns ``1 / x`` in round-to-nearest-even mode.
 
-    * - | float __powf ( float  x, float  y )
-        | Calculate the fast approximate of x<sup>y</sup>.
+    * - | ``double __dsqrt_rn(double x)``
+        | Returns ``√x`` in round-to-nearest-even mode.
 
-    * - | float __sinf ( float  x )
-        | Calculate the fast approximate sine of the input argument.
+    * - | ``double __dsub_rn(double x, double y)``
+        | Subtract two floating-point values in round-to-nearest-even mode.
 
-    * - | float __tanf ( float  x )
-        | Calculate the fast approximate tangent of the input argument.
+    * - | ``double __fma_rn(double x, double y, double z)``
+        | Returns ``x × y + z`` as a single operation in round-to-nearest-even mode.
 
-    * - | double __dsqrt_rn ( double  x )
-        | Compute `√x` in round-to-nearest-even mode.
 
 Texture functions
 ===============================================
@@ -1372,255 +1497,255 @@ HIP supports the following atomic operations.
       - **Supported in HIP**
       - **Supported in CUDA**
 
-    * - int atomicAdd(int* address, int val)
+    * - ``int atomicAdd(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicAdd_system(int* address, int val)
+    * - ``int atomicAdd_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicAdd(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicAdd(unsigned int* address,unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicAdd_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicAdd_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicAdd(unsigned long long* address,unsigned long long val)
+    * - ``unsigned long long atomicAdd(unsigned long long* address,unsigned long long val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val)
+    * - ``unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - float atomicAdd(float* address, float val)
+    * - ``float atomicAdd(float* address, float val)``
       - ✓
       - ✓
 
-    * - float atomicAdd_system(float* address, float val)
+    * - ``float atomicAdd_system(float* address, float val)``
       - ✓
       - ✓
 
-    * - double atomicAdd(double* address, double val)
+    * - ``double atomicAdd(double* address, double val)``
       - ✓
       - ✓
 
-    * - double atomicAdd_system(double* address, double val)
+    * - ``double atomicAdd_system(double* address, double val)``
       - ✓
       - ✓
 
-    * - float unsafeAtomicAdd(float* address, float val)
+    * - ``float unsafeAtomicAdd(float* address, float val)``
       - ✓
       - ✗
 
-    * - float safeAtomicAdd(float* address, float val)
+    * - ``float safeAtomicAdd(float* address, float val)``
       - ✓
       - ✗
 
-    * - double unsafeAtomicAdd(double* address, double val)
+    * - ``double unsafeAtomicAdd(double* address, double val)``
       - ✓
       - ✗
 
-    * - double safeAtomicAdd(double* address, double val)
+    * - ``double safeAtomicAdd(double* address, double val)``
       - ✓
       - ✗
 
-    * - int atomicSub(int* address, int val)
+    * - ``int atomicSub(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicSub_system(int* address, int val)
+    * - ``int atomicSub_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicSub(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicSub(unsigned int* address,unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicSub_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicSub_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - int atomicExch(int* address, int val)
+    * - ``int atomicExch(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicExch_system(int* address, int val)
+    * - ``int atomicExch_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicExch(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicExch(unsigned int* address,unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicExch_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicExch_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicExch(unsigned long long int* address,unsigned long long int val)
+    * - ``unsigned long long atomicExch(unsigned long long int* address,unsigned long long int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val)
+    * - ``unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val)
+    * - ``unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - float atomicExch(float* address, float val)
+    * - ``float atomicExch(float* address, float val)``
       - ✓
       - ✓
 
-    * - int atomicMin(int* address, int val)
+    * - ``int atomicMin(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicMin_system(int* address, int val)
+    * - ``int atomicMin_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicMin(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicMin(unsigned int* address,unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicMin_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicMin_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicMin(unsigned long long* address,unsigned long long val)
+    * - ``unsigned long long atomicMin(unsigned long long* address,unsigned long long val)``
       - ✓
       - ✓
 
-    * - int atomicMax(int* address, int val)
+    * - ``int atomicMax(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicMax_system(int* address, int val)
+    * - ``int atomicMax_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicMax(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicMax(unsigned int* address,unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicMax_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicMax_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicMax(unsigned long long* address,unsigned long long val)
+    * - ``unsigned long long atomicMax(unsigned long long* address,unsigned long long val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicInc(unsigned int* address)
+    * - ``unsigned int atomicInc(unsigned int* address)``
       - ✗
       - ✓
 
-    * - unsigned int atomicDec(unsigned int* address)
+    * - ``unsigned int atomicDec(unsigned int* address)``
       - ✗
       - ✓
 
-    * - int atomicCAS(int* address, int compare, int val)
+    * - ``int atomicCAS(int* address, int compare, int val)``
       - ✓
       - ✓
 
-    * - int atomicCAS_system(int* address, int compare, int val)
+    * - ``int atomicCAS_system(int* address, int compare, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicCAS(unsigned int* address,unsigned int compare,unsigned int val)
+    * - ``unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val)
+    * - ``unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicCAS(unsigned long long* address,unsigned long long compare,unsigned long long val)
+    * - ``unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare, unsigned long long val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare, unsigned long long val)
+    * - ``unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare, unsigned long long val)``
       - ✓
       - ✓
 
-    * - int atomicAnd(int* address, int val)
+    * - ``int atomicAnd(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicAnd_system(int* address, int val)
+    * - ``int atomicAnd_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicAnd(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicAnd(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicAnd_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicAnd_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicAnd(unsigned long long* address,unsigned long long val)
+    * - ``unsigned long long atomicAnd(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val)
+    * - ``unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - int atomicOr(int* address, int val)
+    * - ``int atomicOr(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicOr_system(int* address, int val)
+    * - ``int atomicOr_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicOr(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicOr(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicOr_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicOr_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicOr_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicOr_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicOr(unsigned long long int* address,unsigned long long val)
+    * - ``unsigned long long atomicOr(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val)
+    * - ``unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - int atomicXor(int* address, int val)
+    * - ``int atomicXor(int* address, int val)``
       - ✓
       - ✓
 
-    * - int atomicXor_system(int* address, int val)
+    * - ``int atomicXor_system(int* address, int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicXor(unsigned int* address,unsigned int val)
+    * - ``unsigned int atomicXor(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned int atomicXor_system(unsigned int* address, unsigned int val)
+    * - ``unsigned int atomicXor_system(unsigned int* address, unsigned int val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicXor(unsigned long long* address,unsigned long long val)
+    * - ``unsigned long long atomicXor(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
-    * - unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val)
+    * - ``unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val)``
       - ✓
       - ✓
 
@@ -1826,103 +1951,103 @@ HIP supports the following kernel language cooperative groups types and function
       - **Supported in HIP**
       - **Supported in CUDA**
 
-    * - void thread_group.sync();
+    * - ``void thread_group.sync()``
       - ✓
       - ✓
 
-    * - unsigned thread_group.size();
+    * - ``unsigned thread_group.size()``
       - ✓
       - ✓
 
-    * - unsigned thread_group.thread_rank()
+    * - ``unsigned thread_group.thread_rank()``
       - ✓
       - ✓
 
-    * - bool thread_group.is_valid();
+    * - ``bool thread_group.is_valid()``
       - ✓
       - ✓
 
-    * - grid_group this_grid()
+    * - ``grid_group this_grid()``
       - ✓
       - ✓
 
-    * - void grid_group.sync()
+    * - ``void grid_group.sync()``
       - ✓
       - ✓
 
-    * - unsigned grid_group.size()
+    * - ``unsigned grid_group.size()``
       - ✓
       - ✓
 
-    * - unsigned grid_group.thread_rank()
+    * - ``unsigned grid_group.thread_rank()``
       - ✓
       - ✓
 
-    * - bool grid_group.is_valid()
+    * - ``bool grid_group.is_valid()``
       - ✓
       - ✓
 
-    * - multi_grid_group this_multi_grid()
+    * - ``multi_grid_group this_multi_grid()``
       - ✓
       - ✓
 
-    * - void multi_grid_group.sync()
+    * - ``void multi_grid_group.sync()``
       - ✓
       - ✓
 
-    * - unsigned multi_grid_group.size()
+    * - ``unsigned multi_grid_group.size()``
       - ✓
       - ✓
 
-    * - unsigned multi_grid_group.thread_rank()
+    * - ``unsigned multi_grid_group.thread_rank()``
       - ✓
       - ✓
 
-    * - bool multi_grid_group.is_valid()
+    * - ``bool multi_grid_group.is_valid()``
       - ✓
       - ✓
 
-    * - unsigned multi_grid_group.num_grids()
+    * - ``unsigned multi_grid_group.num_grids()``
       - ✓
       - ✓
 
-    * - unsigned multi_grid_group.grid_rank()
+    * - ``unsigned multi_grid_group.grid_rank()``
       - ✓
       - ✓
 
-    * - thread_block this_thread_block()
+    * - ``thread_block this_thread_block()``
       - ✓
       - ✓
 
-    * - multi_grid_group this_multi_grid()
+    * - ``multi_grid_group this_multi_grid()``
       - ✓
       - ✓
 
-    * - void multi_grid_group.sync()
+    * - ``void multi_grid_group.sync()``
       - ✓
       - ✓
 
-    * - void thread_block.sync()
+    * - ``void thread_block.sync()``
       - ✓
       - ✓
 
-    * - unsigned thread_block.size()
+    * - ``unsigned thread_block.size()``
       - ✓
       - ✓
 
-    * - unsigned thread_block.thread_rank()
+    * - ``unsigned thread_block.thread_rank()``
       - ✓
       - ✓
 
-    * - bool thread_block.is_valid()
+    * - ``bool thread_block.is_valid()``
       - ✓
       - ✓
 
-    * - dim3 thread_block.group_index()
+    * - ``dim3 thread_block.group_index()``
       - ✓
       - ✓
 
-    * - dim3 thread_block.thread_index()
+    * - ``dim3 thread_block.thread_index()``
       - ✓
       - ✓
 
@@ -1940,23 +2065,23 @@ HIP does not support kernel language warp matrix types or functions.
       - **Supported in HIP**
       - **Supported in CUDA**
 
-    * - void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda)
+    * - ``void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda)``
       - ✗
       - ✓
 
-    * - void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda, layout_t layout)
+    * - ``void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda, layout_t layout)``
       - ✗
       - ✓
 
-    * - void store_matrix_sync(T* mptr, fragment<...> &a,  unsigned lda, layout_t layout)
+    * - ``void store_matrix_sync(T* mptr, fragment<...> &a, unsigned lda, layout_t layout)``
       - ✗
       - ✓
 
-    * - void fill_fragment(fragment<...> &a, const T &value)
+    * - ``void fill_fragment(fragment<...> &a, const T &value)``
       - ✗
       - ✓
 
-    * - void mma_sync(fragment<...> &d, const fragment<...> &a, const fragment<...> &b, const fragment<...> &c , bool sat)
+    * - ``void mma_sync(fragment<...> &d, const fragment<...> &a, const fragment<...> &b, const fragment<...> &c, bool sat)``
       - ✗
       - ✓