diff --git a/CHANGELOG b/CHANGELOG
index 2c4e9abf..cc919b77 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,10 @@ Development version (next version)
 - Fix a bug in the pre-processor that would cause issues on Arm GPUs
 - Fix DLL install directory in mingw
 - Added tuned parameters for various devices (see doc/tuning.md)
+- Modifications to the python bindings (pyclblast)
+  * Convert float scalar values to cl_half for fp16 routines
+  * Amax/amin, max/min routines accept unsigned integer buffers for index
+- Generator script now always uses LF line endings, independent of the platform
 
 Version 1.6.1
 - Fix pointer error in pyclblast on Arm
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 3d66960f..1da396d3 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -49,7 +49,7 @@
     "/src/clblast_cuda.cpp",
     "/src/pyclblast/src/pyclblast.pyx"
 ]
-HEADER_LINES = [129, 21, 133, 24, 29, 45, 29, 66, 40, 96, 21, 327]
+HEADER_LINES = [129, 21, 133, 24, 29, 45, 29, 66, 40, 96, 21, 341]
 FOOTER_LINES = [98, 57, 112, 275, 6, 6, 6, 9, 2, 41, 56, 37]
 HEADER_LINES_DOC = 0
 FOOTER_LINES_DOC = 232
@@ -215,7 +215,7 @@ def main(argv):
         file_footer = original[-FOOTER_LINES[i]:]
 
         # Re-writes the body of the file
-        with open(library_root + FILES[i], "w") as f:
+        with open(library_root + FILES[i], "w", newline="\n") as f:
             body = ""
             levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4]
             for level in levels:
@@ -261,14 +261,14 @@ def main(argv):
 
                 # Correctness tests
                 filename = library_root + "/test/correctness/routines/" + routine_suffix
-                with open(filename, "w") as f:
+                with open(filename, "w", newline="\n") as f:
                     f.write(cpp.HEADER + "\n")
                     f.write(cpp.correctness_test(routine, level_string))
                     f.write(cpp.FOOTER)
 
                 # Performance tests
                 filename = library_root + "/test/performance/routines/" + routine_suffix
-                with open(filename, "w") as f:
+                with open(filename, "w", newline="\n") as f:
                     f.write(cpp.HEADER + "\n")
                     f.write(cpp.performance_test(routine, level_string))
                     f.write(cpp.FOOTER)
@@ -283,7 +283,7 @@ def main(argv):
     file_footer = original[-FOOTER_LINES_DOC:]
 
     # Outputs the API documentation
-    with open(filename, "w") as f:
+    with open(filename, "w", newline="\n") as f:
 
         # Outputs the header
         f.write("".join(file_header))
diff --git a/scripts/generator/generator/pyclblast.py b/scripts/generator/generator/pyclblast.py
index 92143c8d..8cf9e18e 100644
--- a/scripts/generator/generator/pyclblast.py
+++ b/scripts/generator/generator/pyclblast.py
@@ -8,7 +8,7 @@
 import os
 
 
-NL = os.linesep
+NL = '\n'
 SEPARATOR = "####################################################################################################"
 
 
@@ -43,7 +43,7 @@ def scalar_cython_conversion(scalar, flavour):
     if scalar_type in ["cl_double2", "double2"]:
         return "<cl_double2>cl_double2(x=" + scalar + ".real,y=" + scalar + ".imag)"
     if scalar_type in ["cl_half", "half"]:
-        return "<cl_half>" + scalar
+        return "<cl_half>val_to_half(" + scalar + ")"
     raise RuntimeError("Could not convert flavour '%s:%s'" % (flavour.precision_name, scalar_type))
 
 
@@ -82,8 +82,18 @@ def generate_pyx(routine):
         result += NL
 
         # Data types and checks
-        result += indent + "dtype = check_dtype([" + ", ".join(buffers) + "], "
+        int_buff = []
+        other_buff = []
+        for buf in buffers:
+            if buf in routine.index_buffers():
+                int_buff.append(buf)
+            else:
+                other_buff.append(buf)
+        result += indent + "dtype = check_dtype([" + ", ".join(other_buff) + "], "
         result += "[" + ", ".join(['"%s"' % d for d in np_dtypes]) + "])" + NL
+        if int_buff:
+            result += indent + "check_dtype([" + ", ".join(int_buff) + "], "
+            result += "[" + ", ".join(['"uint16", "uint32", "uint64"']) + "])" + NL
         for buf in buffers:
             if buf in routine.buffers_vector():
                 result += indent + "check_vector("
diff --git a/src/pyclblast/README.md b/src/pyclblast/README.md
index c6acc7b0..fe4cd7e3 100644
--- a/src/pyclblast/README.md
+++ b/src/pyclblast/README.md
@@ -54,5 +54,5 @@ How to release a new version on PyPi
 Following [the guide](https://packaging.python.org/tutorials/packaging-projects/), in essence doing (after changing the version number in `setup.py`):
 
     python3 setup.py sdist bdist_wheel
-    python3 -m twine upload --repository pypi dist/pyclblast-1.3.2.tar.gz
+    python3 -m twine upload --repository pypi dist/pyclblast-1.4.0.tar.gz
     # use '__token__' as username and supply the token from your PyPi account
diff --git a/src/pyclblast/setup.py b/src/pyclblast/setup.py
index 66d4955f..c6811935 100644
--- a/src/pyclblast/setup.py
+++ b/src/pyclblast/setup.py
@@ -10,6 +10,11 @@
 from distutils.extension import Extension
 from Cython.Distutils import build_ext
 import platform
+import numpy
+import os
+
+np_incdir = numpy.get_include()
+np_libdir = os.path.join(np_incdir, '..', 'lib', '')
 
 runtime_library_dirs = list()
 if platform.system() == "Linux":
@@ -23,15 +28,17 @@
     Extension(
         "pyclblast",
         ["src/pyclblast.pyx"],
-        libraries=["clblast"],
+        libraries=["clblast", "npymath"],
         runtime_library_dirs=runtime_library_dirs,
+        library_dirs=[np_libdir],
+        include_dirs=[np_incdir],
         language="c++"
     )
 )
 
 setup(
     name="pyclblast",
-    version="1.3.2",
+    version="1.4.0",
     author="Cedric Nugteren",
     author_email="web@cedricnugteren.nl",
     url="https://github.com/CNugteren/CLBlast/blob/master/src/pyclblast",
diff --git a/src/pyclblast/src/pyclblast.pyx b/src/pyclblast/src/pyclblast.pyx
index 9d517896..19c40fff 100644
--- a/src/pyclblast/src/pyclblast.pyx
+++ b/src/pyclblast/src/pyclblast.pyx
@@ -19,6 +19,7 @@ from pyopencl.array import Array
 from libcpp cimport bool
 from cpython.mem cimport PyMem_Malloc, PyMem_Free
 from libc.string cimport strdup
+from libc.stdint cimport uint16_t
 
 ####################################################################################################
 # CLBlast and OpenCL data-types
@@ -294,6 +295,19 @@ def check_vector(a, name):
 # Half-precision utility functions
 ####################################################################################################
 
+cdef extern from "numpy/halffloat.h":
+    ctypedef uint16_t npy_half
+
+    # conversion functions
+    npy_half npy_float_to_half(float f)
+    npy_half npy_double_to_half(double d)
+
+cdef npy_half val_to_half(object val):  # Convert a Python/NumPy scalar to an npy_half (uint16_t) using the numpy/halffloat.h converters
+    if isinstance(val, (np.float32, np.float16)):  # NumPy single/half scalars are routed through the float converter
+        return npy_float_to_half(val)
+    else:  # every other value is coerced to a C double and converted from there
+        return npy_double_to_half(val)
+
 def float32_to_float16(float32):
     # Taken from https://gamedev.stackexchange.com/a/28756
     F16_EXPONENT_BITS = 0x1F
@@ -403,7 +417,7 @@ def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0):
     elif dtype == np.dtype("complex128"):
         err = CLBlastZscal(n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHscal(n, <cl_half>alpha, x_buffer, x_offset, x_inc, &command_queue, &event)
+        err = CLBlastHscal(n, <cl_half>val_to_half(alpha), x_buffer, x_offset, x_inc, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -491,7 +505,7 @@ def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offs
     elif dtype == np.dtype("complex128"):
         err = CLBlastZaxpy(n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHaxpy(n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
+        err = CLBlastHaxpy(n, <cl_half>val_to_half(alpha), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -761,7 +775,8 @@ def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
     xAMAX: Index of absolute maximum value in a vector
     """
 
-    dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128", "float16"])
+    dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128", "float16"])
+    check_dtype([imax], ["uint16", "uint32", "uint64"])
     check_vector(x, "x")
     check_matrix(imax, "imax")
 
@@ -805,7 +820,8 @@ def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
     xAMIN: Index of absolute minimum value in a vector (non-BLAS function)
     """
 
-    dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128", "float16"])
+    dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128", "float16"])
+    check_dtype([imin], ["uint16", "uint32", "uint64"])
     check_vector(x, "x")
     check_matrix(imin, "imin")
 
@@ -849,7 +865,8 @@ def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
     xMAX: Index of maximum value in a vector (non-BLAS function)
     """
 
-    dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128", "float16"])
+    dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128", "float16"])
+    check_dtype([imax], ["uint16", "uint32", "uint64"])
     check_vector(x, "x")
     check_matrix(imax, "imax")
 
@@ -893,7 +910,8 @@ def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
     xMIN: Index of minimum value in a vector (non-BLAS function)
     """
 
-    dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128", "float16"])
+    dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128", "float16"])
+    check_dtype([imin], ["uint16", "uint32", "uint64"])
     check_vector(x, "x")
     check_matrix(imin, "imin")
 
@@ -960,7 +978,7 @@ def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0
     elif dtype == np.dtype("complex128"):
         err = CLBlastZgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+        err = CLBlastHgemv(CLBlastLayoutRowMajor, a_transpose, m, n, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>val_to_half(beta), y_buffer, y_offset, y_inc, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1007,7 +1025,7 @@ def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0,
     elif dtype == np.dtype("complex128"):
         err = CLBlastZgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double2>cl_double2(x=beta.real,y=beta.imag), y_buffer, y_offset, y_inc, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+        err = CLBlastHgbmv(CLBlastLayoutRowMajor, a_transpose, m, n, kl, ku, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>val_to_half(beta), y_buffer, y_offset, y_inc, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1162,7 +1180,7 @@ def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0,
     elif dtype == np.dtype("float64"):
         err = CLBlastDsymv(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHsymv(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+        err = CLBlastHsymv(CLBlastLayoutRowMajor, triangle, n, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>val_to_half(beta), y_buffer, y_offset, y_inc, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1203,7 +1221,7 @@ def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0
     elif dtype == np.dtype("float64"):
         err = CLBlastDsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_double>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+        err = CLBlastHsbmv(CLBlastLayoutRowMajor, triangle, n, k, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, x_buffer, x_offset, x_inc, <cl_half>val_to_half(beta), y_buffer, y_offset, y_inc, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1244,7 +1262,7 @@ def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.
     elif dtype == np.dtype("float64"):
         err = CLBlastDspmv(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_double>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHspmv(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_half>beta, y_buffer, y_offset, y_inc, &command_queue, &event)
+        err = CLBlastHspmv(CLBlastLayoutRowMajor, triangle, n, <cl_half>val_to_half(alpha), ap_buffer, ap_offset, x_buffer, x_offset, x_inc, <cl_half>val_to_half(beta), y_buffer, y_offset, y_inc, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1469,7 +1487,7 @@ def ger(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset
     elif dtype == np.dtype("float64"):
         err = CLBlastDger(CLBlastLayoutRowMajor, m, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHger(CLBlastLayoutRowMajor, m, n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+        err = CLBlastHger(CLBlastLayoutRowMajor, m, n, <cl_half>val_to_half(alpha), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1730,7 +1748,7 @@ def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_
     elif dtype == np.dtype("float64"):
         err = CLBlastDsyr(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHsyr(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+        err = CLBlastHsyr(CLBlastLayoutRowMajor, triangle, n, <cl_half>val_to_half(alpha), x_buffer, x_offset, x_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1769,7 +1787,7 @@ def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False,
     elif dtype == np.dtype("float64"):
         err = CLBlastDspr(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHspr(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event)
+        err = CLBlastHspr(CLBlastLayoutRowMajor, triangle, n, <cl_half>val_to_half(alpha), x_buffer, x_offset, x_inc, ap_buffer, ap_offset, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1810,7 +1828,7 @@ def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_trian
     elif dtype == np.dtype("float64"):
         err = CLBlastDsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
+        err = CLBlastHsyr2(CLBlastLayoutRowMajor, triangle, n, <cl_half>val_to_half(alpha), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, a_buffer, a_offset, a_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1851,7 +1869,7 @@ def spr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_tri
     elif dtype == np.dtype("float64"):
         err = CLBlastDspr2(CLBlastLayoutRowMajor, triangle, n, <cl_double>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHspr2(CLBlastLayoutRowMajor, triangle, n, <cl_half>alpha, x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
+        err = CLBlastHspr2(CLBlastLayoutRowMajor, triangle, n, <cl_half>val_to_half(alpha), x_buffer, x_offset, x_inc, y_buffer, y_offset, y_inc, ap_buffer, ap_offset, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1899,7 +1917,7 @@ def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_t
     elif dtype == np.dtype("complex128"):
         err = CLBlastZgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+        err = CLBlastHgemm(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>val_to_half(beta), c_buffer, c_offset, c_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -1947,7 +1965,7 @@ def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_
     elif dtype == np.dtype("complex128"):
         err = CLBlastZsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+        err = CLBlastHsymm(CLBlastLayoutRowMajor, side, triangle, m, n, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>val_to_half(beta), c_buffer, c_offset, c_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -2032,7 +2050,7 @@ def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle
     elif dtype == np.dtype("complex128"):
         err = CLBlastZsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+        err = CLBlastHsyrk(CLBlastLayoutRowMajor, triangle, a_transpose, n, k, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, <cl_half>val_to_half(beta), c_buffer, c_offset, c_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -2117,7 +2135,7 @@ def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower
     elif dtype == np.dtype("complex128"):
         err = CLBlastZsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>beta, c_buffer, c_offset, c_ld, &command_queue, &event)
+        err = CLBlastHsyr2k(CLBlastLayoutRowMajor, triangle, ab_transpose, n, k, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, <cl_half>val_to_half(beta), c_buffer, c_offset, c_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -2204,7 +2222,7 @@ def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_t
     elif dtype == np.dtype("complex128"):
         err = CLBlastZtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_half>alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
+        err = CLBlastHtrmm(CLBlastLayoutRowMajor, side, triangle, a_transpose, diagonal, m, n, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)
 
@@ -2298,7 +2316,7 @@ def axpyBatched(queue, n, x, y, alphas, x_offsets, y_offsets, x_inc = 1, y_inc =
         elif dtype == np.dtype("complex128"):
             (<cl_double2*>alphas_c)[i] = <cl_double2>cl_double2(x=alphas[i].real,y=alphas[i].imag)
         elif dtype == np.dtype("float16"):
-            (<cl_half*>alphas_c)[i] = <cl_half>alphas[i]
+            (<cl_half*>alphas_c)[i] = <cl_half>val_to_half(alphas[i])
 
     cdef cl_mem x_buffer = <cl_mem><ptrdiff_t>x.base_data.int_ptr
     cdef cl_mem y_buffer = <cl_mem><ptrdiff_t>y.base_data.int_ptr
@@ -2373,7 +2391,7 @@ def gemmBatched(queue, m, n, k, a, b, c, alphas, betas, a_ld, b_ld, c_ld, a_offs
         elif dtype == np.dtype("complex128"):
             (<cl_double2*>alphas_c)[i] = <cl_double2>cl_double2(x=alphas[i].real,y=alphas[i].imag)
         elif dtype == np.dtype("float16"):
-            (<cl_half*>alphas_c)[i] = <cl_half>alphas[i]
+            (<cl_half*>alphas_c)[i] = <cl_half>val_to_half(alphas[i])
     cdef void *betas_c = <void *> PyMem_Malloc(batch_count * sizeof(dtype_size[dtype]))
     for i in range(batch_count):
         if dtype == np.dtype("float32"):
@@ -2385,7 +2403,7 @@ def gemmBatched(queue, m, n, k, a, b, c, alphas, betas, a_ld, b_ld, c_ld, a_offs
         elif dtype == np.dtype("complex128"):
             (<cl_double2*>betas_c)[i] = <cl_double2>cl_double2(x=betas[i].real,y=betas[i].imag)
         elif dtype == np.dtype("float16"):
-            (<cl_half*>betas_c)[i] = <cl_half>betas[i]
+            (<cl_half*>betas_c)[i] = <cl_half>val_to_half(betas[i])
 
     cdef cl_mem a_buffer = <cl_mem><ptrdiff_t>a.base_data.int_ptr
     cdef cl_mem b_buffer = <cl_mem><ptrdiff_t>b.base_data.int_ptr
@@ -2460,7 +2478,7 @@ def gemmStridedBatched(queue, m, n, k, batch_count, a, b, c, a_ld, b_ld, c_ld, a
     elif dtype == np.dtype("complex128"):
         err = CLBlastZgemmStridedBatched(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_double2>cl_double2(x=alpha.real,y=alpha.imag), a_buffer, a_offset, a_ld, a_stride, b_buffer, b_offset, b_ld, b_stride, <cl_double2>cl_double2(x=beta.real,y=beta.imag), c_buffer, c_offset, c_ld, c_stride, batch_count, &command_queue, &event)
     elif dtype == np.dtype("float16"):
-        err = CLBlastHgemmStridedBatched(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_half>alpha, a_buffer, a_offset, a_ld, a_stride, b_buffer, b_offset, b_ld, b_stride, <cl_half>beta, c_buffer, c_offset, c_ld, c_stride, batch_count, &command_queue, &event)
+        err = CLBlastHgemmStridedBatched(CLBlastLayoutRowMajor, a_transpose, b_transpose, m, n, k, <cl_half>val_to_half(alpha), a_buffer, a_offset, a_ld, a_stride, b_buffer, b_offset, b_ld, b_stride, <cl_half>val_to_half(beta), c_buffer, c_offset, c_ld, c_stride, batch_count, &command_queue, &event)
     else:
         raise ValueError("PyCLBlast: Unrecognized data-type '%s'" % dtype)