Emulate float16 on CPU but with warnings (#609)
* Emulate float16 on CPU but with warnings

* Disable CPU float16 in CUDA codegen for now
roastduck authored Apr 17, 2024
1 parent a27d6da commit 6f947ac
Showing 6 changed files with 44 additions and 1 deletion.
.gitmodules (3 additions, 0 deletions)
@@ -24,3 +24,6 @@
 [submodule "3rd-party/cutlass"]
 	path = 3rd-party/cutlass
 	url = ../../NVIDIA/cutlass.git
+[submodule "3rd-party/half"]
+	path = 3rd-party/half
+	url = ../../suruoxi/half.git
3rd-party/half (1 addition, 0 deletions)
Submodule half added at 7cd91f
runtime/cpu_runtime.h (2 additions, 0 deletions)
@@ -19,6 +19,8 @@
#include "mdspan.h"
#include "unchecked_opt.h"

#include "../3rd-party/half/include/half.hpp"

#define restrict __restrict__
#define __ByValArray std::array

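The newly bundled header provides half_float::half, a software-emulated IEEE 754 binary16 type whose arithmetic operators run entirely on the CPU. A minimal sketch of what generated CPU code can now rely on (the include path and the sum_half helper are illustrative, not part of this commit):

#include <vector>
#include "../3rd-party/half/include/half.hpp"

// Hypothetical helper: sums emulated float16 values. Each += below goes
// through the half library's software arithmetic, which is the performance
// cost the new codegen warning (see src/codegen/detail/code_gen_c.h below)
// refers to.
half_float::half sum_half(const std::vector<half_float::half> &xs) {
    half_float::half acc(0.0f);
    for (const half_float::half &x : xs) {
        acc += x;
    }
    return acc;
}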
src/codegen/code_gen_cuda.cc (1 addition, 1 deletion)
@@ -113,7 +113,7 @@ void CodeGenCUDA::genMdPtrDef(const VarDef &def,
 }
 
 std::string CodeGenCUDA::gen(const DataType &dtype) {
-    if (dtype == DataType::Float16) {
+    if (dtype.base() == DataType::Float16) {
         return "__half";
     } else {
         return CodeGenC::gen(dtype);
src/codegen/detail/code_gen_c.h (8 additions, 0 deletions)
@@ -767,6 +767,14 @@ std::string CodeGenC<Stream>::gen(const DataType &dtype) {
return "double";
case DataType::Float32:
return "float";
case DataType::Float16:
WARNING(
"float16 arithmetics on CPU is supported via emulation and comes "
"with a performance cost, which is only for compatibility purpose. "
"If you intend to do float32 computation on float16 variables, "
"please convert them explicitly. Please ignore this warning if you "
"are only allocating buffers and not performing arithmetics.");
return "half_float::half"; // From 3rd-party/half
case DataType::Int64:
return "int64_t";
case DataType::Int32:
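The warning's advice is to convert explicitly when the intent is float32 math over float16 storage. A hedged sketch of that pattern in terms of the bundled half library (sum_in_float32 is an illustrative helper, not generated code):

#include <vector>
#include "../3rd-party/half/include/half.hpp"

// Keep the buffer in float16 but do the arithmetic in float32: widen each
// element once, accumulate in float, and avoid the emulated half-precision
// operators entirely.
float sum_in_float32(const std::vector<half_float::half> &xs) {
    float acc = 0.0f;
    for (const half_float::half &x : xs) {
        acc += static_cast<float>(x); // explicit widening conversion
    }
    return acc;
}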
test/40.codegen/cpu/test_cpu.py (29 additions, 0 deletions)
@@ -201,3 +201,32 @@ def test(x, y):

     y_std = np.array([2, 3, 4, 5], dtype="int32")
     assert np.array_equal(y_np, y_std)
+
+
+def test_float16_compute():
+    # Not testing float16 I/O here
+
+    @ft.transform
+    def test(x, y):
+        x: ft.Var[(4,), "float32", "input"]
+        y: ft.Var[(), "float32", "output"]
+        x16 = ft.empty((4,), "float16")
+        y16 = ft.empty((), "float16")
+        for j in range(4):
+            x16[j] = ft.cast(x[j], "float16")
+        y16[...] = 0
+        for j in range(4):
+            y16[...] += x16[j]
+        y[...] = ft.cast(y16[...], "float32")
+
+    func = ft.lower(test, verbose=1)
+    code = ft.codegen(func, verbose=True)
+    x_np = np.random.uniform(size=(4,)).astype("float32")
+    y_np = np.zeros((), dtype="float32")
+    x_arr = ft.array(x_np)
+    y_arr = ft.array(y_np)
+    ft.build_binary(code)(x=x_arr, y=y_arr)
+    y_np = y_arr.numpy()
+
+    y_std = np.sum(x_np.astype("float16")).astype("float32")
+    assert np.all(np.isclose(y_np, y_std, atol=1e-2))
